"""
Generate a consolidated company PDF report from all collected data files.

Usage:
  python generate_company_report.py --ticker AAPL

The script will:
- Collect files from data/financials, data/metrics, data/reports,
  data/sec_filings, data/sedar_filings, data/serpapi_news, data/news,
  data/exports
- Create a consolidated Markdown file at data/reports/{ticker}_full_report.md
- Attempt to render a PDF at data/reports/{ticker}_full_report.pdf using
  reportlab or fpdf
- If PDF libs are missing, only the Markdown will be created and
  instructions printed
"""

import os
import json
import argparse
import textwrap
from datetime import datetime

DATA_DIR = 'data'
REPORTS_DIR = os.path.join(DATA_DIR, 'reports')
EXPORTS_DIR = os.path.join(DATA_DIR, 'exports')

os.makedirs(REPORTS_DIR, exist_ok=True)

# Quote fields that are lifted to the top of the "statistics" mapping.
_QUOTE_KEYS = ('date', 'close', 'open', 'high', 'low', 'volume')


def read_file_if_exists(path):
    """Return the UTF-8 text content of *path*, or None if it cannot be read.

    A missing file, a directory, a permission problem or undecodable bytes
    all yield None; this is a deliberate best-effort reader.
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            return f.read()
    except (OSError, ValueError):
        # ValueError covers UnicodeDecodeError.
        return None


def read_json_if_exists(path):
    """Return the parsed JSON content of *path*, or None if unavailable.

    None is returned when the file is missing, unreadable or not valid JSON.
    Note that a file containing the JSON literal ``null`` also yields None.
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, ValueError, RecursionError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return None


def _stock_listing_section(symbol):
    """Return the stocks_master row for *symbol* as JSON text, or a message."""
    db_path = os.path.join(DATA_DIR, 'stocks.db')
    if not os.path.isfile(db_path):
        # sqlite3.connect() would otherwise create an empty database file
        # as a side effect of merely generating a report.
        return 'Could not read stocks.db: file not found: ' + db_path
    try:
        # Imported lazily so that a Python build without sqlite3 still
        # produces a report (the ImportError is reported below).
        import sqlite3

        conn = sqlite3.connect(db_path)
        try:
            cur = conn.execute(
                'SELECT * FROM stocks_master WHERE symbol = ?', (symbol,)
            )
            row = cur.fetchone()
            if row is None:
                return 'No stocks_master entry found for ' + symbol
            # cursor.description carries the column *names* of the SELECT;
            # the first field of PRAGMA table_info rows is the column index.
            cols = [col[0] for col in cur.description]
            return json.dumps(dict(zip(cols, row)), indent=2)
        finally:
            conn.close()
    except Exception as e:
        # Best effort: any database problem becomes a line in the report.
        return 'Could not read stocks.db: ' + str(e)


def _exports_section():
    """Return the names of the export files, one per line, sorted."""
    names = sorted(os.listdir(EXPORTS_DIR)) if os.path.isdir(EXPORTS_DIR) else []
    return '\n'.join(names) or 'No export files found'


def _merge_quote_into_statistics(fin):
    """Return *fin* with quote fields placed at the top of its statistics.

    Empty quote placeholders inside ``statistics`` are dropped so they do not
    override the real values from ``quote``. The input is returned unchanged
    when it does not have dict-valued ``quote`` and ``statistics`` entries.
    """
    if not isinstance(fin, dict):
        return fin
    quote = fin.get('quote')
    stats = fin.get('statistics')
    if not isinstance(quote, dict) or not isinstance(stats, dict):
        return fin
    merged = {key: quote.get(key, '') for key in _QUOTE_KEYS}
    merged.update(
        (key, value)
        for key, value in stats.items()
        if value or key not in _QUOTE_KEYS
    )
    # Copy instead of mutating so the loaded data stays untouched.
    return dict(fin, statistics=merged)


def _json_section(path, missing_prefix, transform=None):
    """Return the JSON file at *path* pretty-printed, or a 'missing' message."""
    data = read_json_if_exists(path)
    if data is None:
        return missing_prefix + path
    if transform is not None:
        data = transform(data)
    return json.dumps(data, indent=2)


def gather_contents(ticker):
    """Collect every data file known for *ticker* into one report string.

    Args:
        ticker: Stock symbol; it is upper-cased before building file names.

    Returns:
        The report text. Each section is a heading followed by either the
        file content (JSON is pretty-printed) or a line saying which file
        was not found.
    """
    t = ticker.upper()

    rpt_path = os.path.join(DATA_DIR, 'reports', f'{t}_comprehensive_report.txt')
    rpt = read_file_if_exists(rpt_path)
    if rpt is None:
        rpt = 'No comprehensive report found: ' + rpt_path

    sections = [
        ('STOCK LISTING ENTRY:\n', _stock_listing_section(t)),
        ('EXPORTS:\n', _exports_section()),
        (
            'FINANCIALS (Yahoo scraped):\n',
            _json_section(
                os.path.join(DATA_DIR, 'financials', f'{t}_yahoo.json'),
                'No Yahoo Finance file: ',
                transform=_merge_quote_into_statistics,
            ),
        ),
        (
            'CALCULATED METRICS:\n',
            _json_section(
                os.path.join(DATA_DIR, 'metrics', f'{t}_calculated_metrics.json'),
                'No calculated metrics file: ',
            ),
        ),
        ('GENERATED REPORT (text):\n', rpt),
        (
            'SEC FILINGS (EDGAR):\n',
            _json_section(
                os.path.join(DATA_DIR, 'sec_filings', f'{t}_sec_filings.json'),
                'No SEC filings file: ',
            ),
        ),
        (
            'SEDAR+ FILINGS (if any):\n',
            _json_section(
                os.path.join(DATA_DIR, 'sedar_filings', f'{t}_sedar_data.json'),
                'No SEDAR+ file: ',
            ),
        ),
        (
            'SERPAPI NEWS (collected):\n',
            _json_section(
                os.path.join(DATA_DIR, 'serpapi_news', f'{t}_serpapi.json'),
                'No SerpAPI news file: ',
            ),
        ),
        (
            'DIRECT NEWS/PR SCRAPES (if any):\n',
            _json_section(
                os.path.join(DATA_DIR, 'news', f'{t}_news_pr.json'),
                'No direct news/pr file: ',
            ),
        ),
    ]

    header = f"Company Consolidated Report - {t}\nGenerated: {datetime.now().isoformat()}\n"
    parts = [header, '---\n']
    for heading, body in sections:
        parts.extend((heading, body, '\n'))
    return '\n'.join(parts)


def save_markdown(ticker, content):
    """Write *content* to the ticker's Markdown report and return its path."""
    # The directory is created at import time, but the working directory may
    # have changed since then.
    os.makedirs(REPORTS_DIR, exist_ok=True)
    md_path = os.path.join(REPORTS_DIR, f'{ticker}_full_report.md')
    with open(md_path, 'w', encoding='utf-8') as f:
        f.write(content)
    return md_path


def _render_with_reportlab(text, pdf_path):
    """Render *text* to *pdf_path* with ReportLab; raises on any failure."""
    from reportlab.lib.pagesizes import letter
    from reportlab.pdfgen import canvas

    _, page_height = letter
    left_margin = 40
    top_margin = 40
    bottom_margin = 40
    line_height = 12
    font_name, font_size = 'Helvetica', 9

    pdf = canvas.Canvas(pdf_path, pagesize=letter)
    pdf.setFont(font_name, font_size)
    y = page_height - top_margin
    wrapper = textwrap.TextWrapper(width=95)
    for paragraph in text.split('\n'):
        lines = wrapper.wrap(paragraph)
        if not lines:
            # Blank input line: keep the vertical gap.
            y -= line_height
            continue
        for line in lines:
            if y < bottom_margin + line_height:
                pdf.showPage()
                # Set the font again explicitly for the new page.
                pdf.setFont(font_name, font_size)
                y = page_height - top_margin
            pdf.drawString(left_margin, y, line)
            y -= line_height
    pdf.save()


def _render_with_fpdf(text, pdf_path):
    """Render *text* to *pdf_path* with FPDF; raises on any failure."""
    from fpdf import FPDF

    line_height = 6
    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()
    pdf.set_font('Arial', size=10)
    for paragraph in text.split('\n'):
        lines = textwrap.wrap(paragraph, 90)
        if not lines:
            # Keep blank lines, as the ReportLab renderer does.
            pdf.ln(line_height)
            continue
        for line in lines:
            # The font set above is used with latin-1 text only; replace
            # anything else rather than fail.
            safe = line.encode('latin-1', 'replace').decode('latin-1')
            pdf.cell(0, line_height, safe, ln=1)
    pdf.output(pdf_path)


def render_pdf_from_text(ticker, text, pdf_path):
    """Render *text* as a plain PDF at *pdf_path*.

    ReportLab is tried first and FPDF second. *ticker* is accepted for
    interface compatibility and is not used.

    Returns:
        ``(True, None)`` on success, otherwise ``(False, message)`` where the
        message contains the errors from both renderers.
    """
    try:
        _render_with_reportlab(text, pdf_path)
        return True, None
    except Exception as e:
        # ReportLab missing or failed: fall back to FPDF. The module-level
        # textwrap import is used by both renderers; a function-local import
        # here would leave the name unbound for the fallback.
        try:
            _render_with_fpdf(text, pdf_path)
            return True, None
        except Exception as e2:
            return False, f'ReportLab and FPDF not available or failed: {e} / {e2}'


def main():
    """Command-line entry point: build the Markdown report and try the PDF."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--ticker', '-t', default='AAPL', help='Ticker to generate report for')
    args = parser.parse_args()
    ticker = args.ticker.upper()

    print(f'Gathering data for {ticker}...')
    content = gather_contents(ticker)
    md_path = save_markdown(ticker, content)
    print('Markdown saved to', md_path)

    pdf_path = os.path.join(REPORTS_DIR, f'{ticker}_full_report.pdf')
    ok, err = render_pdf_from_text(ticker, content, pdf_path)
    if ok:
        print('PDF generated at', pdf_path)
    else:
        print('PDF generation failed:', err)
        print('Markdown is available. Convert to PDF with pandoc or wkhtmltopdf:')
        print(f' pandoc {md_path} -o {pdf_path} # or use your preferred tool')


if __name__ == '__main__':
    main()