microcap_scrapping/export_csv.py

"""
Export stock data to CSV format
Creates comprehensive CSV files with all data
"""

import csv
import json
import os
import sqlite3
from datetime import datetime
from typing import List, Dict, Any

from config import DATABASE_PATH, CSV_EXPORT_PATH, DETAILED_CSV_PATH


class CSVExporter:
    def __init__(self, db_path=DATABASE_PATH):
        self.db_path = db_path
        self.conn = sqlite3.connect(db_path)
        self.cursor = self.conn.cursor()

    def export_stock_list(self, output_file=CSV_EXPORT_PATH):
        """Export basic stock list to CSV"""
        print(f"\n📤 Exporting stock list to {output_file}...")

        os.makedirs(os.path.dirname(output_file), exist_ok=True)

        self.cursor.execute("""
            SELECT
                s.symbol,
                s.company_name,
                s.exchange,
                s.sector,
                s.industry,
                s.country,
                s.listing_date,
                c.has_financials,
                c.has_ttm,
                c.has_news,
                c.has_press_releases,
                c.has_filings,
                c.has_tax_disclosures,
                c.has_agm_info,
                c.last_updated
            FROM stocks_master s
            LEFT JOIN coverage_report c ON s.symbol = c.ticker
            ORDER BY s.symbol
        """)

        rows = self.cursor.fetchall()

        with open(output_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)

            # Header
            writer.writerow([
                'Ticker',
                'Company Name',
                'Exchange',
                'Sector',
                'Industry',
                'Country',
                'Listing Date',
                'Has Financials',
                'Has TTM',
                'Has News',
                'Has Press Releases',
                'Has Filings',
                'Has Tax Disclosures',
                'Has AGM Info',
                'Last Updated'
            ])

            # Data
            writer.writerows(rows)

        print(f"✅ Exported {len(rows)} stocks to CSV")
        return output_file

    def export_detailed_financials(self, output_file=DETAILED_CSV_PATH):
        """Export detailed financial metrics to CSV"""
        print(f"\n📤 Exporting detailed financials to {output_file}...")

        os.makedirs(os.path.dirname(output_file), exist_ok=True)

        # Get stocks with financial metrics
        self.cursor.execute("""
            SELECT DISTINCT s.symbol
            FROM stocks_master s
            INNER JOIN financial_metrics m ON s.id = m.stock_id
            WHERE m.is_ttm = 1
            ORDER BY s.symbol
        """)

        tickers = [row[0] for row in self.cursor.fetchall()]

        if not tickers:
            print("⚠️  No financial metrics found in database")
            return None

        rows = []
        for ticker in tickers:
            # Get basic info
            self.cursor.execute("""
                SELECT id, company_name, exchange, sector, industry
                FROM stocks_master
                WHERE symbol = ?
            """, (ticker,))

            stock_info = self.cursor.fetchone()
            if not stock_info:
                continue

            stock_id, company_name, exchange, sector, industry = stock_info

            # Get TTM metrics
            self.cursor.execute("""
                SELECT
                    pe_ratio, peg_ratio, pb_ratio, ps_ratio,
                    ev_ebitda, dividend_yield,
                    gross_margin, operating_margin, net_margin,
                    roe, roa, roic,
                    debt_to_equity, current_ratio, quick_ratio,
                    revenue_growth_yoy, eps_growth_yoy,
                    fcf_yield
                FROM financial_metrics
                WHERE stock_id = ? AND is_ttm = 1
                ORDER BY id DESC
                LIMIT 1
            """, (stock_id,))

            metrics = self.cursor.fetchone()

            if metrics:
                row = [ticker, company_name, exchange, sector, industry] + list(metrics)
                rows.append(row)

        # Write to CSV
        with open(output_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)

            # Header
            writer.writerow([
                'Ticker', 'Company', 'Exchange', 'Sector', 'Industry',
                'P/E', 'PEG', 'P/B', 'P/S',
                'EV/EBITDA', 'Div Yield',
                'Gross Margin', 'Operating Margin', 'Net Margin',
                'ROE', 'ROA', 'ROIC',
                'Debt/Equity', 'Current Ratio', 'Quick Ratio',
                'Revenue Growth YoY', 'EPS Growth YoY',
                'FCF Yield'
            ])

            # Data
            writer.writerows(rows)

        print(f"✅ Exported {len(rows)} stocks with detailed metrics")
        return output_file

    def export_news_summary(self, output_file="data/exports/news_summary.csv"):
        """Export news article summary"""
        print(f"\n📤 Exporting news summary to {output_file}...")

        os.makedirs(os.path.dirname(output_file), exist_ok=True)

        self.cursor.execute("""
            SELECT
                s.symbol,
                s.company_name,
                n.title,
                n.source,
                n.published_date,
                n.url
            FROM news_articles n
            INNER JOIN stocks_master s ON n.stock_id = s.id
            ORDER BY s.symbol, n.published_date DESC
        """)

        rows = self.cursor.fetchall()

        with open(output_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(['Ticker', 'Company', 'Title', 'Source', 'Date', 'URL'])
            writer.writerows(rows)

        print(f"✅ Exported {len(rows)} news articles")
        return output_file

    def export_filings_summary(self, output_file="data/exports/filings_summary.csv"):
        """Export regulatory filings summary"""
        print(f"\n📤 Exporting filings summary to {output_file}...")

        os.makedirs(os.path.dirname(output_file), exist_ok=True)

        self.cursor.execute("""
            SELECT
                s.symbol,
                s.company_name,
                f.filing_date,
                f.filing_type,
                f.title,
                f.source,
                f.document_url
            FROM filings f
            INNER JOIN stocks_master s ON f.stock_id = s.id
            ORDER BY s.symbol, f.filing_date DESC
        """)

        rows = self.cursor.fetchall()

        with open(output_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(['Ticker', 'Company', 'Filing Date', 'Type', 'Title', 'Source', 'URL'])
            writer.writerows(rows)

        print(f"✅ Exported {len(rows)} filings")
        return output_file

    def export_all(self):
        """Export all data to CSV files"""
        print("\n" + "=" * 70)
        print("CSV EXPORT - ALL DATA")
        print("=" * 70)

        files_created = []

        # Export basic stock list
        f1 = self.export_stock_list()
        if f1:
            files_created.append(f1)

        # Export detailed financials
        f2 = self.export_detailed_financials()
        if f2:
            files_created.append(f2)

        # Export news
        f3 = self.export_news_summary()
        if f3:
            files_created.append(f3)

        # Export filings
        f4 = self.export_filings_summary()
        if f4:
            files_created.append(f4)

        print("\n" + "=" * 70)
        print(f"✅ Created {len(files_created)} CSV files:")
        for f in files_created:
            print(f"   - {f}")
        print("=" * 70)

        return files_created

    def close(self):
        self.conn.close()


def main():
    """Export data to CSV"""
    if not os.path.exists(DATABASE_PATH):
        print(f"❌ Database not found at {DATABASE_PATH}")
        print("   Run the main pipeline first to collect data")
        return

    exporter = CSVExporter()
    exporter.export_all()
    exporter.close()


if __name__ == "__main__":
    main()