Files
microcap_scrapping/export_csv.py
T

273 lines
8.4 KiB
Python
Raw Normal View History

"""
Export stock data to CSV format
Creates comprehensive CSV files with all data
"""
import csv
import json
import os
import sqlite3
from datetime import datetime
from typing import List, Dict, Any
from config import DATABASE_PATH, CSV_EXPORT_PATH, DETAILED_CSV_PATH
class CSVExporter:
def __init__(self, db_path=DATABASE_PATH):
self.db_path = db_path
self.conn = sqlite3.connect(db_path)
self.cursor = self.conn.cursor()
def export_stock_list(self, output_file=CSV_EXPORT_PATH):
"""Export basic stock list to CSV"""
print(f"\n📤 Exporting stock list to {output_file}...")
os.makedirs(os.path.dirname(output_file), exist_ok=True)
self.cursor.execute("""
SELECT
s.symbol,
s.company_name,
s.exchange,
s.sector,
s.industry,
s.country,
s.listing_date,
c.has_financials,
c.has_ttm,
c.has_news,
c.has_press_releases,
c.has_filings,
c.has_tax_disclosures,
c.has_agm_info,
c.last_updated
FROM stocks_master s
LEFT JOIN coverage_report c ON s.symbol = c.ticker
ORDER BY s.symbol
""")
rows = self.cursor.fetchall()
with open(output_file, 'w', newline='', encoding='utf-8') as f:
writer = csv.writer(f)
# Header
writer.writerow([
'Ticker',
'Company Name',
'Exchange',
'Sector',
'Industry',
'Country',
'Listing Date',
'Has Financials',
'Has TTM',
'Has News',
'Has Press Releases',
'Has Filings',
'Has Tax Disclosures',
'Has AGM Info',
'Last Updated'
])
# Data
writer.writerows(rows)
print(f"✅ Exported {len(rows)} stocks to CSV")
return output_file
def export_detailed_financials(self, output_file=DETAILED_CSV_PATH):
"""Export detailed financial metrics to CSV"""
print(f"\n📤 Exporting detailed financials to {output_file}...")
os.makedirs(os.path.dirname(output_file), exist_ok=True)
# Get stocks with financial metrics
self.cursor.execute("""
SELECT DISTINCT s.symbol
FROM stocks_master s
INNER JOIN financial_metrics m ON s.id = m.stock_id
WHERE m.is_ttm = 1
ORDER BY s.symbol
""")
tickers = [row[0] for row in self.cursor.fetchall()]
if not tickers:
print("⚠️ No financial metrics found in database")
return None
rows = []
for ticker in tickers:
# Get basic info
self.cursor.execute("""
SELECT id, company_name, exchange, sector, industry
FROM stocks_master
WHERE symbol = ?
""", (ticker,))
stock_info = self.cursor.fetchone()
if not stock_info:
continue
stock_id, company_name, exchange, sector, industry = stock_info
# Get TTM metrics
self.cursor.execute("""
SELECT
pe_ratio, peg_ratio, pb_ratio, ps_ratio,
ev_ebitda, dividend_yield,
gross_margin, operating_margin, net_margin,
roe, roa, roic,
debt_to_equity, current_ratio, quick_ratio,
revenue_growth_yoy, eps_growth_yoy,
fcf_yield
FROM financial_metrics
WHERE stock_id = ? AND is_ttm = 1
ORDER BY id DESC
LIMIT 1
""", (stock_id,))
metrics = self.cursor.fetchone()
if metrics:
row = [ticker, company_name, exchange, sector, industry] + list(metrics)
rows.append(row)
# Write to CSV
with open(output_file, 'w', newline='', encoding='utf-8') as f:
writer = csv.writer(f)
# Header
writer.writerow([
'Ticker', 'Company', 'Exchange', 'Sector', 'Industry',
'P/E', 'PEG', 'P/B', 'P/S',
'EV/EBITDA', 'Div Yield',
'Gross Margin', 'Operating Margin', 'Net Margin',
'ROE', 'ROA', 'ROIC',
'Debt/Equity', 'Current Ratio', 'Quick Ratio',
'Revenue Growth YoY', 'EPS Growth YoY',
'FCF Yield'
])
# Data
writer.writerows(rows)
print(f"✅ Exported {len(rows)} stocks with detailed metrics")
return output_file
def export_news_summary(self, output_file="data/exports/news_summary.csv"):
"""Export news article summary"""
print(f"\n📤 Exporting news summary to {output_file}...")
os.makedirs(os.path.dirname(output_file), exist_ok=True)
self.cursor.execute("""
SELECT
s.symbol,
s.company_name,
n.title,
n.source,
n.published_date,
n.url
FROM news_articles n
INNER JOIN stocks_master s ON n.stock_id = s.id
ORDER BY s.symbol, n.published_date DESC
""")
rows = self.cursor.fetchall()
with open(output_file, 'w', newline='', encoding='utf-8') as f:
writer = csv.writer(f)
writer.writerow(['Ticker', 'Company', 'Title', 'Source', 'Date', 'URL'])
writer.writerows(rows)
print(f"✅ Exported {len(rows)} news articles")
return output_file
def export_filings_summary(self, output_file="data/exports/filings_summary.csv"):
"""Export regulatory filings summary"""
print(f"\n📤 Exporting filings summary to {output_file}...")
os.makedirs(os.path.dirname(output_file), exist_ok=True)
self.cursor.execute("""
SELECT
s.symbol,
s.company_name,
f.filing_date,
f.filing_type,
f.title,
f.source,
f.document_url
FROM filings f
INNER JOIN stocks_master s ON f.stock_id = s.id
ORDER BY s.symbol, f.filing_date DESC
""")
rows = self.cursor.fetchall()
with open(output_file, 'w', newline='', encoding='utf-8') as f:
writer = csv.writer(f)
writer.writerow(['Ticker', 'Company', 'Filing Date', 'Type', 'Title', 'Source', 'URL'])
writer.writerows(rows)
print(f"✅ Exported {len(rows)} filings")
return output_file
def export_all(self):
"""Export all data to CSV files"""
print("\n" + "=" * 70)
print("CSV EXPORT - ALL DATA")
print("=" * 70)
files_created = []
# Export basic stock list
f1 = self.export_stock_list()
if f1:
files_created.append(f1)
# Export detailed financials
f2 = self.export_detailed_financials()
if f2:
files_created.append(f2)
# Export news
f3 = self.export_news_summary()
if f3:
files_created.append(f3)
# Export filings
f4 = self.export_filings_summary()
if f4:
files_created.append(f4)
print("\n" + "=" * 70)
print(f"✅ Created {len(files_created)} CSV files:")
for f in files_created:
print(f" - {f}")
print("=" * 70)
return files_created
def close(self):
self.conn.close()
def main():
"""Export data to CSV"""
if not os.path.exists(DATABASE_PATH):
print(f"❌ Database not found at {DATABASE_PATH}")
print(" Run the main pipeline first to collect data")
return
exporter = CSVExporter()
exporter.export_all()
exporter.close()
if __name__ == "__main__":
main()