"""
Populate database with existing JSON data.

This script reads the JSON files already present under ``data/`` (calculated
metrics, news / press releases, SEC EDGAR filings and SEDAR+ filings) and
inserts their contents into the database through ``StockDatabase``.
"""

import os
import json
from datetime import datetime

from database import StockDatabase


def _iter_ticker_files(directory, suffix):
    """Yield ``(ticker, filepath)`` for every file in *directory* ending in *suffix*.

    The ticker is the filename with the trailing *suffix* removed. Nothing is
    yielded when *directory* is not an existing directory. Entries are visited
    in sorted order so the console output is deterministic.
    """
    if not os.path.isdir(directory):
        return
    for filename in sorted(os.listdir(directory)):
        if filename.endswith(suffix):
            # Slice instead of str.replace so only the trailing suffix is
            # stripped, never an identical substring earlier in the name.
            yield filename[:-len(suffix)], os.path.join(directory, filename)


def _load_json(filepath):
    """Return the decoded JSON content of *filepath*, read as UTF-8."""
    # Explicit encoding: the platform default is not UTF-8 everywhere.
    with open(filepath, 'r', encoding='utf-8') as f:
        return json.load(f)


def _insert_news_items(db, stats, ticker, items, url_key, default_source=''):
    """Insert each dict in *items* as a news article for *ticker*.

    ``stats['news']`` is incremented in place after every insert that
    ``db.insert_news_article`` reports as truthy, so the running total stays
    accurate even if a later item raises.
    """
    for item in items:
        success = db.insert_news_article(
            ticker=ticker,
            title=item.get('title', ''),
            source=item.get('source', default_source),
            published_date=item.get('date', ''),
            url=item.get(url_key, ''),
            snippet=item.get('snippet', ''),
        )
        if success:
            stats['news'] += 1


def _import_news_dir(db, stats, directory, suffix, url_key, label):
    """Import articles and press releases from every matching file in *directory*.

    *url_key* names the field holding the article address in that directory's
    files; *label* is the wording used for articles in the per-ticker summary.
    Errors for one file are printed and do not stop the remaining files.
    """
    for ticker, filepath in _iter_ticker_files(directory, suffix):
        try:
            data = _load_json(filepath)

            # ``or []`` treats an explicit JSON null like a missing key.
            articles = data.get('news_articles') or []
            _insert_news_items(db, stats, ticker, articles, url_key)

            prs = data.get('press_releases') or []
            _insert_news_items(db, stats, ticker, prs, url_key,
                               default_source='Press Release')

            if articles or prs:
                print(f" ✓ {ticker}: {len(articles)} {label}, {len(prs)} PRs")
        except Exception as e:
            # Best-effort import: report the bad file and carry on.
            print(f" ✗ {ticker}: {e}")


def _import_metrics(db, stats):
    """Import ``data/metrics/*_calculated_metrics.json`` as TTM metrics."""
    print("\n📊 Importing financial metrics...")
    current_year = datetime.now().year
    for ticker, filepath in _iter_ticker_files("data/metrics",
                                               '_calculated_metrics.json'):
        try:
            metrics = _load_json(filepath)
            success = db.insert_financial_metrics(ticker, current_year,
                                                  metrics, is_ttm=True)
            if success:
                stats['metrics'] += 1
                print(f" ✓ {ticker}: {len(metrics)} metrics")
        except Exception as e:
            print(f" ✗ {ticker}: {e}")


def _import_news(db, stats):
    """Import news from the regular scraper output and from the SerpAPI output."""
    print("\n📰 Importing news articles...")
    # Regular news files keep the article address under 'url'.
    _import_news_dir(db, stats, "data/news", '_news_pr.json',
                     url_key='url', label='articles')
    # SerpAPI files keep it under 'link'.
    _import_news_dir(db, stats, "data/serpapi_news", '_serpapi.json',
                     url_key='link', label='SerpAPI articles')


def _import_sec_filings(db, stats):
    """Import SEC EDGAR filings and insider-ownership forms."""
    print("\n📄 Importing SEC EDGAR filings...")
    for ticker, filepath in _iter_ticker_files("data/sec_filings",
                                               '_sec_filings.json'):
        try:
            data = _load_json(filepath)

            filings = data.get('filings') or []
            for filing in filings:
                db.insert_filing(
                    ticker=ticker,
                    filing_date=filing.get('filing_date', ''),
                    filing_type=filing.get('form_type', ''),
                    title=filing.get('description', ''),
                    document_url=filing.get('url', ''),
                    source='SEC EDGAR',
                )
                # Counts every attempted insert; insert_filing's return value
                # is not inspected. NOTE(review): confirm whether it reports
                # failure and, if so, count only successes.
                stats['filings'] += 1

            ownership = data.get('insider_ownership') or []
            for form in ownership:
                db.insert_filing(
                    ticker=ticker,
                    filing_date=form.get('filing_date', ''),
                    filing_type=form.get('form_type', ''),
                    title=f"Insider Transaction - {form.get('owner', '')}",
                    document_url=form.get('url', ''),
                    source='SEC EDGAR - Ownership',
                )
                stats['filings'] += 1

            if filings or ownership:
                print(f" ✓ {ticker}: {len(filings)} filings, {len(ownership)} ownership")
        except Exception as e:
            print(f" ✗ {ticker}: {e}")


def _import_sedar_filings(db, stats):
    """Import SEDAR+ filings."""
    print("\n📄 Importing SEDAR+ filings...")
    for ticker, filepath in _iter_ticker_files("data/sedar_filings",
                                               '_sedar_data.json'):
        try:
            data = _load_json(filepath)

            filings = data.get('filings') or []
            for filing in filings:
                db.insert_filing(
                    ticker=ticker,
                    filing_date=filing.get('date', ''),
                    filing_type=filing.get('type', ''),
                    title=filing.get('title', ''),
                    document_url=filing.get('url', ''),
                    source='SEDAR+',
                )
                # Attempted inserts, as for the SEC filings.
                stats['filings'] += 1

            if filings:
                print(f" ✓ {ticker}: {len(filings)} SEDAR+ filings")
        except Exception as e:
            print(f" ✗ {ticker}: {e}")


def populate_from_existing_data():
    """Read all existing JSON files and populate the database.

    Walks the four data directories relative to the current working directory
    (``data/metrics``, ``data/news`` + ``data/serpapi_news``,
    ``data/sec_filings``, ``data/sedar_filings``), inserts what it finds and
    prints per-ticker progress followed by summary counts. Missing
    directories are skipped. A failure while processing one file is printed
    and does not abort the run.

    The database handle is closed even if an unexpected error (for example
    from listing a directory) propagates out of the import steps.
    """
    print("\n" + "=" * 70)
    print("POPULATING DATABASE FROM EXISTING JSON FILES")
    print("=" * 70)

    db = StockDatabase()
    stats = {
        'metrics': 0,
        'news': 0,
        'filings': 0,
    }

    try:
        # 1. Calculated metrics
        _import_metrics(db, stats)
        # 2. News articles (regular scraping and SerpAPI)
        _import_news(db, stats)
        # 3. SEC filings
        _import_sec_filings(db, stats)
        # 4. SEDAR+ filings
        _import_sedar_filings(db, stats)

        # Final stats
        print("\n" + "=" * 70)
        print("DATABASE POPULATION COMPLETE")
        print("=" * 70)
        print(f"Financial metrics inserted: {stats['metrics']}")
        print(f"News articles inserted: {stats['news']}")
        print(f"Filings inserted: {stats['filings']}")
        print("=" * 70)
    finally:
        db.close()


if __name__ == "__main__":
    populate_from_existing_data()