"""
Main orchestrator script for the Stock Intelligence System.

Runs all steps in sequence: extract exchange listings, import them into the
database, scrape financial data, scrape news and press releases, and generate
one text report per stock.
"""

import asyncio
import json
import os
import sys
import traceback
from datetime import datetime

# Import our modules
from extract_listings import StockListingExtractor
from database import StockDatabase
from scrape_yahoo_finance import YahooFinanceScraper
from scrape_news_pr import NewsPressScraper

# Separator rules shared by console banners and generated reports.
_HEAVY_RULE = "=" * 70
_LIGHT_RULE = "-" * 70

# Maximum number of news articles / press releases listed per report section.
_MAX_ITEMS_PER_SECTION = 10

# Stock limits applied by run_full_pipeline() when test_mode is enabled.
_TEST_FINANCIALS_LIMIT = 5
_TEST_NEWS_LIMIT = 3


class StockIntelligenceOrchestrator:
    """Run the stock data collection pipeline step by step.

    The instance owns a ``StockDatabase`` handle (``self.db``) and a ``stats``
    dict that the individual steps update and ``print_final_stats`` reports.
    """

    def __init__(self):
        self.db = StockDatabase()
        self.stats = {
            'start_time': datetime.now(),
            'stocks_extracted': 0,
            'financials_scraped': 0,
            'news_scraped': 0,
            'errors': [],
        }

    @staticmethod
    def _print_banner(*lines):
        """Print ``lines`` framed by a blank line and two heavy rules."""
        print("\n" + _HEAVY_RULE)
        for line in lines:
            print(line)
        print(_HEAVY_RULE)

    @staticmethod
    def _rows_to_stock_dicts(rows):
        """Convert database rows into the dict format passed to the scrapers.

        Columns 1, 2 and 3 of each row are read as symbol, company name and
        exchange respectively.
        """
        return [
            {'symbol': row[1], 'name': row[2], 'exchange': row[3]}
            for row in rows
        ]

    @staticmethod
    def _load_json_if_exists(path):
        """Return the parsed JSON content of ``path``, or None if it is absent."""
        if not os.path.exists(path):
            return None
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)

    async def step1_extract_listings(self):
        """Step 1: Extract stock listings from exchanges.

        Returns whatever ``StockListingExtractor.extract_all()`` yields and
        records its length in ``stats['stocks_extracted']``.
        """
        self._print_banner("STEP 1: EXTRACTING STOCK LISTINGS FROM EXCHANGES")

        extractor = StockListingExtractor()
        listings = await extractor.extract_all()

        self.stats['stocks_extracted'] = len(listings)
        return listings

    def step2_import_to_database(self):
        """Step 2: Import listings to database.

        Reads the combined listings JSON file from disk rather than taking
        the step 1 result as an argument. Returns the value reported by the
        database import, or 0 when the listings file does not exist.
        """
        self._print_banner("STEP 2: IMPORTING LISTINGS TO DATABASE")

        listings_file = "data/listings/all_listings_combined.json"
        if not os.path.exists(listings_file):
            print(f"❌ Listings file not found: {listings_file}")
            return 0

        imported = self.db.import_listings_from_json(listings_file)
        print(f"✅ Imported {imported} stocks to database")
        return imported

    async def step3_scrape_financials(self, max_stocks=None):
        """Step 3: Scrape financial data from Yahoo Finance.

        Args:
            max_stocks: Optional cap on how many stocks are scraped; a falsy
                value (``None`` or ``0``) means no cap.

        Returns:
            The result list produced by the scraper. Results with a truthy
            ``'error'`` entry are excluded from the success count and from
            the coverage update.
        """
        self._print_banner("STEP 3: SCRAPING FINANCIAL DATA")

        # Get stocks from database
        stocks = self.db.get_all_stocks()
        print(f"📊 Found {len(stocks)} stocks in database")

        if max_stocks:
            stocks = stocks[:max_stocks]
            print(f"⚠️ Limiting to {max_stocks} stocks for testing")

        stock_list = self._rows_to_stock_dicts(stocks)

        scraper = YahooFinanceScraper()
        results = await scraper.scrape_multiple_stocks(
            stock_list, max_stocks=max_stocks
        )

        # Filter once; both the counter and the coverage update use it.
        succeeded = [r for r in results if not r.get('error')]
        self.stats['financials_scraped'] = len(succeeded)

        # Update coverage in database
        for result in succeeded:
            self.db.update_coverage(
                result['ticker'],
                has_financials=True,
                has_ttm=True,
            )

        return results

    async def step4_scrape_news_pr(self, max_stocks=None):
        """Step 4: Scrape news and press releases.

        Args:
            max_stocks: Optional cap on how many stocks are scraped; a falsy
                value (``None`` or ``0``) means no cap.

        Returns:
            The result list produced by the scraper. Every result is counted
            in ``stats['news_scraped']`` and gets a coverage update.
        """
        self._print_banner("STEP 4: SCRAPING NEWS & PRESS RELEASES")

        # Get stocks from database
        stocks = self.db.get_all_stocks()

        if max_stocks:
            stocks = stocks[:max_stocks]
            print(f"⚠️ Limiting to {max_stocks} stocks for testing")

        stock_list = self._rows_to_stock_dicts(stocks)

        scraper = NewsPressScraper()
        results = await scraper.scrape_multiple_stocks(
            stock_list, max_stocks=max_stocks
        )

        self.stats['news_scraped'] = len(results)

        # Update coverage in database
        for result in results:
            has_news = len(result.get('news_articles', [])) > 0
            has_pr = len(result.get('press_releases', [])) > 0
            self.db.update_coverage(
                result['ticker'],
                has_news=has_news,
                has_press_releases=has_pr,
            )

        return results

    def step5_generate_reports(self):
        """Step 5: Generate text reports for each stock.

        Writes ``<ticker>_report.txt`` into ``data/reports`` for every stock
        in the database. A failure for one stock is printed, recorded in
        ``stats['errors']`` and does not stop the remaining stocks.

        Returns:
            The number of reports written successfully.
        """
        self._print_banner("STEP 5: GENERATING STOCK REPORTS")

        reports_dir = "data/reports"
        os.makedirs(reports_dir, exist_ok=True)

        # Get all stocks
        stocks = self.db.get_all_stocks()

        reports_generated = 0
        for stock in stocks:
            ticker, company_name, exchange = stock[1], stock[2], stock[3]

            try:
                report = self.generate_stock_report(
                    ticker, company_name, exchange
                )

                # Save report
                report_file = f"{reports_dir}/{ticker}_report.txt"
                with open(report_file, 'w', encoding='utf-8') as f:
                    f.write(report)

                reports_generated += 1
            except Exception as e:
                # Per-stock boundary: log and continue with the next stock.
                print(f"❌ Error generating report for {ticker}: {e}")
                self.stats['errors'].append(f"{ticker}: {e}")

        print(f"✅ Generated {reports_generated} stock reports")
        print(f"📁 Reports saved to: {reports_dir}/")
        return reports_generated

    def generate_stock_report(self, ticker, company_name, exchange):
        """Generate a comprehensive text report for a stock.

        The report always contains a header and footer. A financial section
        is added when ``data/financials/<ticker>_yahoo.json`` exists, and
        news / press-release sections when ``data/news/<ticker>_news_pr.json``
        exists and holds the corresponding entries.

        Returns:
            The report as a single newline-joined string.
        """
        report = [
            _HEAVY_RULE,
            f"STOCK INTELLIGENCE REPORT: {ticker}",
            _HEAVY_RULE,
            f"Company: {company_name}",
            f"Exchange: {exchange}",
            f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
            _HEAVY_RULE,
            "",
        ]

        # Load financial data if available
        financials = self._load_json_if_exists(
            f"data/financials/{ticker}_yahoo.json"
        )
        if financials is not None:
            report.append("[FINANCIAL DATA]")
            report.append(_LIGHT_RULE)

            if financials.get('profile'):
                report.append("\nProfile:")
                report.extend(
                    f" {key}: {value}"
                    for key, value in financials['profile'].items()
                )

            if financials.get('statistics'):
                report.append("\nKey Statistics:")
                report.extend(
                    f" {key}: {value}"
                    for key, value in financials['statistics'].items()
                )

            report.append("")

        # Load news if available
        news = self._load_json_if_exists(f"data/news/{ticker}_news_pr.json")
        if news is not None:
            if news.get('news_articles'):
                report.append("[NEWS ARTICLES - Last 12 Months]")
                report.append(_LIGHT_RULE)

                for article in news['news_articles'][:_MAX_ITEMS_PER_SECTION]:
                    report.append(f"\nTitle: {article.get('title', 'N/A')}")
                    report.append(f"Source: {article.get('source', 'N/A')}")
                    report.append(f"Date: {article.get('date', 'N/A')}")
                    report.append(f"URL: {article.get('url', 'N/A')}")
                    if article.get('snippet'):
                        report.append(f"Snippet: {article['snippet']}")

                report.append("")

            if news.get('press_releases'):
                report.append("[PRESS RELEASES]")
                report.append(_LIGHT_RULE)

                for pr in news['press_releases'][:_MAX_ITEMS_PER_SECTION]:
                    report.append(f"\nTitle: {pr.get('title', 'N/A')}")
                    report.append(f"Source: {pr.get('source', 'N/A')}")
                    report.append(f"Date: {pr.get('date', 'N/A')}")
                    report.append(f"URL: {pr.get('url', 'N/A')}")

                report.append("")

        report.append(_HEAVY_RULE)
        report.append("END OF REPORT")
        report.append(_HEAVY_RULE)

        return "\n".join(report)

    def print_final_stats(self):
        """Print final statistics.

        Shows the elapsed time since the orchestrator was created, the
        per-step counters, up to ten recorded errors and a coverage summary
        read from the database.
        """
        duration = datetime.now() - self.stats['start_time']

        self._print_banner("FINAL STATISTICS")
        print(f"Duration: {duration}")
        print(f"Stocks extracted: {self.stats['stocks_extracted']}")
        print(f"Financials scraped: {self.stats['financials_scraped']}")
        print(f"News scraped: {self.stats['news_scraped']}")
        print(f"Errors: {len(self.stats['errors'])}")

        if self.stats['errors']:
            print("\nError summary:")
            for error in self.stats['errors'][:10]:
                print(f" - {error}")

        # Get coverage report from database
        coverage = self.db.get_coverage_report()
        print("\nCoverage Report:")
        print(f" Total stocks tracked: {len(coverage)}")

        # NOTE(review): columns 2-5 are presumably the four coverage flags
        # set by update_coverage(); confirm against the database schema.
        complete = sum(
            1 for c in coverage if c[2] and c[3] and c[4] and c[5]
        )
        print(f" Fully covered stocks: {complete}")

        print(_HEAVY_RULE)

    async def run_full_pipeline(self, test_mode=True):
        """Run the full data collection pipeline.

        Args:
            test_mode: When true, the financial and news scraping steps are
                limited to a handful of stocks.

        Any exception raised by a step is printed with its traceback and not
        re-raised. The database is closed in every case, including the early
        return taken when step 1 yields no listings.
        """
        banner = [
            "STOCK INTELLIGENCE AUTOMATION SYSTEM",
            "Starting full pipeline...",
        ]
        if test_mode:
            banner.append("⚠️ RUNNING IN TEST MODE (limited stocks)")
        self._print_banner(*banner)

        try:
            # Step 1: Extract listings
            listings = await self.step1_extract_listings()

            if not listings:
                print(
                    "\n❌ No listings extracted. "
                    "Check if websites are accessible."
                )
                return

            # Step 2: Import to database
            self.step2_import_to_database()

            # Step 3: Scrape financials
            await self.step3_scrape_financials(
                max_stocks=_TEST_FINANCIALS_LIMIT if test_mode else None
            )

            # Step 4: Scrape news & PR
            await self.step4_scrape_news_pr(
                max_stocks=_TEST_NEWS_LIMIT if test_mode else None
            )

            # Step 5: Generate reports
            self.step5_generate_reports()

            # Print stats
            self.print_final_stats()

            print("\n✅ Pipeline completed successfully!")

        except Exception as e:
            # Top-level boundary: report the failure instead of crashing.
            print(f"\n❌ Pipeline failed with error: {e}")
            traceback.print_exc()

        finally:
            self.db.close()


async def main():
    """Main entry point.

    Runs in test mode unless ``--full`` is present on the command line.
    """
    # Check if running in test mode
    test_mode = "--full" not in sys.argv

    if test_mode:
        print("\n⚠️ Running in TEST MODE (limited stocks)")
        print(" To run full pipeline, use: python main.py --full")

    orchestrator = StockIntelligenceOrchestrator()
    await orchestrator.run_full_pipeline(test_mode=test_mode)


if __name__ == "__main__":
    asyncio.run(main())