""" Example analysis script - What you can do with the collected data """ import sqlite3 import json import os from collections import defaultdict class StockAnalyzer: def __init__(self, db_path="data/stocks.db"): self.conn = sqlite3.connect(db_path) self.cursor = self.conn.cursor() def get_summary_stats(self): """Get overall statistics""" print("\n" + "=" * 70) print("DATABASE SUMMARY STATISTICS") print("=" * 70) # Total stocks self.cursor.execute("SELECT COUNT(*) FROM stocks_master") total_stocks = self.cursor.fetchone()[0] print(f"\nTotal stocks tracked: {total_stocks}") # By exchange self.cursor.execute(""" SELECT exchange, COUNT(*) as count FROM stocks_master GROUP BY exchange ORDER BY count DESC """) print("\nStocks by exchange:") for exchange, count in self.cursor.fetchall(): print(f" {exchange}: {count}") # By sector (if available) self.cursor.execute(""" SELECT sector, COUNT(*) as count FROM stocks_master WHERE sector IS NOT NULL AND sector != '' GROUP BY sector ORDER BY count DESC LIMIT 10 """) sectors = self.cursor.fetchall() if sectors: print("\nTop 10 sectors:") for sector, count in sectors: print(f" {sector}: {count}") # Coverage stats self.cursor.execute(""" SELECT SUM(CASE WHEN has_financials = 1 THEN 1 ELSE 0 END) as with_financials, SUM(CASE WHEN has_news = 1 THEN 1 ELSE 0 END) as with_news, SUM(CASE WHEN has_press_releases = 1 THEN 1 ELSE 0 END) as with_pr, SUM(CASE WHEN has_financials = 1 AND has_news = 1 AND has_press_releases = 1 THEN 1 ELSE 0 END) as complete FROM coverage_report """) fin, news, pr, complete = self.cursor.fetchone() print("\nData coverage:") print(f" Stocks with financials: {fin}") print(f" Stocks with news: {news}") print(f" Stocks with press releases: {pr}") print(f" Fully covered stocks: {complete}") def find_recent_news_activity(self, limit=20): """Find stocks with most recent news""" print("\n" + "=" * 70) print(f"TOP {limit} STOCKS BY NEWS ACTIVITY") print("=" * 70) # Load news files and count articles news_dir = "data/news" if not os.path.exists(news_dir): print("No news data available yet") return stock_news_count = [] for filename in os.listdir(news_dir): if filename.endswith('_news_pr.json'): ticker = filename.replace('_news_pr.json', '') filepath = os.path.join(news_dir, filename) with open(filepath, 'r') as f: data = json.load(f) news_count = len(data.get('news_articles', [])) pr_count = len(data.get('press_releases', [])) if news_count > 0 or pr_count > 0: stock_news_count.append({ 'ticker': ticker, 'news': news_count, 'pr': pr_count, 'total': news_count + pr_count }) # Sort by total stock_news_count.sort(key=lambda x: x['total'], reverse=True) print(f"\n{'Ticker':<10} {'News':<10} {'PR':<10} {'Total':<10}") print("-" * 40) for stock in stock_news_count[:limit]: print(f"{stock['ticker']:<10} {stock['news']:<10} {stock['pr']:<10} {stock['total']:<10}") def find_stocks_by_sector(self, sector): """Find all stocks in a sector""" print("\n" + "=" * 70) print(f"STOCKS IN SECTOR: {sector.upper()}") print("=" * 70) self.cursor.execute(""" SELECT symbol, company_name, exchange FROM stocks_master WHERE sector LIKE ? ORDER BY symbol """, (f"%{sector}%",)) stocks = self.cursor.fetchall() if stocks: print(f"\nFound {len(stocks)} stocks:") for symbol, name, exchange in stocks: print(f" {symbol:<8} {name:<50} [{exchange}]") else: print(f"\nNo stocks found in sector: {sector}") def get_stock_report(self, ticker): """Get full report for a stock""" print("\n" + "=" * 70) print(f"STOCK REPORT: {ticker}") print("=" * 70) # Get basic info self.cursor.execute(""" SELECT company_name, exchange, sector, industry, listing_date FROM stocks_master WHERE symbol = ? """, (ticker,)) result = self.cursor.fetchone() if not result: print(f"\nStock {ticker} not found in database") return name, exchange, sector, industry, listing_date = result print(f"\nCompany: {name}") print(f"Exchange: {exchange}") if sector: print(f"Sector: {sector}") if industry: print(f"Industry: {industry}") if listing_date: print(f"Listing Date: {listing_date}") # Check coverage self.cursor.execute(""" SELECT has_financials, has_news, has_press_releases FROM coverage_report WHERE ticker = ? """, (ticker,)) coverage = self.cursor.fetchone() if coverage: has_fin, has_news, has_pr = coverage print(f"\nData Coverage:") print(f" Financials: {'✅' if has_fin else '❌'}") print(f" News: {'✅' if has_news else '❌'}") print(f" Press Releases: {'✅' if has_pr else '❌'}") # Load financial data if available fin_file = f"data/financials/{ticker}_yahoo.json" if os.path.exists(fin_file): print(f"\nFinancial Data: (see {fin_file})") with open(fin_file, 'r') as f: data = json.load(f) if data.get('profile', {}).get('current_price'): print(f" Current Price: ${data['profile']['current_price']}") # Load news if available news_file = f"data/news/{ticker}_news_pr.json" if os.path.exists(news_file): with open(news_file, 'r') as f: data = json.load(f) news_count = len(data.get('news_articles', [])) pr_count = len(data.get('press_releases', [])) print(f"\nNews & Press Releases:") print(f" News articles: {news_count}") print(f" Press releases: {pr_count}") if news_count > 0: print(f"\n Recent news:") for article in data['news_articles'][:3]: print(f" - {article.get('title', 'N/A')}") # Check if report exists report_file = f"data/reports/{ticker}_report.txt" if os.path.exists(report_file): print(f"\nFull report available at: {report_file}") def export_to_csv(self, output_file="stock_list.csv"): """Export stock list to CSV""" print("\n" + "=" * 70) print(f"EXPORTING TO CSV: {output_file}") print("=" * 70) self.cursor.execute(""" SELECT s.symbol, s.company_name, s.exchange, s.sector, s.industry, c.has_financials, c.has_news, c.has_press_releases FROM stocks_master s LEFT JOIN coverage_report c ON s.symbol = c.ticker ORDER BY s.symbol """) import csv with open(output_file, 'w', newline='', encoding='utf-8') as f: writer = csv.writer(f) writer.writerow(['Symbol', 'Company', 'Exchange', 'Sector', 'Industry', 'Has Financials', 'Has News', 'Has PR']) writer.writerows(self.cursor.fetchall()) print(f"\n✅ Exported to {output_file}") def close(self): self.conn.close() def main(): """Example usage""" print("\n" + "=" * 70) print("STOCK DATA ANALYSIS - EXAMPLES") print("=" * 70) # Check if database exists if not os.path.exists("data/stocks.db"): print("\n❌ Database not found!") print(" Run 'python main.py' first to collect data") return analyzer = StockAnalyzer() # Example 1: Get summary statistics analyzer.get_summary_stats() # Example 2: Find most active stocks (by news) analyzer.find_recent_news_activity(limit=10) # Example 3: Find stocks in a sector # analyzer.find_stocks_by_sector("Technology") # Example 4: Get report for specific stock # analyzer.get_stock_report("ABC") # Example 5: Export to CSV # analyzer.export_to_csv("my_stocks.csv") analyzer.close() print("\n" + "=" * 70) print("ANALYSIS COMPLETE") print("=" * 70) print("\nYou can modify this script to:") print(" - Filter stocks by criteria (P/E, market cap, etc.)") print(" - Find stocks with specific keywords in news") print(" - Compare stocks within sectors") print(" - Track changes over time") print(" - Generate custom reports") if __name__ == "__main__": main()