feat: Implement stock listing extraction and database population

- Added `extract_listings.py` for extracting stock listings from TSX, TSXV, CSE, and CBOE using Playwright.
- Created `main.py` to orchestrate the entire stock intelligence system, including extraction, database import, financial scraping, news scraping, and report generation.
- Developed `populate_database.py` to populate the database with existing JSON data.
- Introduced `scrape_nasdaq_tsx_only.py` for focused scraping of NASDAQ and TSX stocks.
- Added `setup.py` for initial setup and testing of the system.
- Created `watchlist.txt` template for user-defined stock tracking.
- Generated `final_test_output.txt` to log the results of the test run.
This commit is contained in:
Aherobo Ovie Victor
2025-11-06 12:34:01 +01:00
parent 389a01cb0a
commit 80ee708348
39 changed files with 8513 additions and 0 deletions
+181
View File
@@ -0,0 +1,181 @@
#!/usr/bin/env python3
"""
Daily automation script
Runs updates for all stocks or specific tickers
"""
import asyncio
import os
import sys
import json
from datetime import datetime
import subprocess
# Add parent directory to path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from main_robust import RobustStockIntelligence
async def run_daily_update():
    """Run the daily update pipeline for every stock in the database.

    Reads all stocks through ``orchestrator.db.get_all_stocks()`` and, when
    at least one exists, invokes the orchestrator steps in this order:
    ``step3_scrape_financials``, ``step4_calculate_metrics``,
    ``step5_scrape_news_pr``, ``step6_scrape_sec_filings`` (US exchanges),
    ``step7_scrape_sedar_filings`` (Canadian exchanges),
    ``step8_generate_reports`` and ``step9_export_csv``. Progress is printed
    to stdout.

    The database handle is closed on every exit path: normal completion, the
    early return when the database is empty, and any exception raised by a
    step (which still propagates to the caller).

    Returns:
        None.
    """
    us_exchanges = ('CBOE', 'NYSE', 'NASDAQ')
    canadian_exchanges = ('TSX', 'TSXV', 'CSE')
    daily_filing_limit = 50  # Cap on filings lookups per exchange group per run

    print("=" * 70)
    print("DAILY STOCK INTELLIGENCE UPDATE")
    print(f"Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print("=" * 70)

    orchestrator = RobustStockIntelligence()
    try:
        # Get all stocks from database
        stocks = orchestrator.db.get_all_stocks()
        if not stocks:
            print("❌ No stocks in database. Run full extraction first.")
            return

        print(f"\n📊 Found {len(stocks)} stocks to update")

        # Rows are indexed positionally; assumes columns 1-3 hold symbol,
        # name and exchange — confirm against the database schema.
        stock_list = [
            {'symbol': row[1], 'name': row[2], 'exchange': row[3]}
            for row in stocks
        ]

        # Update financials (daily)
        print("\n🔄 Updating financials...")
        await orchestrator.step3_scrape_financials(stock_list)

        # Calculate metrics from whichever per-symbol files exist on disk
        print("\n🔄 Calculating metrics...")
        financial_data = []
        for entry in stock_list:
            path = f"data/financials/{entry['symbol']}_yahoo.json"
            if not os.path.exists(path):
                continue
            try:
                with open(path, 'r') as file:
                    financial_data.append(json.load(file))
            except (OSError, ValueError) as exc:
                # One unreadable or corrupt file must not abort the whole
                # batch; json.JSONDecodeError is a ValueError subclass.
                print(f"⚠️ Skipping unreadable financial file {path}: {exc}")
        await orchestrator.step4_calculate_metrics(financial_data)

        # Update news (daily)
        print("\n🔄 Updating news...")
        await orchestrator.step5_scrape_news_pr(stock_list, use_serpapi=True)

        # Update filings (check for new ones)
        print("\n🔄 Checking for new filings...")
        us_stocks = [s for s in stock_list if s.get('exchange') in us_exchanges]
        canadian_stocks = [
            s for s in stock_list if s.get('exchange') in canadian_exchanges
        ]
        if us_stocks:
            await orchestrator.step6_scrape_sec_filings(
                us_stocks[:daily_filing_limit]
            )
        if canadian_stocks:
            await orchestrator.step7_scrape_sedar_filings(
                canadian_stocks[:daily_filing_limit]
            )

        # Generate reports
        print("\n🔄 Generating reports...")
        orchestrator.step8_generate_reports()

        # Export CSV
        print("\n🔄 Exporting CSV files...")
        orchestrator.step9_export_csv()

        # Print stats
        orchestrator._print_final_stats()
    finally:
        # Always release the database handle, including on early return
        # and when a step raises.
        orchestrator.db.close()

    print("\n✅ Daily update completed successfully!")
    print(f"Finished: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
async def run_watchlist_update(watchlist_file="watchlist.txt"):
    """Update only the tickers listed in a watchlist file.

    The file is read line by line; each non-blank line is stripped and
    upper-cased to form a ticker. Lines whose first non-blank character is
    ``#`` are treated as comments and skipped. Each ticker is passed to
    ``orchestrator.run_for_single_stock``; a failure for one ticker is
    printed and does not stop the remaining tickers.

    Args:
        watchlist_file: Path to the watchlist, one ticker per line.

    Returns:
        None. If the file does not exist, a hint is printed and nothing
        else happens.
    """
    print("=" * 70)
    print("WATCHLIST UPDATE")
    print("=" * 70)

    if not os.path.exists(watchlist_file):
        print(f"❌ Watchlist file not found: {watchlist_file}")
        print(" Create a watchlist.txt file with one ticker per line")
        return

    # Load watchlist, ignoring blank lines and '#' comment lines
    with open(watchlist_file, 'r') as f:
        tickers = [
            entry.upper()
            for entry in (line.strip() for line in f)
            if entry and not entry.startswith('#')
        ]
    print(f"\n📋 Loaded {len(tickers)} stocks from watchlist")

    orchestrator = RobustStockIntelligence()
    try:
        # Update each stock
        for ticker in tickers:
            print(f"\n🔄 Updating {ticker}...")
            try:
                await orchestrator.run_for_single_stock(ticker)
            except Exception as e:
                # Best effort: report and continue with the next ticker.
                print(f"❌ Error updating {ticker}: {e}")
    finally:
        # Close the database even if the loop is interrupted by something
        # the per-ticker handler does not catch (e.g. task cancellation).
        orchestrator.db.close()

    print("\n✅ Watchlist update completed!")
def setup_cron_job():
    """Print instructions for scheduling the daily update with cron.

    Nothing is installed or modified; the function only prints a crontab
    line (02:00 every day, output appended to
    ``/tmp/stock_intelligence.log``) plus a manual invocation example. The
    script directory, interpreter path and script path are shell-quoted so
    the printed commands stay valid when a path contains spaces or other
    shell metacharacters.

    Returns:
        None.
    """
    import shlex  # Local import: only this function needs shell quoting

    print("=" * 70)
    print("CRON JOB SETUP")
    print("=" * 70)

    script_path = os.path.abspath(__file__)
    quoted_dir = shlex.quote(os.path.dirname(script_path))
    quoted_script = shlex.quote(script_path)
    quoted_python = shlex.quote(sys.executable)

    cron_command = (
        f"0 2 * * * cd {quoted_dir} && {quoted_python} {quoted_script} --daily "
        ">> /tmp/stock_intelligence.log 2>&1"
    )

    print("\nTo setup daily automation, add this to your crontab:")
    print("(runs every day at 2 AM)\n")
    print(cron_command)
    print("\nTo edit crontab, run: crontab -e")
    print("Then paste the line above and save.")
    print("\nOr run manually:")
    print(f" python {quoted_script} --daily")
async def main():
    """Dispatch the first command-line argument to the matching action.

    ``--daily`` runs the full update, ``--watchlist [FILE]`` updates only the
    watchlist (``watchlist.txt`` when no file is given) and ``--setup-cron``
    prints cron instructions. With no argument, or an unrecognised one, a
    heading followed by the usage text is printed.
    """
    usage_lines = (
        "\nUsage:",
        " python daily_automation.py --daily # Update all stocks",
        " python daily_automation.py --watchlist # Update watchlist only",
        " python daily_automation.py --setup-cron # Show cron setup instructions",
    )

    cli_args = sys.argv[1:]
    if not cli_args:
        heading = "❌ No command specified"
    else:
        flag = cli_args[0]
        if flag == "--daily":
            # Full daily update
            await run_daily_update()
            return
        if flag == "--watchlist":
            # Update watchlist only; optional second argument overrides the file
            watchlist_path = cli_args[1] if len(cli_args) > 1 else "watchlist.txt"
            await run_watchlist_update(watchlist_path)
            return
        if flag == "--setup-cron":
            # Show cron setup instructions
            setup_cron_job()
            return
        heading = "Daily Automation Script"

    # Fallthrough: no command or an unknown one — show heading and usage.
    print(heading)
    for text in usage_lines:
        print(text)
if __name__ == "__main__":
    # Executed as a script (not imported): drive the async CLI dispatcher.
    asyncio.run(main())