feat: Implement stock listing extraction and database population
- Added `extract_listings.py` for extracting stock listings from TSX, TSXV, CSE, and CBOE using Playwright.
- Created `main.py` to orchestrate the entire stock intelligence system, including extraction, database import, financial scraping, news scraping, and report generation.
- Developed `populate_database.py` to populate the database with existing JSON data.
- Introduced `scrape_nasdaq_tsx_only.py` for focused scraping of NASDAQ and TSX stocks.
- Added `setup.py` for initial setup and testing of the system.
- Created `watchlist.txt` template for user-defined stock tracking.
- Generated `final_test_output.txt` to log the results of the test run.
This commit is contained in:
Executable
+181
@@ -0,0 +1,181 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Daily automation script
|
||||
Runs updates for all stocks or specific tickers
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
from datetime import datetime
|
||||
import subprocess
|
||||
|
||||
# Add this script's own directory to the import path so main_robust resolves
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
from main_robust import RobustStockIntelligence
|
||||
|
||||
|
||||
async def run_daily_update():
    """Run the full daily update pipeline for every stock in the database.

    Steps, in order: read all stocks from ``orchestrator.db``, scrape
    financials (step 3), calculate metrics from the cached
    ``data/financials/<symbol>_yahoo.json`` files (step 4), scrape news
    (step 5), check SEC filings for US-listed stocks and SEDAR filings for
    Canadian-listed stocks (steps 6 and 7), generate reports (step 8),
    export CSV (step 9) and print the final statistics.

    The database handle is closed on every exit path: the early return when
    the database is empty, normal completion, and any exception raised by a
    pipeline step (which still propagates to the caller).

    Returns:
        None. Progress is reported on stdout.
    """
    print("=" * 70)
    print("DAILY STOCK INTELLIGENCE UPDATE")
    print(f"Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print("=" * 70)

    orchestrator = RobustStockIntelligence()
    try:
        # Get all stocks from database
        stocks = orchestrator.db.get_all_stocks()
        if not stocks:
            print("❌ No stocks in database. Run full extraction first.")
            return

        print(f"\n📊 Found {len(stocks)} stocks to update")

        # Convert rows to the dict format the pipeline steps take.
        # NOTE(review): assumes each row is indexable as
        # (?, symbol, name, exchange) — confirm against the DB schema.
        stock_list = [
            {'symbol': stock[1], 'name': stock[2], 'exchange': stock[3]}
            for stock in stocks
        ]

        # Update financials (daily)
        print("\n🔄 Updating financials...")
        await orchestrator.step3_scrape_financials(stock_list)

        # Calculate metrics from whatever financial files are on disk
        print("\n🔄 Calculating metrics...")
        financial_data = _load_financial_snapshots(
            s['symbol'] for s in stock_list
        )
        await orchestrator.step4_calculate_metrics(financial_data)

        # Update news (daily)
        print("\n🔄 Updating news...")
        await orchestrator.step5_scrape_news_pr(stock_list, use_serpapi=True)

        # Update filings (check for new ones)
        print("\n🔄 Checking for new filings...")
        us_stocks = [
            s for s in stock_list
            if s.get('exchange') in ('CBOE', 'NYSE', 'NASDAQ')
        ]
        canadian_stocks = [
            s for s in stock_list
            if s.get('exchange') in ('TSX', 'TSXV', 'CSE')
        ]

        # NOTE(review): the [:50] cap always selects the SAME first 50 stocks,
        # so entries beyond position 50 are never checked for filings unless
        # the ordering returned by the database changes between runs.
        if us_stocks:
            await orchestrator.step6_scrape_sec_filings(us_stocks[:50])  # Limit per day

        if canadian_stocks:
            await orchestrator.step7_scrape_sedar_filings(canadian_stocks[:50])  # Limit per day

        # Generate reports
        print("\n🔄 Generating reports...")
        orchestrator.step8_generate_reports()

        # Export CSV
        print("\n🔄 Exporting CSV files...")
        orchestrator.step9_export_csv()

        # Print stats
        orchestrator._print_final_stats()
    finally:
        # Always release the database handle, even on early return or error.
        orchestrator.db.close()

    print("\n✅ Daily update completed successfully!")
    print(f"Finished: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")


def _load_financial_snapshots(symbols, directory="data/financials"):
    """Load the cached ``<symbol>_yahoo.json`` files that exist for *symbols*.

    Symbols without a file are skipped silently. A file that cannot be read
    or parsed is skipped with a warning so that one bad file does not abort
    the whole run.

    Args:
        symbols: Iterable of ticker symbols.
        directory: Directory holding the ``*_yahoo.json`` files.

    Returns:
        List of decoded JSON payloads, in the order of *symbols*.
    """
    snapshots = []
    for symbol in symbols:
        path = f"{directory}/{symbol}_yahoo.json"
        try:
            # Explicit UTF-8: the locale default under cron may not be UTF-8.
            with open(path, 'r', encoding='utf-8') as file:
                snapshots.append(json.load(file))
        except FileNotFoundError:
            continue
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"⚠️ Skipping unreadable financial file {path}: {e}")
    return snapshots
|
||||
|
||||
|
||||
async def run_watchlist_update(watchlist_file="watchlist.txt"):
    """Update only the stocks listed in a watchlist file.

    The file holds one ticker per line. Blank lines and lines starting with
    ``#`` are ignored, and tickers are upper-cased. Each ticker is passed to
    ``orchestrator.run_for_single_stock``; a failure on one ticker is
    reported and does not stop the remaining ones.

    Args:
        watchlist_file: Path to the watchlist file.

    Returns:
        None. Progress and errors are reported on stdout.
    """
    print("=" * 70)
    print("WATCHLIST UPDATE")
    print("=" * 70)

    # Load watchlist (EAFP: open directly instead of exists-then-open)
    try:
        with open(watchlist_file, 'r', encoding='utf-8') as f:
            entries = (line.strip() for line in f)
            # Skip blanks and '#' comment lines so an annotated watchlist
            # does not produce bogus tickers.
            tickers = [
                entry.upper()
                for entry in entries
                if entry and not entry.startswith('#')
            ]
    except FileNotFoundError:
        print(f"❌ Watchlist file not found: {watchlist_file}")
        print(" Create a watchlist.txt file with one ticker per line")
        return

    print(f"\n📋 Loaded {len(tickers)} stocks from watchlist")

    if not tickers:
        # Nothing to do: avoid constructing the orchestrator and opening the DB.
        print("⚠️ Watchlist is empty - nothing to update")
        return

    orchestrator = RobustStockIntelligence()
    failed = []
    try:
        # Update each stock; one failing ticker must not block the rest.
        for ticker in tickers:
            print(f"\n🔄 Updating {ticker}...")
            try:
                await orchestrator.run_for_single_stock(ticker)
            except Exception as e:
                failed.append(ticker)
                print(f"❌ Error updating {ticker}: {e}")
    finally:
        # Release the database handle even if the loop is interrupted
        # (e.g. cancellation or KeyboardInterrupt).
        orchestrator.db.close()

    if failed:
        print(
            f"\n⚠️ Watchlist update finished with {len(failed)} error(s): "
            f"{', '.join(failed)}"
        )
    else:
        print("\n✅ Watchlist update completed!")
|
||||
|
||||
|
||||
def setup_cron_job():
    """Print instructions for scheduling the daily update with cron.

    Nothing is installed: the function only prints a crontab line that runs
    this script with ``--daily`` at 02:00 every day, appending output to
    ``/tmp/stock_intelligence.log``, plus a command for running it manually.

    The script directory, script path and interpreter path are shell-quoted
    so the printed line stays valid when a path contains spaces or other
    shell metacharacters. Paths without such characters print unchanged.

    Returns:
        None.
    """
    # Local import keeps this block self-contained; the file's top-level
    # import list does not include shlex.
    import shlex

    print("=" * 70)
    print("CRON JOB SETUP")
    print("=" * 70)

    script_path = os.path.abspath(__file__)
    quoted_dir = shlex.quote(os.path.dirname(script_path))
    quoted_script = shlex.quote(script_path)
    # Use the interpreter running this script so cron uses the same environment.
    quoted_python = shlex.quote(sys.executable)

    cron_command = (
        f"0 2 * * * cd {quoted_dir} && {quoted_python} {quoted_script} --daily"
        " >> /tmp/stock_intelligence.log 2>&1"
    )

    print("\nTo setup daily automation, add this to your crontab:")
    print("(runs every day at 2 AM)\n")
    print(cron_command)
    print("\nTo edit crontab, run: crontab -e")
    print("Then paste the line above and save.")
    print("\nOr run manually:")
    print(f" python {quoted_script} --daily")
|
||||
|
||||
|
||||
async def main():
    """Dispatch the command given as the first command-line argument.

    Supported commands:
        --daily             run ``run_daily_update()``
        --watchlist [FILE]  run ``run_watchlist_update(FILE)``; FILE defaults
                            to ``watchlist.txt``
        --setup-cron        print cron instructions via ``setup_cron_job()``

    With no argument an error line and the usage text are printed; with any
    other argument a title line and the usage text are printed.

    Returns:
        None.
    """
    args = sys.argv[1:]

    if not args:
        print("❌ No command specified")
        _print_usage()
        return

    command = args[0]

    if command == "--daily":
        # Full daily update
        await run_daily_update()
    elif command == "--watchlist":
        # Update watchlist only; an optional second argument overrides the file
        watchlist = args[1] if len(args) > 1 else "watchlist.txt"
        await run_watchlist_update(watchlist)
    elif command == "--setup-cron":
        # Show cron setup instructions
        setup_cron_job()
    else:
        print("Daily Automation Script")
        _print_usage()


def _print_usage():
    """Print the command-line usage text shared by all non-dispatching paths."""
    print("\nUsage:")
    print(" python daily_automation.py --daily # Update all stocks")
    print(" python daily_automation.py --watchlist [FILE] # Update watchlist only (default: watchlist.txt)")
    print(" python daily_automation.py --setup-cron # Show cron setup instructions")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Executed as a script: build the CLI coroutine and run it to completion
    # on a fresh event loop.
    cli_coroutine = main()
    asyncio.run(cli_coroutine)
|
||||
Reference in New Issue
Block a user