80ee708348
- Added `extract_listings.py` for extracting stock listings from TSX, TSXV, CSE, and CBOE using Playwright. - Created `main.py` to orchestrate the entire stock intelligence system, including extraction, database import, financial scraping, news scraping, and report generation. - Developed `populate_database.py` to populate the database with existing JSON data. - Introduced `scrape_nasdaq_tsx_only.py` for focused scraping of NASDAQ and TSX stocks. - Added `setup.py` for initial setup and testing of the system. - Created `watchlist.txt` template for user-defined stock tracking. - Generated `final_test_output.txt` to log the results of the test run.
182 lines
5.6 KiB
Python
Executable File
182 lines
5.6 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Daily automation script
|
|
Runs updates for all stocks or specific tickers
|
|
"""
|
|
|
|
import asyncio
|
|
import os
|
|
import sys
|
|
import json
|
|
from datetime import datetime
|
|
import subprocess
|
|
|
|
# Add parent directory to path
|
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
|
|
from main_robust import RobustStockIntelligence
|
|
|
|
|
|
def _load_financial_snapshots(stock_list):
    """Load the per-symbol Yahoo financial JSON files that exist on disk.

    For every entry in *stock_list* the file
    ``data/financials/<symbol>_yahoo.json`` (relative to the current working
    directory) is read if it exists. Missing files are silently skipped, as
    before; files that cannot be read or parsed are skipped with a warning so
    that one bad file does not abort the whole daily run.

    Args:
        stock_list: Iterable of dicts that each carry a ``'symbol'`` key.

    Returns:
        A list with the decoded JSON content of every readable file, in
        *stock_list* order.
    """
    snapshots = []
    for stock in stock_list:
        path = f"data/financials/{stock['symbol']}_yahoo.json"
        if not os.path.exists(path):
            continue
        try:
            with open(path, 'r', encoding='utf-8') as file:
                snapshots.append(json.load(file))
        except (OSError, ValueError) as exc:
            # ValueError covers both json.JSONDecodeError and
            # UnicodeDecodeError.
            print(f"⚠️ Skipping unreadable financial file {path}: {exc}")
    return snapshots


async def run_daily_update(filing_limit=50):
    """Run the daily update pipeline for every stock in the database.

    The steps run in this order: financial scrape, metric calculation,
    news scrape, SEC filings (exchanges CBOE/NYSE/NASDAQ), SEDAR filings
    (exchanges TSX/TSXV/CSE), report generation, CSV export, final stats.
    The database handle is always closed, including on the early
    "no stocks" return and when a step raises.

    Args:
        filing_limit: Maximum number of US stocks and (separately) Canadian
            stocks passed to the filing scrapers in one run. Defaults to 50,
            the previously hard-coded value. ``None`` removes the cap.

    Returns:
        None.
    """
    print("=" * 70)
    print("DAILY STOCK INTELLIGENCE UPDATE")
    print(f"Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print("=" * 70)

    orchestrator = RobustStockIntelligence()
    try:
        # Get all stocks from database
        stocks = orchestrator.db.get_all_stocks()

        if not stocks:
            print("❌ No stocks in database. Run full extraction first.")
            return

        print(f"\n📊 Found {len(stocks)} stocks to update")

        # Convert rows to the dict format the pipeline steps receive.
        # NOTE(review): assumes each row is (id, symbol, name, exchange, ...)
        # — confirm against the database schema.
        stock_list = [
            {'symbol': row[1], 'name': row[2], 'exchange': row[3]}
            for row in stocks
        ]

        # Update financials (daily)
        print("\n🔄 Updating financials...")
        await orchestrator.step3_scrape_financials(stock_list)

        # Calculate metrics from whatever financial files are on disk
        print("\n🔄 Calculating metrics...")
        financial_data = _load_financial_snapshots(stock_list)
        await orchestrator.step4_calculate_metrics(financial_data)

        # Update news (daily)
        print("\n🔄 Updating news...")
        await orchestrator.step5_scrape_news_pr(stock_list, use_serpapi=True)

        # Update filings (check for new ones)
        print("\n🔄 Checking for new filings...")
        us_stocks = [
            s for s in stock_list
            if s.get('exchange') in ('CBOE', 'NYSE', 'NASDAQ')
        ]
        canadian_stocks = [
            s for s in stock_list
            if s.get('exchange') in ('TSX', 'TSXV', 'CSE')
        ]

        # NOTE(review): the cap always selects the first `filing_limit`
        # entries, so stocks beyond that position are never checked for
        # filings — confirm whether a rotation is intended.
        if us_stocks:
            await orchestrator.step6_scrape_sec_filings(us_stocks[:filing_limit])

        if canadian_stocks:
            await orchestrator.step7_scrape_sedar_filings(canadian_stocks[:filing_limit])

        # Generate reports
        print("\n🔄 Generating reports...")
        orchestrator.step8_generate_reports()

        # Export CSV
        print("\n🔄 Exporting CSV files...")
        orchestrator.step9_export_csv()

        # Print stats
        orchestrator._print_final_stats()
    finally:
        # Release the database handle on every exit path.
        orchestrator.db.close()

    print("\n✅ Daily update completed successfully!")
    print(f"Finished: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
|
|
|
|
|
|
async def run_watchlist_update(watchlist_file="watchlist.txt"):
    """Update only the stocks listed in a watchlist file.

    The file is read as one ticker per line. Blank lines are ignored, text
    after a ``#`` is treated as a comment, tickers are upper-cased, and
    duplicates are dropped while keeping first-seen order. Each ticker is
    passed to ``run_for_single_stock``; a failure for one ticker is reported
    and does not stop the remaining tickers.

    Args:
        watchlist_file: Path to the watchlist file. Defaults to
            ``"watchlist.txt"`` in the current working directory.

    Returns:
        None. Returns early (after printing a message) when the file does
        not exist or contains no tickers.
    """
    print("=" * 70)
    print("WATCHLIST UPDATE")
    print("=" * 70)

    if not os.path.exists(watchlist_file):
        print(f"❌ Watchlist file not found: {watchlist_file}")
        print(" Create a watchlist.txt file with one ticker per line")
        return

    # Load watchlist. 'utf-8-sig' transparently drops a leading BOM, which
    # would otherwise become part of the first ticker.
    with open(watchlist_file, 'r', encoding='utf-8-sig') as f:
        parsed = (line.split('#', 1)[0].strip().upper() for line in f)
        # dict.fromkeys de-duplicates while preserving file order.
        tickers = list(dict.fromkeys(t for t in parsed if t))

    print(f"\n📋 Loaded {len(tickers)} stocks from watchlist")

    if not tickers:
        # Nothing to do: avoid opening the orchestrator/database at all.
        print("⚠️ Watchlist is empty; nothing to update")
        return

    orchestrator = RobustStockIntelligence()
    try:
        # Update each stock
        for ticker in tickers:
            print(f"\n🔄 Updating {ticker}...")
            try:
                await orchestrator.run_for_single_stock(ticker)
            except Exception as e:
                # Best-effort per ticker: report and continue with the rest.
                print(f"❌ Error updating {ticker}: {e}")
    finally:
        # Close the database even if the loop is interrupted or cancelled.
        orchestrator.db.close()

    print("\n✅ Watchlist update completed!")
|
|
|
|
|
|
def setup_cron_job():
    """Print instructions for scheduling the daily update with cron.

    Nothing is installed; the function only prints a crontab line (daily at
    02:00) that changes into this script's directory, runs the script with
    the current interpreter and ``--daily``, and appends output to
    ``/tmp/stock_intelligence.log``. Paths are shell-quoted so the printed
    line stays valid when the install location contains spaces or shell
    metacharacters.

    Returns:
        None.
    """
    # Function-scope import keeps this block self-contained.
    import shlex

    print("=" * 70)
    print("CRON JOB SETUP")
    print("=" * 70)

    script_path = os.path.abspath(__file__)
    # shlex.quote leaves plain paths untouched and single-quotes the rest.
    quoted_dir = shlex.quote(os.path.dirname(script_path))
    quoted_script = shlex.quote(script_path)
    quoted_python = shlex.quote(sys.executable)

    cron_command = (
        f"0 2 * * * cd {quoted_dir} && {quoted_python} {quoted_script} "
        "--daily >> /tmp/stock_intelligence.log 2>&1"
    )

    print("\nTo setup daily automation, add this to your crontab:")
    print("(runs every day at 2 AM)\n")
    print(cron_command)
    print("\nTo edit crontab, run: crontab -e")
    print("Then paste the line above and save.")
    print("\nOr run manually:")
    print(f" python {quoted_script} --daily")
|
|
|
|
|
|
def _print_usage():
    """Print the command-line usage summary shared by all help paths."""
    print("\nUsage:")
    print(" python daily_automation.py --daily # Update all stocks")
    print(" python daily_automation.py --watchlist # Update watchlist only")
    print(" python daily_automation.py --setup-cron # Show cron setup instructions")


async def main():
    """Dispatch on the first command-line argument.

    Recognised commands are ``--daily`` (full update), ``--watchlist``
    (optionally followed by a watchlist path, default ``watchlist.txt``) and
    ``--setup-cron`` (print cron instructions). A missing or unrecognised
    command prints the usage text instead.
    """
    cli_args = sys.argv[1:]

    # Guard clause: nothing to dispatch on.
    if not cli_args:
        print("❌ No command specified")
        _print_usage()
        return

    command = cli_args[0]

    if command == "--daily":
        # Full daily update
        await run_daily_update()
        return

    if command == "--watchlist":
        # Update watchlist only; the path argument is optional.
        if len(cli_args) > 1:
            watchlist = cli_args[1]
        else:
            watchlist = "watchlist.txt"
        await run_watchlist_update(watchlist)
        return

    if command == "--setup-cron":
        # Show cron setup instructions
        setup_cron_job()
        return

    # Unrecognised command: show the banner and usage.
    print("Daily Automation Script")
    _print_usage()
|
|
|
|
|
|
# Script entry point: run the async command dispatcher only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    asyncio.run(main())
|