Files
microcap_scrapping/scrape_nasdaq_tsx_only.py
T

103 lines
2.8 KiB
Python
Raw Normal View History

"""
Scrape and generate reports for NASDAQ and TSX stocks only.
Excludes CSE stocks which have data quality issues on Yahoo Finance.
"""
import asyncio
import sqlite3
from datetime import datetime
from scrape_yahoo_finance import YahooFinanceScraper
from database import StockDatabase
from generate_company_report import gather_contents, save_markdown, render_pdf_from_text
async def process_stock(symbol, exchange, db):
    """Scrape one stock, store its quote, and generate its reports.

    Args:
        symbol: Ticker symbol handed to the scraper and report helpers.
        exchange: Exchange label handed to the scraper with the symbol.
        db: Object exposing ``insert_stock_quote(symbol, quote)``.

    Returns:
        True when the scrape succeeded and the markdown report was saved.
        False when the scraper reported an error or any step raised an
        ``Exception``. A failed PDF render is reported but is not fatal.
    """
    print(f"\n{'='*70}")
    print(f"{symbol:15s} | {exchange:10s}")
    print('='*70)

    scraper = YahooFinanceScraper()

    try:
        # Scrape
        result = await scraper.scrape_stock_data(symbol, exchange)

        if result.get('error'):
            print(f" ❌ Error: {result['error']}")
            return False

        # Show quote. A missing/None quote is treated as empty so the
        # report is still generated, matching the empty-dict path.
        quote = result.get('quote') or {}

        # The ">10s" format spec only accepts strings; None or numeric
        # values would raise and wrongly mark the whole stock as failed,
        # so every displayed field is normalised to text first.
        close, open_, volume = (
            'N/A' if quote.get(field) is None else str(quote[field])
            for field in ('close', 'open', 'volume')
        )
        print(f" Quote: Close={close:>10s} "
              f"Open={open_:>10s} "
              f"Vol={volume}")

        # Save to DB only when at least one quote field carries a value.
        if quote and any(quote.values()):
            db.insert_stock_quote(symbol, quote)

        # Generate report
        content = gather_contents(symbol)
        md_path = save_markdown(symbol, content)
        print(f" ✅ Report: {md_path}")

        # PDF rendering is best-effort: a failure is reported but does not
        # fail the stock. Catch Exception rather than using a bare except
        # so task cancellation and Ctrl-C still propagate.
        pdf_path = f'data/reports/{symbol}_full_report.pdf'
        try:
            render_pdf_from_text(symbol, content, pdf_path)
        except Exception as pdf_error:
            print(f" ⚠️ PDF skipped: {pdf_error}")
        else:
            print(f" ✅ PDF: {pdf_path}")

        return True

    except Exception as e:
        print(f" ❌ Exception: {e}")
        return False
async def main():
    """Process every NASDAQ/TSX stock listed in the local database.

    Reads (symbol, company_name, exchange) rows from the ``stocks_master``
    table of ``data/stocks.db``, runs ``process_stock`` on each one in
    order, and prints a success/failure summary with start and end times.
    """
    print("\n" + "="*70)
    print("NASDAQ & TSX STOCK INTELLIGENCE SYSTEM")
    print("="*70)
    print(f"Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")

    # Get NASDAQ and TSX stocks from database. The try/finally guarantees
    # the connection is closed even if the query itself fails.
    conn = sqlite3.connect('data/stocks.db')
    try:
        cur = conn.cursor()
        cur.execute('''
            SELECT symbol, company_name, exchange
            FROM stocks_master
            WHERE exchange IN ('NASDAQ', 'TSX', 'TSXV', 'TSX/TSXV')
            ORDER BY exchange, symbol
        ''')
        stocks = cur.fetchall()
    finally:
        conn.close()

    print(f"📊 Found {len(stocks)} NASDAQ/TSX stocks to process\n")

    db = StockDatabase()
    success = 0
    failed = 0

    # process_stock handles ordinary exceptions itself, but cancellation or
    # Ctrl-C can still escape the loop; close the database regardless.
    try:
        for symbol, _company_name, exchange in stocks:
            if await process_stock(symbol, exchange, db):
                success += 1
            else:
                failed += 1
    finally:
        db.close()

    print("\n" + "="*70)
    print("COMPLETE")
    print("="*70)
    print(f"✅ Success: {success}")
    print(f"❌ Failed: {failed}")
    print(f"Completed: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
# Run the async entry point only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    asyncio.run(main())