80ee708348
- Added `extract_listings.py` for extracting stock listings from TSX, TSXV, CSE, and CBOE using Playwright. - Created `main.py` to orchestrate the entire stock intelligence system, including extraction, database import, financial scraping, news scraping, and report generation. - Developed `populate_database.py` to populate the database with existing JSON data. - Introduced `scrape_nasdaq_tsx_only.py` for focused scraping of NASDAQ and TSX stocks. - Added `setup.py` for initial setup and testing of the system. - Created `watchlist.txt` template for user-defined stock tracking. - Generated `final_test_output.txt` to log the results of the test run.
103 lines
2.8 KiB
Python
103 lines
2.8 KiB
Python
"""
|
|
Scrape and generate reports for NASDAQ and TSX (including TSX Venture) stocks only.
|
|
Excludes CSE stocks which have data quality issues on Yahoo Finance.
|
|
"""
|
|
|
|
import asyncio
|
|
import sqlite3
|
|
from datetime import datetime
|
|
from scrape_yahoo_finance import YahooFinanceScraper
|
|
from database import StockDatabase
|
|
from generate_company_report import gather_contents, save_markdown, render_pdf_from_text
|
|
|
|
|
|
def _quote_field(quote, key):
    """Return ``quote[key]`` as display text, or ``'N/A'`` when it is absent."""
    value = quote.get(key)
    return 'N/A' if value is None else str(value)


async def process_stock(symbol, exchange, db):
    """Scrape one stock, store its quote, and write its reports.

    Args:
        symbol: Ticker symbol handed to the scraper and the report helpers.
        exchange: Exchange label handed to the scraper with the symbol.
        db: Object exposing ``insert_stock_quote(symbol, quote)``.

    Returns:
        ``True`` when the scrape reported no error and the Markdown report
        was saved; ``False`` when the scraper returned an ``'error'`` entry
        or any step raised an ``Exception``. A failed PDF render is reported
        but does not change the result.
    """
    print(f"\n{'='*70}")
    print(f"{symbol:15s} | {exchange:10s}")
    print('='*70)

    scraper = YahooFinanceScraper()

    try:
        # Scrape
        result = await scraper.scrape_stock_data(symbol, exchange)

        if result.get('error'):
            print(f" ❌ Error: {result['error']}")
            return False

        # Show quote. ``or {}`` also covers a 'quote' key that is present
        # but null, which ``.get('quote', {})`` would pass through as None.
        quote = result.get('quote') or {}
        # Values are converted to text first: the ``s`` format spec raises
        # for None and for numeric values.
        print(f" Quote: Close={_quote_field(quote, 'close'):>10s} "
              f"Open={_quote_field(quote, 'open'):>10s} "
              f"Vol={_quote_field(quote, 'volume')}")

        # Save to DB only when at least one field carries a truthy value.
        if quote and any(quote.values()):
            db.insert_stock_quote(symbol, quote)

        # Generate report
        content = gather_contents(symbol)
        md_path = save_markdown(symbol, content)
        print(f" ✅ Report: {md_path}")

        # PDF output is best-effort: a failure is reported and the stock
        # still counts as processed. Catching ``Exception`` (not a bare
        # ``except``) lets task cancellation and Ctrl-C propagate.
        pdf_path = f'data/reports/{symbol}_full_report.pdf'
        try:
            render_pdf_from_text(symbol, content, pdf_path)
        except Exception as pdf_error:
            print(f" ⚠️ PDF skipped: {pdf_error}")
        else:
            print(f" ✅ PDF: {pdf_path}")

        return True

    except Exception as e:
        print(f" ❌ Exception: {e}")
        return False
|
|
|
|
|
|
async def main():
    """Process every NASDAQ/TSX-family stock listed in ``data/stocks.db``.

    Reads ``symbol``, ``company_name`` and ``exchange`` from the
    ``stocks_master`` table for the exchanges ``NASDAQ``, ``TSX``, ``TSXV``
    and ``TSX/TSXV``, runs ``process_stock`` on each row in turn, and prints
    a success/failure tally with start and completion timestamps.

    Returns:
        None.
    """
    print("\n" + "="*70)
    print("NASDAQ & TSX STOCK INTELLIGENCE SYSTEM")
    print("="*70)
    print(f"Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")

    # Get NASDAQ and TSX stocks from database. The connection is closed in
    # ``finally`` so a failing query does not leave it open.
    conn = sqlite3.connect('data/stocks.db')
    try:
        cur = conn.cursor()
        cur.execute('''
            SELECT symbol, company_name, exchange
            FROM stocks_master
            WHERE exchange IN ('NASDAQ', 'TSX', 'TSXV', 'TSX/TSXV')
            ORDER BY exchange, symbol
        ''')
        stocks = cur.fetchall()
    finally:
        conn.close()

    print(f"📊 Found {len(stocks)} NASDAQ/TSX stocks to process\n")

    db = StockDatabase()

    success = 0
    failed = 0

    try:
        # Stocks are processed one at a time, in query order.
        for symbol, _company_name, exchange in stocks:
            if await process_stock(symbol, exchange, db):
                success += 1
            else:
                failed += 1
    finally:
        # Release the database handle even if the run is interrupted.
        db.close()

    print("\n" + "="*70)
    print("COMPLETE")
    print("="*70)
    print(f"✅ Success: {success}")
    print(f"❌ Failed: {failed}")
    print(f"Completed: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
|
|
|
|
|
|
# Start the async pipeline only when this file is executed as a script,
# so importing the module has no side effects.
if __name__ == "__main__":
    asyncio.run(main())
|