Files
microcap_scrapping/scrape_nasdaq_tsx_only.py
T
Aherobo Ovie Victor 80ee708348 feat: Implement stock listing extraction and database population
- Added `extract_listings.py` for extracting stock listings from TSX, TSXV, CSE, and CBOE using Playwright.
- Created `main.py` to orchestrate the entire stock intelligence system, including extraction, database import, financial scraping, news scraping, and report generation.
- Developed `populate_database.py` to populate the database with existing JSON data.
- Introduced `scrape_nasdaq_tsx_only.py` for focused scraping of NASDAQ and TSX stocks.
- Added `setup.py` for initial setup and testing of the system.
- Created `watchlist.txt` template for user-defined stock tracking.
- Generated `final_test_output.txt` to log the results of the test run.
2025-11-06 12:34:01 +01:00

103 lines
2.8 KiB
Python

"""
Scrape and generate reports for NASDAQ and TSX/TSXV stocks only.
Excludes CSE stocks which have data quality issues on Yahoo Finance.
"""
import asyncio
import sqlite3
from datetime import datetime
from scrape_yahoo_finance import YahooFinanceScraper
from database import StockDatabase
from generate_company_report import gather_contents, save_markdown, render_pdf_from_text
async def process_stock(symbol, exchange, db):
    """Scrape one stock, store its quote, and generate its reports.

    Args:
        symbol: Ticker symbol handed to the scraper and the report helpers.
        exchange: Exchange name handed to the scraper with the symbol.
        db: Object providing ``insert_stock_quote(symbol, quote)``.

    Returns:
        True when the scrape succeeded and the Markdown report was saved.
        False when the scraper reported an error or any step raised.
        A failure while rendering the PDF is reported but does not turn a
        successful run into a failure.
    """

    def _cell(value):
        # The ``s`` format type only accepts strings; a None or numeric quote
        # field would otherwise raise and fail the whole stock.
        return 'N/A' if value is None else str(value)

    print(f"\n{'='*70}")
    print(f"{symbol:15s} | {exchange:10s}")
    print('='*70)

    scraper = YahooFinanceScraper()
    try:
        # Scrape
        result = await scraper.scrape_stock_data(symbol, exchange)
        if result.get('error'):
            print(f" ❌ Error: {result['error']}")
            return False

        # Show quote; a present-but-null 'quote' is treated like a missing one.
        quote = result.get('quote') or {}
        print(f" Quote: Close={_cell(quote.get('close', 'N/A')):>10s} "
              f"Open={_cell(quote.get('open', 'N/A')):>10s} "
              f"Vol={quote.get('volume', 'N/A')}")

        # Save to DB only when at least one quote field carries a value.
        if quote and any(quote.values()):
            db.insert_stock_quote(symbol, quote)

        # Generate report
        content = gather_contents(symbol)
        md_path = save_markdown(symbol, content)
        print(f" ✅ Report: {md_path}")

        # The PDF is best-effort: report the reason instead of hiding it, and
        # let KeyboardInterrupt / task cancellation propagate.
        pdf_path = f'data/reports/{symbol}_full_report.pdf'
        try:
            render_pdf_from_text(symbol, content, pdf_path)
            print(f" ✅ PDF: {pdf_path}")
        except Exception as pdf_error:
            print(f" ⚠️ PDF skipped: {pdf_error}")

        return True
    except Exception as e:
        print(f" ❌ Exception: {e}")
        return False
async def main():
    """Process every NASDAQ/TSX stock listed in the local database.

    Reads ``symbol``, ``company_name`` and ``exchange`` from the
    ``stocks_master`` table of ``data/stocks.db`` for the exchanges named in
    the query, runs ``process_stock`` on each row in turn, and prints a
    summary of how many stocks succeeded and how many failed.
    """
    print("\n" + "="*70)
    print("NASDAQ & TSX STOCK INTELLIGENCE SYSTEM")
    print("="*70)
    print(f"Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")

    # Get NASDAQ and TSX stocks from database. The connection is closed even
    # when the query fails (for example, if the table does not exist).
    conn = sqlite3.connect('data/stocks.db')
    try:
        cur = conn.cursor()
        cur.execute('''
SELECT symbol, company_name, exchange
FROM stocks_master
WHERE exchange IN ('NASDAQ', 'TSX', 'TSXV', 'TSX/TSXV')
ORDER BY exchange, symbol
''')
        stocks = cur.fetchall()
    finally:
        conn.close()

    print(f"📊 Found {len(stocks)} NASDAQ/TSX stocks to process\n")

    db = StockDatabase()
    success = 0
    failed = 0
    try:
        # company_name is selected by the query but not needed here.
        for symbol, _company_name, exchange in stocks:
            if await process_stock(symbol, exchange, db):
                success += 1
            else:
                failed += 1
    finally:
        # Release the database handle even if the run is interrupted.
        db.close()

    print("\n" + "="*70)
    print("COMPLETE")
    print("="*70)
    print(f"✅ Success: {success}")
    print(f"❌ Failed: {failed}")
    print(f"Completed: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
# Run the full scrape only when executed as a script, not when imported.
if __name__ == "__main__":
    asyncio.run(main())