feat: Implement stock listing extraction and database population
- Added `extract_listings.py` for extracting stock listings from TSX, TSXV, CSE, and CBOE using Playwright.
- Created `main.py` to orchestrate the entire stock intelligence system, including extraction, database import, financial scraping, news scraping, and report generation.
- Developed `populate_database.py` to populate the database with existing JSON data.
- Introduced `scrape_nasdaq_tsx_only.py` for focused scraping of NASDAQ and TSX stocks.
- Added `setup.py` for initial setup and testing of the system.
- Created `watchlist.txt` template for user-defined stock tracking.
- Generated `final_test_output.txt` to log the results of the test run.
This commit is contained in:
@@ -0,0 +1,102 @@
|
||||
"""
|
||||
Scrape and generate reports for NASDAQ and TSX stocks only.
|
||||
Excludes CSE stocks which have data quality issues on Yahoo Finance.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import sqlite3
|
||||
from datetime import datetime
|
||||
from scrape_yahoo_finance import YahooFinanceScraper
|
||||
from database import StockDatabase
|
||||
from generate_company_report import gather_contents, save_markdown, render_pdf_from_text
|
||||
|
||||
|
||||
def _display_quote_field(quote, key):
    """Return the quote value for *key* as text, or 'N/A' when absent/None."""
    value = quote.get(key)
    return 'N/A' if value is None else str(value)


async def process_stock(symbol, exchange, db):
    """Scrape one stock, store its quote, and write its reports.

    Steps, in order:
      1. Print a banner with the symbol and exchange.
      2. Scrape the stock via ``YahooFinanceScraper.scrape_stock_data``.
      3. Print the close/open/volume fields of the returned quote.
      4. Insert the quote through ``db.insert_stock_quote`` when it holds at
         least one truthy value.
      5. Build the report content and save it as Markdown.
      6. Try to render a PDF; a PDF failure is reported but does not fail
         the stock.

    Args:
        symbol: Ticker symbol; also used to build the PDF file name.
        exchange: Exchange name passed through to the scraper.
        db: Object exposing ``insert_stock_quote(symbol, quote)``.

    Returns:
        True when scraping and the Markdown report succeeded, False when the
        scraper reported an error or any step other than PDF rendering
        raised an exception.
    """
    print(f"\n{'='*70}")
    print(f"{symbol:15s} | {exchange:10s}")
    print('='*70)

    scraper = YahooFinanceScraper()

    try:
        # Scrape
        result = await scraper.scrape_stock_data(symbol, exchange)

        if result.get('error'):
            print(f" ❌ Error: {result['error']}")
            return False

        # Show quote. ``or {}`` also covers a 'quote' key that is present
        # but None, which ``.get('quote', {})`` would pass through as None.
        quote = result.get('quote') or {}
        # Values are converted to text before padding: the ``s`` format spec
        # raises TypeError for non-string values (e.g. None or a float),
        # which previously aborted the stock before anything was saved.
        close_text = _display_quote_field(quote, 'close')
        open_text = _display_quote_field(quote, 'open')
        print(f" Quote: Close={close_text:>10s} "
              f"Open={open_text:>10s} "
              f"Vol={quote.get('volume', 'N/A')}")

        # Save to DB only when the quote carries at least one truthy value.
        if quote and any(quote.values()):
            db.insert_stock_quote(symbol, quote)

        # Generate report
        content = gather_contents(symbol)
        md_path = save_markdown(symbol, content)
        print(f" ✅ Report: {md_path}")

        # PDF output is best-effort: the Markdown report already exists, so
        # a rendering failure is reported and the stock still counts as a
        # success. ``except Exception`` (not a bare ``except``) lets
        # KeyboardInterrupt and task cancellation propagate.
        try:
            pdf_path = f'data/reports/{symbol}_full_report.pdf'
            render_pdf_from_text(symbol, content, pdf_path)
            print(f" ✅ PDF: {pdf_path}")
        except Exception as pdf_error:
            print(f" ⚠️ PDF skipped: {pdf_error}")

        return True

    except Exception as e:
        print(f" ❌ Exception: {e}")
        return False
|
||||
|
||||
|
||||
async def main():
    """Process every NASDAQ/TSX-family stock listed in the local database.

    Reads ``symbol``, ``company_name`` and ``exchange`` from the
    ``stocks_master`` table of ``data/stocks.db`` for the exchanges
    'NASDAQ', 'TSX', 'TSXV' and 'TSX/TSXV', then runs ``process_stock`` on
    each row sequentially and prints a success/failure summary.
    """
    print("\n" + "="*70)
    print("NASDAQ & TSX STOCK INTELLIGENCE SYSTEM")
    print("="*70)
    print(f"Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")

    # Get NASDAQ and TSX stocks from database. The connection is closed in
    # ``finally`` so a failing query (e.g. missing table) does not leak it.
    conn = sqlite3.connect('data/stocks.db')
    try:
        cur = conn.cursor()
        cur.execute('''
            SELECT symbol, company_name, exchange
            FROM stocks_master
            WHERE exchange IN ('NASDAQ', 'TSX', 'TSXV', 'TSX/TSXV')
            ORDER BY exchange, symbol
        ''')
        stocks = cur.fetchall()
    finally:
        conn.close()

    print(f"📊 Found {len(stocks)} NASDAQ/TSX stocks to process\n")

    db = StockDatabase()

    success = 0
    failed = 0

    # ``db`` is closed even if the loop is interrupted or cancelled.
    try:
        # company_name is selected by the query but not used here.
        for symbol, _company_name, exchange in stocks:
            if await process_stock(symbol, exchange, db):
                success += 1
            else:
                failed += 1
    finally:
        db.close()

    print("\n" + "="*70)
    print("COMPLETE")
    print("="*70)
    print(f"✅ Success: {success}")
    print(f"❌ Failed: {failed}")
    print(f"Completed: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
|
||||
|
||||
|
||||
# Script entry point: start the async pipeline only when this file is run
# directly, so importing the module has no side effects.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
Reference in New Issue
Block a user