feat: Implement stock listing extraction and database population
- Added `extract_listings.py` for extracting stock listings from TSX, TSXV, CSE, and CBOE using Playwright.
- Created `main.py` to orchestrate the entire stock intelligence system, including extraction, database import, financial scraping, news scraping, and report generation.
- Developed `populate_database.py` to populate the database with existing JSON data.
- Introduced `scrape_nasdaq_tsx_only.py` for focused scraping of NASDAQ and TSX stocks.
- Added `setup.py` for initial setup and testing of the system.
- Created `watchlist.txt` template for user-defined stock tracking.
- Generated `final_test_output.txt` to log the results of the test run.
This commit is contained in:
@@ -0,0 +1,146 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Quick Setup & Test Script
|
||||
Run this first to verify everything is working!
|
||||
"""
|
||||
|
||||
import subprocess
|
||||
import sys
|
||||
import os
|
||||
|
||||
def print_header(text):
    """Print ``text`` framed by two 70-character ``=`` rules.

    A blank line is emitted before the top rule so consecutive sections are
    visually separated.

    Args:
        text: The heading to display between the rules.
    """
    rule = "=" * 70
    # One call, three lines: "\n<rule>", the heading, then the closing rule.
    print(f"\n{rule}", text, rule, sep="\n")
|
||||
|
||||
def check_python_version():
    """Print the running interpreter's version and check it is 3.8 or newer.

    Returns:
        True when the interpreter is Python 3.8+, False otherwise.
    """
    print_header("CHECKING PYTHON VERSION")

    major, minor, micro = sys.version_info[:3]
    print(f"Python {major}.{minor}.{micro}")

    # Tuple comparison orders by major first, then minor.
    supported = (major, minor) >= (3, 8)
    print("✅ Python version OK" if supported else "❌ Python 3.8+ required")
    return supported
|
||||
|
||||
def install_dependencies():
    """Install the packages listed in ``requirements.txt`` using pip.

    pip is invoked as ``<current interpreter> -m pip`` so the packages land in
    the environment that is running this script. pip's output is captured to
    keep the console quiet on success; when pip fails, the captured error
    output is printed so the user can see *why* it failed instead of only the
    exit status.

    NOTE(review): ``requirements.txt`` is a relative path, so it is resolved
    against the current working directory - confirm the script is always run
    from the project root.

    Returns:
        True if pip exited successfully, False otherwise.
    """
    print_header("INSTALLING DEPENDENCIES")
    print("This may take a few minutes...")

    command = [sys.executable, "-m", "pip", "install", "-r", "requirements.txt"]
    try:
        # text/errors: decode the captured streams leniently so they can be
        # shown to the user without risking a UnicodeDecodeError.
        subprocess.run(
            command,
            check=True,
            capture_output=True,
            text=True,
            errors="replace",
        )
    except subprocess.CalledProcessError as e:
        print(f"❌ Failed to install packages: {e}")
        # The exception message only carries the exit status; pip's own
        # diagnostics are in the captured streams.
        details = (e.stderr or e.stdout or "").strip()
        if details:
            print(details)
        return False

    print("✅ Python packages installed")
    return True
|
||||
|
||||
def install_playwright():
    """Download the Chromium browser used by Playwright.

    Runs ``<current interpreter> -m playwright install chromium``; the
    command's output goes straight to the console.

    Returns:
        True if the command exited successfully, False otherwise.
    """
    print_header("INSTALLING PLAYWRIGHT BROWSER")
    print("Installing Chromium...")

    command = [sys.executable, "-m", "playwright", "install", "chromium"]
    try:
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError as err:
        print(f"❌ Failed to install Playwright: {err}")
        return False
    else:
        print("✅ Playwright Chromium installed")

    return True
|
||||
|
||||
def create_directories():
    """Create the ``data/`` directory tree used by the rest of the system.

    Directories that already exist are left untouched. A failure to create a
    directory (for example a permission error, or a regular file occupying
    the path) is reported and stops the process, so the caller can abort the
    setup instead of crashing with a traceback.

    Returns:
        True if every directory exists afterwards, False if one could not be
        created.
    """
    print_header("CREATING DIRECTORIES")

    dirs = [
        "data",
        "data/listings",
        "data/financials",
        "data/news",
        "data/reports",
    ]

    for dir_path in dirs:
        try:
            os.makedirs(dir_path, exist_ok=True)
        except OSError as e:
            print(f"❌ Could not create {dir_path}/: {e}")
            return False
        print(f"✅ Created {dir_path}/")

    return True
|
||||
|
||||
def run_test():
    """Run ``test_extraction.py`` with the current interpreter.

    Returns:
        True if the test script exited successfully, False if it exited with
        a non-zero status.
    """
    print_header("RUNNING TEST EXTRACTION")
    intro_lines = (
        "This will extract a few stocks from CSE...",
        "A browser window will open - this is normal!",
        "",  # trailing blank line before the test's own output
    )
    for line in intro_lines:
        print(line)

    test_command = [sys.executable, "test_extraction.py"]
    try:
        subprocess.run(test_command, check=True)
    except subprocess.CalledProcessError:
        print("❌ Test failed")
        return False
    return True
|
||||
|
||||
def main():
    """Run the interactive setup and, if the user agrees, the test extraction.

    The setup steps run in order and stop at the first one that fails:
    Python version check, dependency installation, Playwright browser
    installation, data directory creation.

    Both prompts tolerate a closed or redirected stdin: at the first prompt a
    missing terminal is treated as "continue" (non-interactive run), at the
    second prompt the test is skipped.

    Returns:
        0 when setup succeeded (and the test, if it was run, passed);
        1 when setup was cancelled, a setup step failed, or the test failed.
    """
    print_header("🚀 STOCK INTELLIGENCE SYSTEM - SETUP & TEST")
    print("\nThis script will:")
    print("  1. Check Python version")
    print("  2. Install required packages")
    print("  3. Install Playwright browser")
    print("  4. Create data directories")
    print("  5. Run a test extraction")
    print("\nPress Enter to continue, or Ctrl+C to cancel...")

    try:
        input()
    except KeyboardInterrupt:
        print("\n\nSetup cancelled.")
        return 1
    except EOFError:
        # stdin is closed/redirected, so nobody can press Enter; carry on
        # rather than crash with a traceback.
        print("\nNo interactive input available - continuing.")

    # Each step is paired with the message printed when it fails.
    # check_python_version reports its own failure, so it has no extra message.
    steps = (
        (check_python_version, None),
        (install_dependencies, "\n❌ Setup failed at dependency installation"),
        (install_playwright, "\n❌ Setup failed at Playwright installation"),
        (create_directories, "\n❌ Setup failed at directory creation"),
    )
    for step, failure_message in steps:
        if not step():
            if failure_message is not None:
                print(failure_message)
            return 1

    # Ask before running test
    print_header("READY TO TEST")
    print("Setup complete! Ready to run test extraction.")
    print("This will take 1-2 minutes and open a browser window.")
    print("\nRun test now? (y/n): ", end="")

    exit_code = 0
    try:
        answer = input().strip().lower()
        if answer in ("y", "yes"):
            if run_test():
                print_header("✅ SUCCESS!")
                print("\nEverything is working!")
                print("\nNext steps:")
                print("  1. Check data/listings/ for extracted stock data")
                print("  2. Run: python main.py (for full pipeline)")
                print("  3. Check GUIDE.md for detailed usage instructions")
            else:
                exit_code = 1
                print_header("⚠️  TEST FAILED")
                print("\nThe test extraction failed. This might be because:")
                print("  1. The exchange websites changed their structure")
                print("  2. Network connectivity issues")
                print("  3. Website blocking automated access")
                print("\nCheck data/listings/ for HTML files to debug.")
    except (KeyboardInterrupt, EOFError):
        # Ctrl+C (at the prompt or while the test runs) or a closed stdin:
        # the test is optional, so skip it and finish the setup normally.
        print("\n\nTest skipped.")

    print_header("SETUP COMPLETE")
    print("You're ready to go! See GUIDE.md for usage instructions.")
    return exit_code
|
||||
|
||||
if __name__ == "__main__":
    # Pass main()'s return value to the interpreter as the process exit
    # status; a return value of None counts as success (exit status 0).
    sys.exit(main())
|
||||
Reference in New Issue
Block a user