Files
microcap_scrapping/setup.py
T
Aherobo Ovie Victor 80ee708348 feat: Implement stock listing extraction and database population
- Added `extract_listings.py` for extracting stock listings from TSX, TSXV, CSE, and CBOE using Playwright.
- Created `main.py` to orchestrate the entire stock intelligence system, including extraction, database import, financial scraping, news scraping, and report generation.
- Developed `populate_database.py` to populate the database with existing JSON data.
- Introduced `scrape_nasdaq_tsx_only.py` for focused scraping of NASDAQ and TSX stocks.
- Added `setup.py` for initial setup and testing of the system.
- Created `watchlist.txt` template for user-defined stock tracking.
- Generated `final_test_output.txt` to log the results of the test run.
2025-11-06 12:34:01 +01:00

147 lines
4.4 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Quick Setup & Test Script
Run this first to verify everything is working!
"""
import subprocess
import sys
import os
def print_header(text):
    """Print ``text`` framed above and below by a 70-character ``=`` rule.

    A blank line is emitted before the top rule so that consecutive
    sections are visually separated on the console.
    """
    rule = "=" * 70
    # sep="\n" places the three pieces on their own lines, which yields the
    # same output as three separate print() calls.
    print("\n" + rule, text, rule, sep="\n")
def check_python_version():
    """Report the running interpreter's version and verify it is 3.8 or newer.

    Returns:
        True if the interpreter is Python 3.8+, False otherwise.
    """
    print_header("CHECKING PYTHON VERSION")
    major, minor, micro = sys.version_info[:3]
    print(f"Python {major}.{minor}.{micro}")

    # Tuple comparison is lexicographic: major first, then minor.
    supported = (major, minor) >= (3, 8)
    if supported:
        print("✅ Python version OK")
    else:
        print("❌ Python 3.8+ required")
    return supported
def install_dependencies():
    """Install the packages listed in ``requirements.txt`` using pip.

    pip is invoked through the current interpreter (``sys.executable -m pip``)
    and ``requirements.txt`` is resolved relative to the current working
    directory. pip's output is captured so a successful run stays quiet; when
    pip fails, the captured output is printed so the cause of the failure is
    visible instead of only the exit status.

    Returns:
        True if pip exited successfully, False otherwise.
    """
    print_header("INSTALLING DEPENDENCIES")
    print("This may take a few minutes...")
    command = [sys.executable, "-m", "pip", "install", "-r", "requirements.txt"]
    try:
        # errors="replace" keeps decoding from raising on unexpected bytes
        # in pip's output.
        subprocess.run(
            command,
            check=True,
            capture_output=True,
            text=True,
            errors="replace",
        )
    except subprocess.CalledProcessError as e:
        print(f"❌ Failed to install packages: {e}")
        # The exception message only carries the command and exit status;
        # the actual reason is in the captured output.
        details = (e.stderr or e.stdout or "").strip()
        if details:
            print(details)
        return False
    print("✅ Python packages installed")
    return True
def install_playwright():
    """Download the Chromium browser through Playwright's installer.

    Runs ``playwright install chromium`` with the current interpreter; the
    installer's own output goes straight to the console.

    Returns:
        True if the installer exited successfully, False otherwise.
    """
    print_header("INSTALLING PLAYWRIGHT BROWSER")
    print("Installing Chromium...")
    playwright_cmd = [sys.executable, "-m", "playwright", "install", "chromium"]
    try:
        subprocess.run(playwright_cmd, check=True)
    except subprocess.CalledProcessError as err:
        print(f"❌ Failed to install Playwright: {err}")
        return False
    else:
        print("✅ Playwright Chromium installed")
        return True
def create_directories():
    """Create ``data`` and its listings/financials/news/reports subdirectories.

    Paths are relative to the current working directory. Directories that
    already exist are left as they are.

    Returns:
        True if every directory was created or already existed, False as
        soon as one of them cannot be created.
    """
    print_header("CREATING DIRECTORIES")
    dirs = [
        "data",
        "data/listings",
        "data/financials",
        "data/news",
        "data/reports",
    ]
    for dir_path in dirs:
        try:
            os.makedirs(dir_path, exist_ok=True)
        except OSError as e:
            # e.g. permission denied, or a regular file already occupies the
            # path. Report it and signal failure like the other setup steps
            # instead of crashing with a traceback.
            print(f"❌ Could not create {dir_path}/: {e}")
            return False
        print(f"✅ Created {dir_path}/")
    return True
def run_test():
    """Run ``test_extraction.py`` with the current interpreter.

    Returns:
        True if the script exited successfully, False if it exited with a
        non-zero status.
    """
    print_header("RUNNING TEST EXTRACTION")
    intro_lines = (
        "This will extract a few stocks from CSE...",
        "A browser window will open - this is normal!",
        "",  # blank spacer line before the test's own output
    )
    for line in intro_lines:
        print(line)

    test_cmd = [sys.executable, "test_extraction.py"]
    try:
        subprocess.run(test_cmd, check=True)
    except subprocess.CalledProcessError:
        print("❌ Test failed")
        return False
    return True
def main():
    """Run the interactive setup flow and report the outcome.

    The flow is: show what will happen and wait for confirmation, run the
    setup steps in order (stopping at the first failure), then offer to run
    the test extraction.

    Returns:
        0 if every setup step succeeded and the optional test extraction
        either passed or was not run; 1 if setup was cancelled, a setup
        step failed, or the test extraction failed.
    """
    print_header("🚀 STOCK INTELLIGENCE SYSTEM - SETUP & TEST")
    print("\nThis script will:")
    print(" 1. Check Python version")
    print(" 2. Install required packages")
    print(" 3. Install Playwright browser")
    print(" 4. Create data directories")
    print(" 5. Run a test extraction")
    print("\nPress Enter to continue, or Ctrl+C to cancel...")
    try:
        input()
    except (KeyboardInterrupt, EOFError):
        # EOFError is raised when stdin is closed or not interactive; with
        # nobody able to confirm, treat it the same as an explicit cancel.
        print("\n\nSetup cancelled.")
        return 1

    # Each step returns True on success. The message is printed when the
    # step fails; check_python_version already prints its own.
    steps = (
        (check_python_version, None),
        (install_dependencies, "\n❌ Setup failed at dependency installation"),
        (install_playwright, "\n❌ Setup failed at Playwright installation"),
        (create_directories, "\n❌ Setup failed at directory creation"),
    )
    for step, failure_message in steps:
        if not step():
            if failure_message is not None:
                print(failure_message)
            return 1

    # Ask before running the test.
    print_header("READY TO TEST")
    print("Setup complete! Ready to run test extraction.")
    print("This will take 1-2 minutes and open a browser window.")
    # Flush so the prompt is visible before input() blocks.
    print("\nRun test now? (y/n): ", end="", flush=True)

    exit_code = 0
    try:
        answer = input().strip().lower()
        if answer in ("y", "yes"):
            if run_test():
                print_header("✅ SUCCESS!")
                print("\nEverything is working!")
                print("\nNext steps:")
                print(" 1. Check data/listings/ for extracted stock data")
                print(" 2. Run: python main.py (for full pipeline)")
                print(" 3. Check GUIDE.md for detailed usage instructions")
            else:
                print_header("⚠️ TEST FAILED")
                print("\nThe test extraction failed. This might be because:")
                print(" 1. The exchange websites changed their structure")
                print(" 2. Network connectivity issues")
                print(" 3. Website blocking automated access")
                print("\nCheck data/listings/ for HTML files to debug.")
                exit_code = 1
    except (KeyboardInterrupt, EOFError):
        # Covers Ctrl+C at the prompt or during the test run, and a closed
        # stdin at the prompt.
        print("\n\nTest skipped.")

    print_header("SETUP COMPLETE")
    print("You're ready to go! See GUIDE.md for usage instructions.")
    return exit_code


if __name__ == "__main__":
    # Propagate the outcome as the process exit status so shells and other
    # tooling can detect a failed setup.
    sys.exit(main())