feat: Implement stock listing extraction and database population
- Added `extract_listings.py` for extracting stock listings from TSX, TSXV, CSE, and CBOE using Playwright.
- Created `main.py` to orchestrate the entire stock intelligence system, including extraction, database import, financial scraping, news scraping, and report generation.
- Developed `populate_database.py` to populate the database with existing JSON data.
- Introduced `scrape_nasdaq_tsx_only.py` for focused scraping of NASDAQ and TSX stocks.
- Added `setup.py` for initial setup and testing of the system.
- Created `watchlist.txt` template for user-defined stock tracking.
- Generated `final_test_output.txt` to log the results of the test run.
This commit is contained in:
@@ -0,0 +1,73 @@
|
||||
# Configuration file for API keys and settings
# DO NOT commit secrets to version control! Supply them through environment
# variables instead of writing them into this file.
import os

# SerpAPI Configuration
# Read from the SERPAPI_KEY environment variable; empty string when unset.
# NOTE(review): the key that used to be hard-coded here was committed and
# should be treated as compromised -- revoke/rotate it with the provider.
SERPAPI_KEY = os.environ.get("SERPAPI_KEY", "")
# Database Configuration
# Relative path to the stock database file.
DATABASE_PATH = "data/stocks.db"
# Scraping Configuration
RATE_LIMIT_DELAY = 2  # seconds between requests
MAX_RETRIES = 3
TIMEOUT = 90  # seconds; increased from the earlier value of 30
# Data Update Frequencies
# Maps each data category to how often it should be refreshed.
UPDATE_FREQUENCIES = {
    'listings': 'quarterly',     # Refresh stock listings
    'financials': 'daily',       # Update financial data
    'news': 'daily',             # Fetch news
    'press_releases': 'daily',   # Fetch press releases
    'filings': 'daily',          # Check for new filings
    'ownership': 'weekly',       # Update ownership data
    'agm': 'weekly',             # Check AGM info
    'tax': 'quarterly',          # Update tax disclosures
}
# Output Configuration
OUTPUT_FORMATS = ['json', 'csv', 'txt']
# Relative paths of the CSV export files.
CSV_EXPORT_PATH = "data/exports/stocks_export.csv"
DETAILED_CSV_PATH = "data/exports/stocks_detailed.csv"
# SEDAR+ Configuration
SEDAR_BASE_URL = "https://www.sedarplus.ca"
# Built from the base URL so the host name is defined in one place only.
SEDAR_SEARCH_URL = SEDAR_BASE_URL + "/csa-party/records"
# SEC EDGAR Configuration
SEC_BASE_URL = "https://www.sec.gov"
SEC_API_URL = "https://data.sec.gov"
# Placeholder contact address: update with your email before use.
SEC_USER_AGENT = "Stock Intelligence System contact@example.com"
# Exchange URLs
# Maps each exchange code to its website; TSX and TSXV share the same site.
EXCHANGES = {
    'TSX': 'https://www.tsx.com',
    'TSXV': 'https://www.tsx.com',
    'CSE': 'https://thecse.com',
    'CBOE': 'https://www.cboe.com',
}
# Filing Types to Retrieve
# SEDAR+ filing type names, listed in the order they are declared here.
FILING_TYPES_SEDAR = [
    'Annual financial statements',
    'Interim financial statements',
    'Management discussion and analysis',
    'Annual information form',
    'Material change report',
    'Management information circular',
    'News release',
    'Business acquisition report',
]
# SEC form types to retrieve.
FILING_TYPES_SEC = [
    '10-K',      # Annual report
    '10-Q',      # Quarterly report
    '8-K',       # Current report
    'DEF 14A',   # Proxy statement
    '20-F',      # Annual report (foreign)
    '6-K',       # Current report (foreign)
    'SC 13D',    # Beneficial ownership
    'SC 13G',    # Beneficial ownership (passive)
    '3',         # Initial statement of beneficial ownership
    '4',         # Statement of changes in beneficial ownership
    '5',         # Annual statement of beneficial ownership
]
Reference in New Issue
Block a user