Files

495 lines
19 KiB
Python
Raw Permalink Normal View History

"""
Database setup for Stock Intelligence System
SQLite database with all required tables
"""
import sqlite3
import os
from datetime import datetime
import json
class StockDatabase:
def __init__(self, db_path="data/stocks.db"):
self.db_path = db_path
os.makedirs(os.path.dirname(db_path), exist_ok=True)
self.conn = sqlite3.connect(db_path)
self.cursor = self.conn.cursor()
self.create_tables()
def create_tables(self):
"""Create all database tables"""
# Main stocks master table
self.cursor.execute("""
CREATE TABLE IF NOT EXISTS stocks_master (
id INTEGER PRIMARY KEY AUTOINCREMENT,
symbol TEXT NOT NULL UNIQUE,
company_name TEXT NOT NULL,
exchange TEXT NOT NULL,
sector TEXT,
industry TEXT,
country TEXT,
listing_date TEXT,
status TEXT DEFAULT 'active',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
""")
# Financial statements table
self.cursor.execute("""
CREATE TABLE IF NOT EXISTS financial_statements (
id INTEGER PRIMARY KEY AUTOINCREMENT,
stock_id INTEGER NOT NULL,
year INTEGER NOT NULL,
quarter TEXT,
statement_type TEXT NOT NULL,
revenue REAL,
cogs REAL,
gross_profit REAL,
operating_income REAL,
net_income REAL,
eps REAL,
ebit REAL,
ebitda REAL,
total_assets REAL,
total_liabilities REAL,
total_debt REAL,
shareholders_equity REAL,
cash REAL,
operating_cash_flow REAL,
investing_cash_flow REAL,
financing_cash_flow REAL,
free_cash_flow REAL,
is_ttm BOOLEAN DEFAULT 0,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (stock_id) REFERENCES stocks_master(id),
UNIQUE(stock_id, year, quarter, statement_type)
)
""")
# Financial metrics table
self.cursor.execute("""
CREATE TABLE IF NOT EXISTS financial_metrics (
id INTEGER PRIMARY KEY AUTOINCREMENT,
stock_id INTEGER NOT NULL,
year INTEGER NOT NULL,
quarter TEXT,
is_ttm BOOLEAN DEFAULT 0,
-- Valuation Ratios
pe_ratio REAL,
peg_ratio REAL,
pb_ratio REAL,
ps_ratio REAL,
price_to_cash_flow REAL,
ev_ebitda REAL,
ev_ebit REAL,
dividend_yield REAL,
price_to_fcf REAL,
ev_to_sales REAL,
-- Profitability Ratios
gross_margin REAL,
operating_margin REAL,
net_margin REAL,
roe REAL,
roa REAL,
roce REAL,
roic REAL,
ebitda_margin REAL,
-- Leverage Ratios
debt_to_equity REAL,
debt_to_assets REAL,
interest_coverage REAL,
financial_leverage REAL,
-- Liquidity Ratios
current_ratio REAL,
quick_ratio REAL,
cash_ratio REAL,
working_capital_ratio REAL,
-- Efficiency Ratios
inventory_turnover REAL,
asset_turnover REAL,
receivables_turnover REAL,
payables_turnover REAL,
days_sales_outstanding REAL,
days_inventory_outstanding REAL,
days_payable_outstanding REAL,
-- Growth Metrics
revenue_growth_yoy REAL,
eps_growth_yoy REAL,
net_income_growth_yoy REAL,
book_value_growth_yoy REAL,
-- Cash Flow Metrics
fcf_yield REAL,
operating_cf_ratio REAL,
capex_ratio REAL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (stock_id) REFERENCES stocks_master(id),
UNIQUE(stock_id, year, quarter)
)
""")
# News articles table
self.cursor.execute("""
CREATE TABLE IF NOT EXISTS news_articles (
id INTEGER PRIMARY KEY AUTOINCREMENT,
stock_id INTEGER NOT NULL,
title TEXT NOT NULL,
source TEXT,
published_date TEXT,
url TEXT,
snippet TEXT,
full_text TEXT,
sentiment TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (stock_id) REFERENCES stocks_master(id)
)
""")
# Press releases table
self.cursor.execute("""
CREATE TABLE IF NOT EXISTS press_releases (
id INTEGER PRIMARY KEY AUTOINCREMENT,
stock_id INTEGER NOT NULL,
title TEXT NOT NULL,
source TEXT NOT NULL,
published_date TEXT,
url TEXT,
summary TEXT,
full_text TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (stock_id) REFERENCES stocks_master(id)
)
""")
# Regulatory filings table
self.cursor.execute("""
CREATE TABLE IF NOT EXISTS filings (
id INTEGER PRIMARY KEY AUTOINCREMENT,
stock_id INTEGER NOT NULL,
filing_date TEXT NOT NULL,
filing_type TEXT NOT NULL,
title TEXT,
document_url TEXT,
source TEXT,
filing_text TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (stock_id) REFERENCES stocks_master(id)
)
""")
# AGM information table
self.cursor.execute("""
CREATE TABLE IF NOT EXISTS agm_info (
id INTEGER PRIMARY KEY AUTOINCREMENT,
stock_id INTEGER NOT NULL,
agm_date TEXT,
agm_time TEXT,
agm_location TEXT,
agm_agenda TEXT,
document_url TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (stock_id) REFERENCES stocks_master(id)
)
""")
# Tax disclosures table
self.cursor.execute("""
CREATE TABLE IF NOT EXISTS tax_disclosures (
id INTEGER PRIMARY KEY AUTOINCREMENT,
stock_id INTEGER NOT NULL,
year INTEGER NOT NULL,
income_tax_expense REAL,
deferred_tax_assets REAL,
deferred_tax_liabilities REAL,
effective_tax_rate REAL,
tax_loss_carryforwards REAL,
tax_jurisdictions TEXT,
notes TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (stock_id) REFERENCES stocks_master(id),
UNIQUE(stock_id, year)
)
""")
# Stock quotes table (real-time price data)
self.cursor.execute("""
CREATE TABLE IF NOT EXISTS stock_quotes (
id INTEGER PRIMARY KEY AUTOINCREMENT,
stock_id INTEGER NOT NULL,
quote_date TEXT,
quote_time TEXT,
open_price REAL,
high_price REAL,
low_price REAL,
close_price REAL,
volume TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (stock_id) REFERENCES stocks_master(id)
)
""")
# Coverage tracking table
self.cursor.execute("""
CREATE TABLE IF NOT EXISTS coverage_report (
id INTEGER PRIMARY KEY AUTOINCREMENT,
ticker TEXT NOT NULL UNIQUE,
exchange TEXT NOT NULL,
has_financials BOOLEAN DEFAULT 0,
has_ttm BOOLEAN DEFAULT 0,
has_news BOOLEAN DEFAULT 0,
has_press_releases BOOLEAN DEFAULT 0,
has_filings BOOLEAN DEFAULT 0,
has_tax_disclosures BOOLEAN DEFAULT 0,
has_agm_info BOOLEAN DEFAULT 0,
last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
error_log TEXT,
FOREIGN KEY (ticker) REFERENCES stocks_master(symbol)
)
""")
self.conn.commit()
print("✅ Database tables created successfully")
def add_stock(self, symbol, company_name, exchange, sector=None, industry=None, country=None, listing_date=None):
"""Add a stock to the master table"""
try:
self.cursor.execute("""
INSERT OR IGNORE INTO stocks_master
(symbol, company_name, exchange, sector, industry, country, listing_date)
VALUES (?, ?, ?, ?, ?, ?, ?)
""", (symbol, company_name, exchange, sector, industry, country, listing_date))
self.conn.commit()
return self.cursor.lastrowid
except Exception as e:
print(f"Error adding stock {symbol}: {e}")
return None
def import_listings_from_json(self, json_file):
"""Import stock listings from JSON file"""
print(f"\n📥 Importing listings from {json_file}...")
with open(json_file, 'r', encoding='utf-8') as f:
listings = json.load(f)
imported = 0
for stock in listings:
stock_id = self.add_stock(
symbol=stock.get('symbol'),
company_name=stock.get('name'),
exchange=stock.get('exchange'),
sector=stock.get('sector'),
industry=stock.get('industry'),
country=stock.get('country', 'Canada')
)
if stock_id:
imported += 1
# Also add to coverage report
self.cursor.execute("""
INSERT OR IGNORE INTO coverage_report (ticker, exchange)
VALUES (?, ?)
""", (stock.get('symbol'), stock.get('exchange')))
self.conn.commit()
print(f"✅ Imported {imported} stocks")
return imported
def get_all_stocks(self):
"""Get all stocks from database"""
self.cursor.execute("SELECT * FROM stocks_master")
return self.cursor.fetchall()
def get_coverage_report(self):
"""Get coverage report for all stocks"""
self.cursor.execute("""
SELECT ticker, exchange,
has_financials, has_ttm, has_news, has_press_releases,
has_filings, has_tax_disclosures, has_agm_info,
last_updated
FROM coverage_report
ORDER BY ticker
""")
return self.cursor.fetchall()
def update_coverage(self, ticker, **kwargs):
"""Update coverage flags for a stock"""
fields = []
values = []
for key, value in kwargs.items():
fields.append(f"{key} = ?")
values.append(value)
if fields:
query = f"UPDATE coverage_report SET {', '.join(fields)}, last_updated = ? WHERE ticker = ?"
values.extend([datetime.now().isoformat(), ticker])
self.cursor.execute(query, values)
self.conn.commit()
def get_stock_id(self, ticker):
"""Get stock ID from ticker"""
self.cursor.execute("SELECT id FROM stocks_master WHERE symbol = ?", (ticker,))
result = self.cursor.fetchone()
return result[0] if result else None
def insert_financial_metrics(self, ticker, year, metrics_dict, is_ttm=False, quarter=None):
"""Insert calculated financial metrics into database"""
stock_id = self.get_stock_id(ticker)
if not stock_id:
return False
try:
self.cursor.execute("""
INSERT OR REPLACE INTO financial_metrics (
stock_id, year, quarter, is_ttm,
pe_ratio, peg_ratio, pb_ratio, ps_ratio, price_to_cash_flow,
ev_ebitda, ev_ebit, dividend_yield, price_to_fcf, ev_to_sales,
gross_margin, operating_margin, net_margin, roe, roa, roce, roic, ebitda_margin,
debt_to_equity, debt_to_assets, interest_coverage, financial_leverage,
current_ratio, quick_ratio, cash_ratio, working_capital_ratio,
inventory_turnover, asset_turnover, receivables_turnover, payables_turnover,
days_sales_outstanding, days_inventory_outstanding, days_payable_outstanding,
revenue_growth_yoy, eps_growth_yoy, net_income_growth_yoy, book_value_growth_yoy,
fcf_yield, operating_cf_ratio, capex_ratio
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""", (
stock_id, year, quarter, is_ttm,
metrics_dict.get('pe_ratio'), metrics_dict.get('peg_ratio'), metrics_dict.get('pb_ratio'),
metrics_dict.get('ps_ratio'), metrics_dict.get('price_to_cash_flow'),
metrics_dict.get('ev_ebitda'), metrics_dict.get('ev_ebit'), metrics_dict.get('dividend_yield'),
metrics_dict.get('price_to_fcf'), metrics_dict.get('ev_to_sales'),
metrics_dict.get('gross_margin'), metrics_dict.get('operating_margin'), metrics_dict.get('net_margin'),
metrics_dict.get('roe'), metrics_dict.get('roa'), metrics_dict.get('roce'),
metrics_dict.get('roic'), metrics_dict.get('ebitda_margin'),
metrics_dict.get('debt_to_equity'), metrics_dict.get('debt_to_assets'),
metrics_dict.get('interest_coverage'), metrics_dict.get('financial_leverage'),
metrics_dict.get('current_ratio'), metrics_dict.get('quick_ratio'), metrics_dict.get('cash_ratio'),
metrics_dict.get('working_capital_ratio'),
metrics_dict.get('inventory_turnover'), metrics_dict.get('asset_turnover'),
metrics_dict.get('receivables_turnover'), metrics_dict.get('payables_turnover'),
metrics_dict.get('days_sales_outstanding'), metrics_dict.get('days_inventory_outstanding'),
metrics_dict.get('days_payable_outstanding'),
metrics_dict.get('revenue_growth_yoy'), metrics_dict.get('eps_growth_yoy'),
metrics_dict.get('net_income_growth_yoy'), metrics_dict.get('book_value_growth_yoy'),
metrics_dict.get('fcf_yield'), metrics_dict.get('operating_cf_ratio'), metrics_dict.get('capex_ratio')
))
self.conn.commit()
return True
except Exception as e:
print(f"Error inserting metrics for {ticker}: {e}")
return False
def insert_news_article(self, ticker, title, source, published_date, url, snippet=None):
"""Insert news article into database"""
stock_id = self.get_stock_id(ticker)
if not stock_id:
return False
try:
self.cursor.execute("""
INSERT OR IGNORE INTO news_articles (stock_id, title, source, published_date, url, snippet)
VALUES (?, ?, ?, ?, ?, ?)
""", (stock_id, title, source, published_date, url, snippet))
self.conn.commit()
return True
except Exception as e:
print(f"Error inserting news for {ticker}: {e}")
return False
def insert_filing(self, ticker, filing_date, filing_type, title, document_url, source):
"""Insert regulatory filing into database"""
stock_id = self.get_stock_id(ticker)
if not stock_id:
return False
try:
self.cursor.execute("""
INSERT OR IGNORE INTO filings (stock_id, filing_date, filing_type, title, document_url, source)
VALUES (?, ?, ?, ?, ?, ?)
""", (stock_id, filing_date, filing_type, title, document_url, source))
self.conn.commit()
return True
except Exception as e:
print(f"Error inserting filing for {ticker}: {e}")
return False
def insert_stock_quote(self, ticker, quote_data):
"""Insert stock quote data into database"""
stock_id = self.get_stock_id(ticker)
if not stock_id:
return False
try:
# Parse price values (remove commas and convert to float)
def parse_price(value):
if not value:
return None
try:
return float(str(value).replace(',', ''))
except:
return None
open_price = parse_price(quote_data.get('open'))
high_price = parse_price(quote_data.get('high'))
low_price = parse_price(quote_data.get('low'))
close_price = parse_price(quote_data.get('close'))
volume = quote_data.get('volume', '')
quote_date = quote_data.get('date', '')
self.cursor.execute("""
INSERT INTO stock_quotes
(stock_id, quote_date, open_price, high_price, low_price, close_price, volume)
VALUES (?, ?, ?, ?, ?, ?, ?)
""", (stock_id, quote_date, open_price, high_price, low_price, close_price, volume))
self.conn.commit()
return True
except Exception as e:
print(f"Error inserting quote for {ticker}: {e}")
return False
def close(self):
"""Close database connection"""
self.conn.close()
def main():
"""Initialize database and import listings if available"""
db = StockDatabase()
# Check if we have listings to import
listings_file = "data/listings/all_listings_combined.json"
if os.path.exists(listings_file):
db.import_listings_from_json(listings_file)
# Show stats
stocks = db.get_all_stocks()
print(f"\n📊 Database Statistics:")
print(f" Total stocks: {len(stocks)}")
# Group by exchange
exchanges = {}
for stock in stocks:
exchange = stock[3] # exchange column
exchanges[exchange] = exchanges.get(exchange, 0) + 1
for exchange, count in exchanges.items():
print(f" {exchange}: {count} stocks")
else:
print(f"⚠️ No listings file found at {listings_file}")
print(" Run extract_listings.py first to get stock data")
db.close()
if __name__ == "__main__":
main()