276 lines
9.6 KiB
Python
276 lines
9.6 KiB
Python
|
|
"""
|
||
|
|
Example analysis script - What you can do with the collected data
|
||
|
|
"""
|
||
|
|
|
||
|
|
import csv
import json
import os
import sqlite3
from collections import defaultdict
|
||
|
|
|
||
|
|
class StockAnalyzer:
    """Print console reports from the collected stock database and JSON files.

    The reports query the ``stocks_master`` and ``coverage_report`` tables of
    a SQLite database and read per-ticker JSON files stored under
    ``<data_dir>/news`` and ``<data_dir>/financials``. Every report method
    prints to stdout and returns ``None``.

    Instances can be used as context managers so the database connection is
    closed even when a report raises::

        with StockAnalyzer() as analyzer:
            analyzer.get_summary_stats()
    """

    _RULE = "=" * 70
    _NEWS_SUFFIX = "_news_pr.json"

    def __init__(self, db_path="data/stocks.db", data_dir="data"):
        """Open the database connection.

        Args:
            db_path: Path handed to ``sqlite3.connect``.
            data_dir: Root directory holding the ``news``, ``financials`` and
                ``reports`` sub-directories. Defaults to ``"data"``, the
                location that was previously hard-coded.
        """
        self.data_dir = data_dir
        self.conn = sqlite3.connect(db_path)
        self.cursor = self.conn.cursor()

    def __enter__(self):
        """Return the analyzer itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the connection on leaving the ``with`` block.

        Exceptions raised inside the block are not suppressed.
        """
        self.close()

    @staticmethod
    def _print_banner(title):
        """Print *title* framed by two 70-character rules."""
        print("\n" + StockAnalyzer._RULE)
        print(title)
        print(StockAnalyzer._RULE)

    @staticmethod
    def _load_json(path):
        """Return the JSON object stored at *path*, or ``None`` if unusable.

        A file that cannot be read, decoded or parsed, or whose top-level
        value is not an object, is reported with a warning instead of
        aborting the whole report.
        """
        try:
            with open(path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError) as exc:
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            print(f"Warning: could not read {path}: {exc}")
            return None
        if not isinstance(data, dict):
            print(f"Warning: unexpected JSON structure in {path}")
            return None
        return data

    @staticmethod
    def _as_list(value):
        """Return *value* if it is a list, otherwise an empty list."""
        return value if isinstance(value, list) else []

    def get_summary_stats(self):
        """Print totals, per-exchange counts, top sectors and data coverage."""
        self._print_banner("DATABASE SUMMARY STATISTICS")

        # Total stocks
        self.cursor.execute("SELECT COUNT(*) FROM stocks_master")
        total_stocks = self.cursor.fetchone()[0]
        print(f"\nTotal stocks tracked: {total_stocks}")

        # By exchange
        self.cursor.execute("""
            SELECT exchange, COUNT(*) as count
            FROM stocks_master
            GROUP BY exchange
            ORDER BY count DESC
        """)
        print("\nStocks by exchange:")
        for exchange, count in self.cursor.fetchall():
            print(f" {exchange}: {count}")

        # By sector (if available)
        self.cursor.execute("""
            SELECT sector, COUNT(*) as count
            FROM stocks_master
            WHERE sector IS NOT NULL AND sector != ''
            GROUP BY sector
            ORDER BY count DESC
            LIMIT 10
        """)
        sectors = self.cursor.fetchall()
        if sectors:
            print("\nTop 10 sectors:")
            for sector, count in sectors:
                print(f" {sector}: {count}")

        # Coverage stats
        self.cursor.execute("""
            SELECT
                SUM(CASE WHEN has_financials = 1 THEN 1 ELSE 0 END) as with_financials,
                SUM(CASE WHEN has_news = 1 THEN 1 ELSE 0 END) as with_news,
                SUM(CASE WHEN has_press_releases = 1 THEN 1 ELSE 0 END) as with_pr,
                SUM(CASE WHEN has_financials = 1 AND has_news = 1 AND has_press_releases = 1 THEN 1 ELSE 0 END) as complete
            FROM coverage_report
        """)
        # SUM() yields NULL when coverage_report has no rows; show 0 instead
        # of "None".
        fin, news, pr, complete = (value or 0 for value in self.cursor.fetchone())

        print("\nData coverage:")
        print(f" Stocks with financials: {fin}")
        print(f" Stocks with news: {news}")
        print(f" Stocks with press releases: {pr}")
        print(f" Fully covered stocks: {complete}")

    def find_recent_news_activity(self, limit=20):
        """Print the *limit* tickers with the most news and press releases.

        Counts come from the ``*_news_pr.json`` files in ``<data_dir>/news``.
        Tickers without any article or press release are left out, unreadable
        files are skipped with a warning, and ties are listed in file-name
        order.

        Args:
            limit: Maximum number of rows to print.
        """
        self._print_banner(f"TOP {limit} STOCKS BY NEWS ACTIVITY")

        news_dir = f"{self.data_dir}/news"
        if not os.path.isdir(news_dir):
            print("No news data available yet")
            return

        activity = []
        # Sorted listing makes the order of equal totals deterministic.
        for filename in sorted(os.listdir(news_dir)):
            if not filename.endswith(self._NEWS_SUFFIX):
                continue
            data = self._load_json(os.path.join(news_dir, filename))
            if data is None:
                continue

            news_count = len(self._as_list(data.get('news_articles')))
            pr_count = len(self._as_list(data.get('press_releases')))
            total = news_count + pr_count
            if total:
                # Strip the suffix from the end only; str.replace would also
                # remove it from the middle of a file name.
                ticker = filename[:-len(self._NEWS_SUFFIX)]
                activity.append((ticker, news_count, pr_count, total))

        # Stable sort: highest total first, ties keep file-name order.
        activity.sort(key=lambda row: row[3], reverse=True)

        print(f"\n{'Ticker':<10} {'News':<10} {'PR':<10} {'Total':<10}")
        print("-" * 40)
        for ticker, news_count, pr_count, total in activity[:limit]:
            print(f"{ticker:<10} {news_count:<10} {pr_count:<10} {total:<10}")

    def find_stocks_by_sector(self, sector):
        """Print every stock whose sector contains *sector*.

        The match is a SQL ``LIKE '%sector%'`` comparison, so ``%`` and ``_``
        inside *sector* act as wildcards.

        Args:
            sector: Substring to look for in the ``sector`` column.
        """
        self._print_banner(f"STOCKS IN SECTOR: {sector.upper()}")

        self.cursor.execute("""
            SELECT symbol, company_name, exchange
            FROM stocks_master
            WHERE sector LIKE ?
            ORDER BY symbol
        """, (f"%{sector}%",))
        stocks = self.cursor.fetchall()

        if not stocks:
            print(f"\nNo stocks found in sector: {sector}")
            return

        print(f"\nFound {len(stocks)} stocks:")
        for symbol, name, exchange in stocks:
            # NULL columns come back as None, which cannot take a width
            # format spec (TypeError); substitute printable text first.
            symbol_text = symbol or ''
            name_text = name or 'N/A'
            print(f" {symbol_text:<8} {name_text:<50} [{exchange}]")

    def get_stock_report(self, ticker):
        """Print everything known about one stock.

        Shows the master-table details, the coverage flags, the current price
        from ``<data_dir>/financials/<ticker>_yahoo.json`` when present, the
        article counts and up to three headlines from
        ``<data_dir>/news/<ticker>_news_pr.json``, and the location of the
        text report if one exists.

        Args:
            ticker: Symbol to look up; matched exactly against ``symbol``.
        """
        self._print_banner(f"STOCK REPORT: {ticker}")

        # Get basic info
        self.cursor.execute("""
            SELECT company_name, exchange, sector, industry, listing_date
            FROM stocks_master
            WHERE symbol = ?
        """, (ticker,))
        row = self.cursor.fetchone()
        if not row:
            print(f"\nStock {ticker} not found in database")
            return

        name, exchange, sector, industry, listing_date = row
        print(f"\nCompany: {name}")
        print(f"Exchange: {exchange}")
        if sector:
            print(f"Sector: {sector}")
        if industry:
            print(f"Industry: {industry}")
        if listing_date:
            print(f"Listing Date: {listing_date}")

        # Check coverage
        self.cursor.execute("""
            SELECT has_financials, has_news, has_press_releases
            FROM coverage_report
            WHERE ticker = ?
        """, (ticker,))
        coverage = self.cursor.fetchone()
        if coverage:
            has_fin, has_news, has_pr = coverage
            print("\nData Coverage:")
            print(f" Financials: {'✅' if has_fin else '❌'}")
            print(f" News: {'✅' if has_news else '❌'}")
            print(f" Press Releases: {'✅' if has_pr else '❌'}")

        # Load financial data if available
        fin_file = f"{self.data_dir}/financials/{ticker}_yahoo.json"
        if os.path.exists(fin_file):
            print(f"\nFinancial Data: (see {fin_file})")
            data = self._load_json(fin_file)
            # "profile" may be missing or null; only a dict can hold a price.
            profile = data.get('profile') if data is not None else None
            price = profile.get('current_price') if isinstance(profile, dict) else None
            if price:
                print(f" Current Price: ${price}")

        # Load news if available
        news_file = f"{self.data_dir}/news/{ticker}_news_pr.json"
        if os.path.exists(news_file):
            data = self._load_json(news_file)
            if data is not None:
                articles = self._as_list(data.get('news_articles'))
                releases = self._as_list(data.get('press_releases'))
                print("\nNews & Press Releases:")
                print(f" News articles: {len(articles)}")
                print(f" Press releases: {len(releases)}")

                if articles:
                    print("\n Recent news:")
                    for article in articles[:3]:
                        title = article.get('title', 'N/A') if isinstance(article, dict) else 'N/A'
                        print(f" - {title}")

        # Check if report exists
        report_file = f"{self.data_dir}/reports/{ticker}_report.txt"
        if os.path.exists(report_file):
            print(f"\nFull report available at: {report_file}")

    def export_to_csv(self, output_file="stock_list.csv"):
        """Write every stock with its coverage flags to *output_file*.

        Stocks are LEFT JOINed to ``coverage_report``, so a stock without a
        coverage row is still exported, with its coverage columns left empty.

        Args:
            output_file: Destination path; an existing file is overwritten.
        """
        self._print_banner(f"EXPORTING TO CSV: {output_file}")

        self.cursor.execute("""
            SELECT s.symbol, s.company_name, s.exchange, s.sector, s.industry,
                   c.has_financials, c.has_news, c.has_press_releases
            FROM stocks_master s
            LEFT JOIN coverage_report c ON s.symbol = c.ticker
            ORDER BY s.symbol
        """)

        # newline='' lets the csv module control line endings itself.
        with open(output_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(['Symbol', 'Company', 'Exchange', 'Sector', 'Industry',
                             'Has Financials', 'Has News', 'Has PR'])
            writer.writerows(self.cursor.fetchall())

        print(f"\n✅ Exported to {output_file}")

    def close(self):
        """Close the database connection."""
        self.conn.close()
|
||
|
|
|
||
|
|
|
||
|
|
def main(db_path="data/stocks.db"):
    """Run the example analyses and print them to stdout.

    Prints a hint and returns early when the database file does not exist.
    Otherwise runs the summary and news-activity reports; the analyzer is
    always closed afterwards, even if a report raises.

    Args:
        db_path: Location of the SQLite database. Defaults to the path that
            was previously hard-coded.
    """
    print("\n" + "=" * 70)
    print("STOCK DATA ANALYSIS - EXAMPLES")
    print("=" * 70)

    # Check if database exists. sqlite3.connect would otherwise create an
    # empty database file and the first query would fail on a missing table.
    if not os.path.exists(db_path):
        print("\n❌ Database not found!")
        print(" Run 'python main.py' first to collect data")
        return

    analyzer = StockAnalyzer(db_path)
    try:
        # Example 1: Get summary statistics
        analyzer.get_summary_stats()

        # Example 2: Find most active stocks (by news)
        analyzer.find_recent_news_activity(limit=10)

        # Example 3: Find stocks in a sector
        # analyzer.find_stocks_by_sector("Technology")

        # Example 4: Get report for specific stock
        # analyzer.get_stock_report("ABC")

        # Example 5: Export to CSV
        # analyzer.export_to_csv("my_stocks.csv")
    finally:
        # Release the connection even when a report raises.
        analyzer.close()

    print("\n" + "=" * 70)
    print("ANALYSIS COMPLETE")
    print("=" * 70)
    print("\nYou can modify this script to:")
    print(" - Filter stocks by criteria (P/E, market cap, etc.)")
    print(" - Find stocks with specific keywords in news")
    print(" - Compare stocks within sectors")
    print(" - Track changes over time")
    print(" - Generate custom reports")
|
||
|
|
|
||
|
|
|
||
|
|
# Run the example analyses only when executed as a script, not on import.
if __name__ == '__main__':
    main()
|