# Source: microcap_scrapping/analyze.py (Python, 276 lines, 9.6 KiB)

"""
Example analysis script - What you can do with the collected data
"""
import sqlite3
import json
import os
from collections import defaultdict
class StockAnalyzer:
    """Console reports over the collected stock database and JSON files.

    Every public report method prints a human-readable report to stdout and
    returns ``None``. SQL data is read from the ``stocks_master`` and
    ``coverage_report`` tables; per-ticker JSON files are read from
    ``<data_dir>/news`` and ``<data_dir>/financials``.

    The instance may be used as a context manager so that the SQLite
    connection is closed even when a report raises.

    Args:
        db_path: Path of the SQLite database to open.
        data_dir: Directory holding the ``news``, ``financials`` and
            ``reports`` sub-directories.
    """

    _BANNER_WIDTH = 70
    _NEWS_SUFFIX = "_news_pr.json"

    def __init__(self, db_path="data/stocks.db", data_dir="data"):
        self.data_dir = data_dir
        self.conn = sqlite3.connect(db_path)
        self.cursor = self.conn.cursor()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Returning None lets any in-flight exception propagate.
        self.close()

    @classmethod
    def _banner(cls, title):
        """Print a section title framed by two separator rules."""
        rule = "=" * cls._BANNER_WIDTH
        print("\n" + rule)
        print(title)
        print(rule)

    @staticmethod
    def _flag(value):
        """Return a visible marker for a truthy/falsy coverage column."""
        return "✅" if value else "❌"

    @staticmethod
    def _load_json(path):
        """Return the JSON object stored at *path*, or ``None`` if unusable.

        Unreadable, malformed or non-object files produce a warning instead
        of an exception so one bad file cannot abort a whole report.
        """
        try:
            # JSON is UTF-8; do not depend on the platform default encoding.
            with open(path, "r", encoding="utf-8") as handle:
                data = json.load(handle)
        except (OSError, ValueError) as exc:
            # JSONDecodeError and UnicodeDecodeError are both ValueErrors.
            print(f" Warning: could not read {path}: {exc}")
            return None
        if not isinstance(data, dict):
            print(f" Warning: unexpected JSON structure in {path}")
            return None
        return data

    def get_summary_stats(self):
        """Print stock totals, per-exchange and per-sector counts, and coverage."""
        self._banner("DATABASE SUMMARY STATISTICS")

        # Total stocks
        self.cursor.execute("SELECT COUNT(*) FROM stocks_master")
        total_stocks = self.cursor.fetchone()[0]
        print(f"\nTotal stocks tracked: {total_stocks}")

        # By exchange
        self.cursor.execute("""
            SELECT exchange, COUNT(*) as count
            FROM stocks_master
            GROUP BY exchange
            ORDER BY count DESC
        """)
        print("\nStocks by exchange:")
        for exchange, count in self.cursor.fetchall():
            print(f" {exchange}: {count}")

        # By sector (if available)
        self.cursor.execute("""
            SELECT sector, COUNT(*) as count
            FROM stocks_master
            WHERE sector IS NOT NULL AND sector != ''
            GROUP BY sector
            ORDER BY count DESC
            LIMIT 10
        """)
        sectors = self.cursor.fetchall()
        if sectors:
            print("\nTop 10 sectors:")
            for sector, count in sectors:
                print(f" {sector}: {count}")

        # Coverage stats
        self.cursor.execute("""
            SELECT
                SUM(CASE WHEN has_financials = 1 THEN 1 ELSE 0 END) as with_financials,
                SUM(CASE WHEN has_news = 1 THEN 1 ELSE 0 END) as with_news,
                SUM(CASE WHEN has_press_releases = 1 THEN 1 ELSE 0 END) as with_pr,
                SUM(CASE WHEN has_financials = 1 AND has_news = 1 AND has_press_releases = 1 THEN 1 ELSE 0 END) as complete
            FROM coverage_report
        """)
        # SUM() over zero rows yields NULL; report that as 0, not "None".
        fin, news, pr, complete = (value or 0 for value in self.cursor.fetchone())
        print("\nData coverage:")
        print(f" Stocks with financials: {fin}")
        print(f" Stocks with news: {news}")
        print(f" Stocks with press releases: {pr}")
        print(f" Fully covered stocks: {complete}")

    def find_recent_news_activity(self, limit=20):
        """Print the *limit* tickers with the most news articles plus press releases.

        Counts come from the ``*_news_pr.json`` files in ``<data_dir>/news``.
        Tickers without any item are omitted; ties are listed in file-name
        order.
        """
        self._banner(f"TOP {limit} STOCKS BY NEWS ACTIVITY")

        news_dir = os.path.join(self.data_dir, "news")
        if not os.path.isdir(news_dir):
            print("No news data available yet")
            return

        stock_news_count = []
        # Sorted so that the stable sort below breaks ties deterministically.
        for filename in sorted(os.listdir(news_dir)):
            if not filename.endswith(self._NEWS_SUFFIX):
                continue
            data = self._load_json(os.path.join(news_dir, filename))
            if data is None:
                continue
            # "or []" also covers an explicit JSON null.
            news_count = len(data.get('news_articles') or [])
            pr_count = len(data.get('press_releases') or [])
            if news_count > 0 or pr_count > 0:
                stock_news_count.append({
                    # Strip only the trailing suffix, not every occurrence.
                    'ticker': filename[:-len(self._NEWS_SUFFIX)],
                    'news': news_count,
                    'pr': pr_count,
                    'total': news_count + pr_count,
                })

        # Sort by total
        stock_news_count.sort(key=lambda entry: entry['total'], reverse=True)

        print(f"\n{'Ticker':<10} {'News':<10} {'PR':<10} {'Total':<10}")
        print("-" * 40)
        for stock in stock_news_count[:limit]:
            print(f"{stock['ticker']:<10} {stock['news']:<10} {stock['pr']:<10} {stock['total']:<10}")

    def find_stocks_by_sector(self, sector):
        """Print every stock whose sector contains *sector* (SQL ``LIKE`` match)."""
        self._banner(f"STOCKS IN SECTOR: {sector.upper()}")

        self.cursor.execute("""
            SELECT symbol, company_name, exchange
            FROM stocks_master
            WHERE sector LIKE ?
            ORDER BY symbol
        """, (f"%{sector}%",))
        stocks = self.cursor.fetchall()

        if stocks:
            print(f"\nFound {len(stocks)} stocks:")
            for symbol, name, exchange in stocks:
                # NULL columns come back as None, which rejects width specs.
                print(f" {symbol or '':<8} {name or '':<50} [{exchange}]")
        else:
            print(f"\nNo stocks found in sector: {sector}")

    def get_stock_report(self, ticker):
        """Print master data, coverage flags and file-based details for *ticker*."""
        self._banner(f"STOCK REPORT: {ticker}")

        # Get basic info
        self.cursor.execute("""
            SELECT company_name, exchange, sector, industry, listing_date
            FROM stocks_master
            WHERE symbol = ?
        """, (ticker,))
        result = self.cursor.fetchone()
        if not result:
            print(f"\nStock {ticker} not found in database")
            return

        name, exchange, sector, industry, listing_date = result
        print(f"\nCompany: {name}")
        print(f"Exchange: {exchange}")
        optional_fields = (
            ("Sector", sector),
            ("Industry", industry),
            ("Listing Date", listing_date),
        )
        for label, value in optional_fields:
            if value:
                print(f"{label}: {value}")

        # Check coverage
        self.cursor.execute("""
            SELECT has_financials, has_news, has_press_releases
            FROM coverage_report
            WHERE ticker = ?
        """, (ticker,))
        coverage = self.cursor.fetchone()
        if coverage:
            has_fin, has_news, has_pr = coverage
            print("\nData Coverage:")
            print(f" Financials: {self._flag(has_fin)}")
            print(f" News: {self._flag(has_news)}")
            print(f" Press Releases: {self._flag(has_pr)}")

        # Load financial data if available
        fin_file = os.path.join(self.data_dir, "financials", f"{ticker}_yahoo.json")
        if os.path.exists(fin_file):
            print(f"\nFinancial Data: (see {fin_file})")
            data = self._load_json(fin_file)
            profile = data.get('profile') if data is not None else None
            price = profile.get('current_price') if isinstance(profile, dict) else None
            if price:
                print(f" Current Price: ${price}")

        # Load news if available
        news_file = os.path.join(self.data_dir, "news", f"{ticker}{self._NEWS_SUFFIX}")
        if os.path.exists(news_file):
            data = self._load_json(news_file)
            if data is not None:
                articles = data.get('news_articles') or []
                releases = data.get('press_releases') or []
                print("\nNews & Press Releases:")
                print(f" News articles: {len(articles)}")
                print(f" Press releases: {len(releases)}")
                if articles:
                    print("\n Recent news:")
                    for article in articles[:3]:
                        print(f" - {article.get('title', 'N/A')}")

        # Check if report exists
        report_file = os.path.join(self.data_dir, "reports", f"{ticker}_report.txt")
        if os.path.exists(report_file):
            print(f"\nFull report available at: {report_file}")

    def export_to_csv(self, output_file="stock_list.csv"):
        """Write every stock with its coverage flags to *output_file* as CSV.

        Stocks without a ``coverage_report`` row are kept (LEFT JOIN) and get
        empty coverage columns.
        """
        import csv

        self._banner(f"EXPORTING TO CSV: {output_file}")

        self.cursor.execute("""
            SELECT s.symbol, s.company_name, s.exchange, s.sector, s.industry,
                   c.has_financials, c.has_news, c.has_press_releases
            FROM stocks_master s
            LEFT JOIN coverage_report c ON s.symbol = c.ticker
            ORDER BY s.symbol
        """)

        # newline='' is required by the csv module to avoid doubled line ends.
        with open(output_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(['Symbol', 'Company', 'Exchange', 'Sector', 'Industry',
                             'Has Financials', 'Has News', 'Has PR'])
            writer.writerows(self.cursor.fetchall())

        print(f"\n✅ Exported to {output_file}")

    def close(self):
        """Close the underlying SQLite connection."""
        self.conn.close()
def main():
    """Run the example analyses against the default ``data/stocks.db`` database.

    Prints a hint and returns early when the database file does not exist.
    Otherwise prints the summary statistics and the news-activity ranking,
    then lists ideas for extending the script.
    """
    print("\n" + "=" * 70)
    print("STOCK DATA ANALYSIS - EXAMPLES")
    print("=" * 70)

    # Check if database exists
    if not os.path.exists("data/stocks.db"):
        print("\n❌ Database not found!")
        print(" Run 'python main.py' first to collect data")
        return

    analyzer = StockAnalyzer()
    try:
        # Example 1: Get summary statistics
        analyzer.get_summary_stats()

        # Example 2: Find most active stocks (by news)
        analyzer.find_recent_news_activity(limit=10)

        # Example 3: Find stocks in a sector
        # analyzer.find_stocks_by_sector("Technology")

        # Example 4: Get report for specific stock
        # analyzer.get_stock_report("ABC")

        # Example 5: Export to CSV
        # analyzer.export_to_csv("my_stocks.csv")
    finally:
        # Release the SQLite connection even if one of the reports raised.
        analyzer.close()

    print("\n" + "=" * 70)
    print("ANALYSIS COMPLETE")
    print("=" * 70)
    print("\nYou can modify this script to:")
    print(" - Filter stocks by criteria (P/E, market cap, etc.)")
    print(" - Find stocks with specific keywords in news")
    print(" - Compare stocks within sectors")
    print(" - Track changes over time")
    print(" - Generate custom reports")


if __name__ == "__main__":
    main()