#!/usr/bin/env python3
"""
Test script for the company parser with manual JSON parsing.
"""

import asyncio
import os
import sys

# Machine-specific checkout location; the import path and the .env file both
# live underneath it, so it is spelled out once here.
PROJECT_ROOT = "/home/oluwasanmi/Documents/Work/MKD/anton_wireframe"

# Must run before the ``services`` import below: that package is resolved
# through this extra sys.path entry.
sys.path.insert(0, f"{PROJECT_ROOT}/app")

import pandas as pd
from dotenv import load_dotenv
from services.llm_parser import InvestorProcessor

# Load environment variables from root directory
load_dotenv(f"{PROJECT_ROOT}/.env")

# Also check if API key is set (not needed for companies now but for consistency)
# A missing or empty key only produces a warning; the script keeps running.
if not os.getenv("OPENROUTER_API_KEY"):
    print("⚠️ WARNING: OPENROUTER_API_KEY not found in environment")
    print("This is OK for companies (no LLM needed), but will fail for investors")


async def test_parser(
    csv_path: str = (
        "/home/oluwasanmi/Documents/Work/MKD/anton_wireframe/data/300 Companies data.csv"
    ),
    sample_size: int = 3,
) -> None:
    """Test the company parser on a small sample and print a readable report.

    Reads ``csv_path`` with pandas, keeps the first ``sample_size`` rows, passes
    them to ``InvestorProcessor.parse_companies`` with ``save_to_db=False`` and
    prints a summary of every returned record.

    Args:
        csv_path: Location of the company CSV. The default is the original
            hard-coded, machine-specific path.
        sample_size: Number of leading rows to process (default 3).

    Raises:
        Whatever ``pd.read_csv`` or ``parse_companies`` raise; nothing is
        caught here.
    """
    print("🧪 Testing Manual Company JSON Parser (No LLM)\n")

    # Load the company data and keep only the first rows for a quick test
    test_df = pd.read_csv(csv_path).head(sample_size)

    processor = InvestorProcessor()

    print(f"Processing {len(test_df)} test companies...\n")
    # NOTE(review): results are used below as an iterable of dict-like records
    # (``.get`` is called on each) — confirm against parse_companies.
    results = await processor.parse_companies(test_df, save_to_db=False)

    banner = "=" * 80
    print("\n" + banner)
    print("📊 TEST RESULTS")
    print(banner)

    for idx, result in enumerate(results, 1):
        # A key may be present with a None value; ``or []`` treats that like a
        # missing key so the len() calls below cannot raise TypeError.
        executives = result.get("key_executives") or []
        investors = result.get("investor_names") or []
        categories = result.get("client_categories") or []

        print(f"\n{idx}. {result.get('name')}")
        print(f" Website: {result.get('website')}")
        print(f" Location: {result.get('location')}")
        print(f" Industry: {result.get('industry')}")
        # Any falsy founded_year (None, 0, "") is reported as Unknown
        print(f" Founded: {result.get('founded_year') or 'Unknown'}")

        print(f" Executives: {len(executives)}")
        for exec_member in executives[:3]:  # Show first 3
            print(f" - {exec_member.get('name')} ({exec_member.get('title')})")

        print(f" Investors: {len(investors)}")
        if investors:
            print(f" - {', '.join(investors[:5])}")  # Show first 5

        print(f" Client Categories: {len(categories)}")
        if categories:
            print(f" - {', '.join(categories[:3])}")  # Show first 3

    print("\n" + banner)
    print(f"✅ Successfully processed {len(results)}/{len(test_df)} companies")
    print("🎉 No LLM calls needed - 100% manual parsing!")
    print(banner)


if __name__ == "__main__":
    # Script entry point: run the async test to completion on a fresh event loop.
    asyncio.run(test_parser())