Files
Anton_wireframe/test_company_parser.py
T

79 lines
2.7 KiB
Python

#!/usr/bin/env python3
"""
Test script for the company parser with manual JSON parsing.
"""
import asyncio
import os
import sys
sys.path.insert(0, "/home/oluwasanmi/Documents/Work/MKD/anton_wireframe/app")
import pandas as pd
from dotenv import load_dotenv
from services.llm_parser import InvestorProcessor
# Read settings from the .env file in the project root before anything else
# looks at the environment.
load_dotenv("/home/oluwasanmi/Documents/Work/MKD/anton_wireframe/.env")

# The company flow needs no LLM, so a missing key only triggers a warning;
# the check is kept for consistency with the investor flow, which needs the key.
if not os.environ.get("OPENROUTER_API_KEY"):
    print(
        "⚠️ WARNING: OPENROUTER_API_KEY not found in environment",
        "This is OK for companies (no LLM needed), but will fail for investors",
        sep="\n",
    )
# Default input: the CSV path this script originally hard-coded.
_DEFAULT_COMPANIES_CSV = (
    "/home/oluwasanmi/Documents/Work/MKD/anton_wireframe/data/300 Companies data.csv"
)


def _print_company_summary(position, company):
    """Print the report lines for one parsed company record.

    Args:
        position: 1-based number shown in front of the company name.
        company: Mapping-like record; fields are read with ``.get``.

    List-valued fields are normalised with ``or []`` so that a key which is
    present but holds ``None`` is reported as empty instead of raising
    ``TypeError`` inside ``len()``.
    """
    executives = company.get("key_executives") or []
    investors = company.get("investor_names") or []
    categories = company.get("client_categories") or []
    founded_year = company.get("founded_year")

    print(f"\n{position}. {company.get('name')}")
    print(f" Website: {company.get('website')}")
    print(f" Location: {company.get('location')}")
    print(f" Industry: {company.get('industry')}")
    print(f" Founded: {founded_year}" if founded_year else " Founded: Unknown")

    print(f" Executives: {len(executives)}")
    for exec_member in executives[:3]:  # Show first 3
        print(f" - {exec_member.get('name')} ({exec_member.get('title')})")

    print(f" Investors: {len(investors)}")
    if investors:
        print(f" - {', '.join(investors[:5])}")  # Show first 5

    print(f" Client Categories: {len(categories)}")
    if categories:
        print(f" - {', '.join(categories[:3])}")  # Show first 3


async def test_parser(
    csv_path: str = _DEFAULT_COMPANIES_CSV, sample_size: int = 3
) -> None:
    """Run the company parser on the first rows of a CSV and print a report.

    Args:
        csv_path: CSV file to load with ``pandas.read_csv``. Defaults to the
            path the script originally hard-coded.
        sample_size: Number of leading rows handed to the parser. Defaults
            to 3, the original sample size.

    The sampled rows are passed to ``InvestorProcessor.parse_companies`` with
    ``save_to_db=False``. Each returned record is printed via
    ``_print_company_summary``, followed by a processed/total count.
    """
    print("🧪 Testing Manual Company JSON Parser (No LLM)\n")

    # Load the company data and keep only a small sample for the test run.
    df = pd.read_csv(csv_path)
    test_df = df.head(sample_size)

    processor = InvestorProcessor()
    print(f"Processing {len(test_df)} test companies...\n")
    results = await processor.parse_companies(test_df, save_to_db=False)

    banner = "=" * 80
    print("\n" + banner)
    print("📊 TEST RESULTS")
    print(banner)

    for idx, result in enumerate(results, 1):
        _print_company_summary(idx, result)

    print("\n" + banner)
    print(f"✅ Successfully processed {len(results)}/{len(test_df)} companies")
    print("🎉 No LLM calls needed - 100% manual parsing!")
    print(banner)
if __name__ == "__main__":
    # Executed as a script: build the coroutine and run it to completion.
    parser_check = test_parser()
    asyncio.run(parser_check)