cd7172ed9f
- Implemented a new test script `test_parser.py` to validate the functionality of the manual JSON parser. - The script loads investor data from a CSV file and processes a sample of three investors. - Results include detailed information about each investor, their funds, team members, and investment thesis. - Added error handling for missing API key in the environment variables.
81 lines
2.7 KiB
Python
81 lines
2.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Test script for the new manual JSON parser with LLM currency conversion.
|
|
"""
|
|
|
|
import asyncio
|
|
import os
|
|
import sys
|
|
|
|
# Put the application directory at the front of the module search path.
# Presumably this is what makes the `services` package importable further
# down in this file -- TODO confirm against the project layout.
# NOTE(review): the path is absolute and machine-specific, so the script only
# runs unchanged on a checkout that lives at exactly this location.
sys.path.insert(0, "/home/oluwasanmi/Documents/Work/MKD/anton_wireframe/app")
|
|
|
|
import pandas as pd
|
|
from dotenv import load_dotenv
|
|
from services.llm_parser import InvestorProcessor
|
|
|
|
# Read settings from the .env file kept in the project root.
load_dotenv("/home/oluwasanmi/Documents/Work/MKD/anton_wireframe/.env")

# Stop immediately, with setup instructions and exit status 1, when
# OPENROUTER_API_KEY is unset or empty.
if not os.environ.get("OPENROUTER_API_KEY"):
    print(
        "❌ ERROR: OPENROUTER_API_KEY not found in environment",
        "Please set it in your .env file or export it:",
        "export OPENROUTER_API_KEY='your-key-here'",
        sep="\n",
    )
    sys.exit(1)
|
|
|
|
|
|
async def test_parser():
    """Run the parser on a three-row sample and print a per-investor summary.

    Reads the investor CSV, hands its first three rows to
    ``InvestorProcessor.parse_investors`` with ``save_to_db=False`` and prints
    selected fields of every result that comes back.

    The printing code tolerates sparse results:

    * List-valued fields are read as ``result.get(key) or []``. ``dict.get``
      only falls back to its default when the key is *missing*; a key that is
      present with a ``None`` value would otherwise reach ``len()`` and raise
      ``TypeError``.
    * AUM is printed with thousands separators when the value supports the
      ``,`` format option, and verbatim otherwise (for example a string),
      instead of raising from inside the f-string.

    Returns:
        None. All output goes to stdout.
    """
    print("🧪 Testing Manual JSON Parser with LLM Currency Conversion\n")

    # Load the investor data
    df = pd.read_csv(
        "/home/oluwasanmi/Documents/Work/MKD/anton_wireframe/data/300 Investors data.csv"
    )

    # Process just the first 3 rows for testing
    test_df = df.head(3)

    processor = InvestorProcessor()

    print(f"Processing {len(test_df)} test investors...\n")
    results = await processor.parse_investors(test_df, save_to_db=False)

    print("\n" + "=" * 80)
    print("📊 TEST RESULTS")
    print("=" * 80)

    for idx, result in enumerate(results, 1):
        print(f"\n{idx}. {result.get('name')}")
        print(f" Website: {result.get('website')}")
        print(f" HQ: {result.get('headquarters')}")

        aum = result.get("aum")
        if aum:
            try:
                aum_line = f" AUM: ${aum:,}"
            except (TypeError, ValueError):
                # The ',' option is rejected by str and other non-numeric
                # types; show the raw value rather than abort the report.
                aum_line = f" AUM: ${aum}"
        else:
            aum_line = " AUM: Not Available"
        print(aum_line)

        # `or []` also covers a key that exists but holds None.
        funds = result.get("funds") or []
        print(f" Funds: {len(funds)}")
        for fund in funds[:2]:  # Show first 2 funds
            print(f" - {fund.get('fund_name')}")
            print(f" Size: {fund.get('fund_size')}")
            print(
                f" Est. Investment: {fund.get('estimated_investment_size')}"
            )

        team_members = result.get("team_members") or []
        print(f" Team Members: {len(team_members)}")
        for member in team_members[:3]:  # Show first 3 members
            print(f" - {member.get('name')} ({member.get('title')})")

        portfolio_highlights = result.get("portfolio_highlights") or []
        print(f" Portfolio Highlights: {len(portfolio_highlights)}")

        investment_thesis = result.get("investment_thesis") or []
        print(f" Investment Thesis: {len(investment_thesis)} points")

    print("\n" + "=" * 80)
    print(f"✅ Successfully processed {len(results)}/{len(test_df)} investors")
    print("=" * 80)
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the test_parser() coroutine to completion.
    asyncio.run(test_parser())
|