Files
Anton_wireframe/test_parser.py
T
bolade cd7172ed9f Add test script for manual JSON parser with LLM currency conversion
- Implemented a new test script `test_parser.py` to validate the functionality of the manual JSON parser.
- The script loads investor data from a CSV file and processes a sample of three investors.
- Results include detailed information about each investor, their funds, team members, and investment thesis.
- Added error handling for missing API key in the environment variables.
2025-10-06 14:07:28 +01:00

81 lines
2.7 KiB
Python

#!/usr/bin/env python3
"""
Test script for the new manual JSON parser with LLM currency conversion.
"""
import asyncio
import os
import sys
sys.path.insert(0, "/home/oluwasanmi/Documents/Work/MKD/anton_wireframe/app")
import pandas as pd
from dotenv import load_dotenv
from services.llm_parser import InvestorProcessor
# Read settings from the .env file in the project root directory
load_dotenv("/home/oluwasanmi/Documents/Work/MKD/anton_wireframe/.env")

# Stop immediately (exit status 1) when the OpenRouter API key is unset or
# empty, before any parsing work is attempted
if not os.getenv("OPENROUTER_API_KEY"):
    print(
        "❌ ERROR: OPENROUTER_API_KEY not found in environment",
        "Please set it in your .env file or export it:",
        "export OPENROUTER_API_KEY='your-key-here'",
        sep="\n",
    )
    sys.exit(1)
def _format_aum(aum):
    """Return the AUM display line for one investor.

    Falsy values (``None``, ``0``, ``""``) are reported as not available.
    Values that accept the ``,`` format spec are rendered with a dollar sign
    and thousands separators. Any other truthy value (for example a string)
    is shown verbatim, because the ``,`` spec raises for non-numeric types.
    """
    if not aum:
        return " AUM: Not Available"
    try:
        return f" AUM: ${aum:,}"
    except (TypeError, ValueError):
        return f" AUM: {aum}"


def _print_investor(idx, result):
    """Print the summary section for a single parsed investor.

    Args:
        idx: 1-based position of the investor in the result list.
        result: Mapping for one parsed investor; read via ``.get`` with the
            keys ``name``, ``website``, ``headquarters``, ``aum``, ``funds``,
            ``team_members``, ``portfolio_highlights`` and
            ``investment_thesis``.
    """
    # ``dict.get(key, [])`` only falls back when the key is missing; a key
    # that is present with a ``None`` value would make ``len()`` raise, so
    # normalise with ``or []`` instead.
    funds = result.get("funds") or []
    team_members = result.get("team_members") or []
    portfolio_highlights = result.get("portfolio_highlights") or []
    investment_thesis = result.get("investment_thesis") or []

    print(f"\n{idx}. {result.get('name')}")
    print(f" Website: {result.get('website')}")
    print(f" HQ: {result.get('headquarters')}")
    print(_format_aum(result.get("aum")))

    print(f" Funds: {len(funds)}")
    for fund in funds[:2]:  # Show first 2 funds
        print(f" - {fund.get('fund_name')}")
        print(f" Size: {fund.get('fund_size')}")
        print(f" Est. Investment: {fund.get('estimated_investment_size')}")

    print(f" Team Members: {len(team_members)}")
    for member in team_members[:3]:  # Show first 3 members
        print(f" - {member.get('name')} ({member.get('title')})")

    print(f" Portfolio Highlights: {len(portfolio_highlights)}")
    print(f" Investment Thesis: {len(investment_thesis)} points")


async def test_parser():
    """Test the new parser with a small sample.

    Loads the investor CSV, passes its first three rows to
    ``InvestorProcessor.parse_investors`` with ``save_to_db=False``, and
    prints a per-investor summary followed by a processed/total count.
    """
    print("🧪 Testing Manual JSON Parser with LLM Currency Conversion\n")

    # Load the investor data
    df = pd.read_csv(
        "/home/oluwasanmi/Documents/Work/MKD/anton_wireframe/data/300 Investors data.csv"
    )

    # Process just the first 3 rows for testing
    test_df = df.head(3)

    processor = InvestorProcessor()
    print(f"Processing {len(test_df)} test investors...\n")
    results = await processor.parse_investors(test_df, save_to_db=False)

    separator = "=" * 80
    print("\n" + separator)
    print("📊 TEST RESULTS")
    print(separator)

    for idx, result in enumerate(results, 1):
        _print_investor(idx, result)

    print("\n" + separator)
    print(f"✅ Successfully processed {len(results)}/{len(test_df)} investors")
    print(separator)
if __name__ == "__main__":
    # Executed as a script: build the coroutine, then let asyncio.run drive
    # it to completion.
    sample_run = test_parser()
    asyncio.run(sample_run)