Add test script for manual JSON parser with LLM currency conversion
- Implemented a new test script, `test_parser.py`, to validate the manual JSON parser.
- The script loads investor data from a CSV file and processes a sample of three investors.
- Results include detailed information about each investor: their funds, team members, and investment thesis.
- Added error handling for a missing API key in the environment variables.
This commit is contained in:
@@ -0,0 +1,80 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test script for the new manual JSON parser with LLM currency conversion.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Put the app directory first on the module search path. Presumably this is
# what lets the `services.llm_parser` import further down resolve — verify
# against the repository layout.
# NOTE(review): hard-coded, machine-specific absolute path.
sys.path.insert(0, "/home/oluwasanmi/Documents/Work/MKD/anton_wireframe/app")
|
||||
|
||||
import pandas as pd
|
||||
from dotenv import load_dotenv
|
||||
from services.llm_parser import InvestorProcessor
|
||||
|
||||
# Read the .env file in the project root into the process environment.
load_dotenv("/home/oluwasanmi/Documents/Work/MKD/anton_wireframe/.env")

# Stop before doing any work when the API key is missing or empty.
if not os.environ.get("OPENROUTER_API_KEY"):
    for message in (
        "❌ ERROR: OPENROUTER_API_KEY not found in environment",
        "Please set it in your .env file or export it:",
        "export OPENROUTER_API_KEY='your-key-here'",
    ):
        print(message)
    sys.exit(1)
|
||||
|
||||
|
||||
# Default CSV location used when the caller does not supply one.
# NOTE(review): machine-specific absolute path; pass csv_path to run elsewhere.
_DEFAULT_CSV_PATH = (
    "/home/oluwasanmi/Documents/Work/MKD/anton_wireframe/data/300 Investors data.csv"
)


def _format_aum(aum):
    """Return the AUM summary line for one investor.

    Falsy values (None, 0, "") are reported as "Not Available". Values that
    do not support the thousands-separator format spec (for example strings)
    are printed as-is instead of raising.
    """
    if not aum:
        return " AUM: Not Available"
    try:
        return f" AUM: ${aum:,}"
    except (TypeError, ValueError):
        # The ',' format spec is only valid for numeric types.
        return f" AUM: ${aum}"


def _print_investor(idx, result):
    """Print the summary block for one parsed investor record."""
    # dict.get(key, []) still returns None when the key is present with a
    # null value, so normalise with `or []` before calling len() or slicing.
    funds = result.get("funds") or []
    team_members = result.get("team_members") or []
    portfolio_highlights = result.get("portfolio_highlights") or []
    investment_thesis = result.get("investment_thesis") or []

    print(f"\n{idx}. {result.get('name')}")
    print(f" Website: {result.get('website')}")
    print(f" HQ: {result.get('headquarters')}")
    print(_format_aum(result.get("aum")))

    print(f" Funds: {len(funds)}")
    for fund in funds[:2]:  # Show first 2 funds
        print(f" - {fund.get('fund_name')}")
        print(f" Size: {fund.get('fund_size')}")
        print(f" Est. Investment: {fund.get('estimated_investment_size')}")

    print(f" Team Members: {len(team_members)}")
    for member in team_members[:3]:  # Show first 3 members
        print(f" - {member.get('name')} ({member.get('title')})")

    print(f" Portfolio Highlights: {len(portfolio_highlights)}")
    print(f" Investment Thesis: {len(investment_thesis)} points")


async def test_parser(csv_path=_DEFAULT_CSV_PATH, sample_size=3):
    """Run the parser on a small sample of investors and print a summary.

    Reads the CSV at ``csv_path``, takes the first ``sample_size`` rows,
    passes them to ``InvestorProcessor.parse_investors`` with
    ``save_to_db=False``, and prints a report for each returned record.

    Args:
        csv_path: Path of the investor CSV file. Defaults to the path the
            script was originally written against.
        sample_size: Number of leading rows to process. Defaults to 3.
    """
    print("🧪 Testing Manual JSON Parser with LLM Currency Conversion\n")

    # Load the investor data and keep only the leading rows for testing.
    df = pd.read_csv(csv_path)
    test_df = df.head(sample_size)

    processor = InvestorProcessor()

    print(f"Processing {len(test_df)} test investors...\n")
    results = await processor.parse_investors(test_df, save_to_db=False)

    separator = "=" * 80
    print("\n" + separator)
    print("📊 TEST RESULTS")
    print(separator)

    for idx, result in enumerate(results, 1):
        _print_investor(idx, result)

    print("\n" + separator)
    print(f"✅ Successfully processed {len(results)}/{len(test_df)} investors")
    print(separator)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the async test to completion.
    parser_run = test_parser()
    asyncio.run(parser_run)
|
||||
Reference in New Issue
Block a user