Add test script for manual JSON parser with LLM currency conversion

- Implemented a new test script `test_parser.py` to validate the manual JSON parser.
- The script loads investor data from a CSV file and processes a sample of three investors.
- Results include detailed information about each investor, their funds, team members, and investment thesis.
- Added error handling for a missing API key in the environment variables.
2025-10-06 14:07:28 +01:00
parent c199f5423a
commit cd7172ed9f
11 changed files with 31090 additions and 49 deletions
@@ -0,0 +1,115 @@
+#!/usr/bin/env python3
+"""
+Test script for the new manual JSON parser with LLM currency conversion.
+
+Loads the investor CSV, sends the first three rows through
+``InvestorProcessor.parse_investors`` with ``save_to_db=False`` and prints a
+summary of each result.
+
+The project root defaults to the checkout this script was written against;
+set the ``ANTON_WIREFRAME_ROOT`` environment variable to run it elsewhere.
+"""
+
+import asyncio
+import os
+import sys
+
+# Directory that holds ``app/``, ``data/`` and ``.env``. Read from the process
+# environment (not from ``.env``, which is only loaded further down).
+PROJECT_ROOT = (
+    os.environ.get("ANTON_WIREFRAME_ROOT")
+    or "/home/oluwasanmi/Documents/Work/MKD/anton_wireframe"
+)
+
+# Must run before the ``services`` import below so the package resolves.
+sys.path.insert(0, os.path.join(PROJECT_ROOT, "app"))
+
+import pandas as pd
+from dotenv import load_dotenv
+from services.llm_parser import InvestorProcessor
+
+# Load environment variables from root directory
+load_dotenv(os.path.join(PROJECT_ROOT, ".env"))
+
+# Also check if API key is set
+if not os.getenv("OPENROUTER_API_KEY"):
+    print("❌ ERROR: OPENROUTER_API_KEY not found in environment")
+    print("Please set it in your .env file or export it:")
+    print("export OPENROUTER_API_KEY='your-key-here'")
+    sys.exit(1)
+
+
+def _format_aum(aum):
+    """Return the AUM line for one investor.
+
+    Falsy values (``None``, ``0``, ``""``) are reported as not available.
+    Values that reject the ``,`` format specifier (for example a string) are
+    shown unformatted instead of raising.
+    """
+    if not aum:
+        return "   AUM: Not Available"
+    try:
+        return f"   AUM: ${aum:,}"
+    except (TypeError, ValueError):
+        return f"   AUM: {aum}"
+
+
+def _print_investor(idx, result):
+    """Print the summary block for one parsed investor dict."""
+    # ``dict.get(key, [])`` still returns ``None`` when the key is present with
+    # a null value, so normalise with ``or`` before calling ``len`` or slicing.
+    funds = result.get("funds") or []
+    team_members = result.get("team_members") or []
+    portfolio_highlights = result.get("portfolio_highlights") or []
+    investment_thesis = result.get("investment_thesis") or []
+
+    print(f"\n{idx}. {result.get('name')}")
+    print(f"   Website: {result.get('website')}")
+    print(f"   HQ: {result.get('headquarters')}")
+    print(_format_aum(result.get("aum")))
+    print(f"   Funds: {len(funds)}")
+    for fund in funds[:2]:  # Show first 2 funds
+        print(f"      - {fund.get('fund_name')}")
+        print(f"        Size: {fund.get('fund_size')}")
+        print(f"        Est. Investment: {fund.get('estimated_investment_size')}")
+    print(f"   Team Members: {len(team_members)}")
+    for member in team_members[:3]:  # Show first 3 members
+        print(f"      - {member.get('name')} ({member.get('title')})")
+    print(f"   Portfolio Highlights: {len(portfolio_highlights)}")
+    print(f"   Investment Thesis: {len(investment_thesis)} points")
+
+
+async def test_parser():
+    """Run the parser on the first three CSV rows and print a summary.
+
+    Reads the investor CSV under ``PROJECT_ROOT``, passes the sample to
+    ``InvestorProcessor.parse_investors`` with ``save_to_db=False``, and prints
+    one block per returned result followed by a processed/total count.
+    """
+    print("🧪 Testing Manual JSON Parser with LLM Currency Conversion\n")
+
+    # Load the investor data
+    df = pd.read_csv(os.path.join(PROJECT_ROOT, "data", "300 Investors data.csv"))
+
+    # Process just the first 3 rows for testing
+    test_df = df.head(3)
+
+    processor = InvestorProcessor()
+
+    print(f"Processing {len(test_df)} test investors...\n")
+    results = await processor.parse_investors(test_df, save_to_db=False)
+
+    print("\n" + "=" * 80)
+    print("📊 TEST RESULTS")
+    print("=" * 80)
+
+    for idx, result in enumerate(results, 1):
+        _print_investor(idx, result)
+
+    print("\n" + "=" * 80)
+    print(f"✅ Successfully processed {len(results)}/{len(test_df)} investors")
+    print("=" * 80)
+
+
+if __name__ == "__main__":
+    asyncio.run(test_parser())