#!/usr/bin/env python3
"""
Test script for the company parser with manual JSON parsing.

Loads the first few rows of the companies CSV, runs them through
``InvestorProcessor.parse_companies`` with ``save_to_db=False`` and prints a
short summary of every parsed company.
"""

import asyncio
import os
import sys

# Single source of truth for the (machine-specific) project checkout; every
# other path used by this script is derived from it.
PROJECT_ROOT = "/home/oluwasanmi/Documents/Work/MKD/anton_wireframe"

# The app directory has to be on sys.path *before* `services` is imported,
# which is why the remaining imports sit below this statement.
sys.path.insert(0, os.path.join(PROJECT_ROOT, "app"))

import pandas as pd  # noqa: E402
from dotenv import load_dotenv  # noqa: E402
from services.llm_parser import InvestorProcessor  # noqa: E402

COMPANIES_CSV = os.path.join(PROJECT_ROOT, "data", "300 Companies data.csv")
SAMPLE_SIZE = 3  # number of leading CSV rows to run through the parser
SEPARATOR = "=" * 80

# Load environment variables from root directory
load_dotenv(os.path.join(PROJECT_ROOT, ".env"))

# Also check if API key is set (not needed for companies now but for consistency)
if not os.getenv("OPENROUTER_API_KEY"):
    print("⚠️ WARNING: OPENROUTER_API_KEY not found in environment")
    print("This is OK for companies (no LLM needed), but will fail for investors")


def _list_field(result, key):
    """Return ``result[key]``, or an empty list when it is missing or falsy.

    ``result.get(key, [])`` alone still yields ``None`` when the key is
    present with a ``None`` value, which would make ``len()`` raise.
    """
    return result.get(key) or []


def _print_company(idx, result):
    """Print the summary block for one parsed company.

    Args:
        idx: 1-based position of the company in the results.
        result: Parsed company record; only read through ``.get``.
    """
    print(f"\n{idx}. {result.get('name')}")
    print(f" Website: {result.get('website')}")
    print(f" Location: {result.get('location')}")
    print(f" Industry: {result.get('industry')}")

    founded_year = result.get("founded_year")
    print(f" Founded: {founded_year}" if founded_year else " Founded: Unknown")

    executives = _list_field(result, "key_executives")
    print(f" Executives: {len(executives)}")
    for exec_member in executives[:3]:  # Show first 3
        print(f" - {exec_member.get('name')} ({exec_member.get('title')})")

    investors = _list_field(result, "investor_names")
    print(f" Investors: {len(investors)}")
    if investors:
        print(f" - {', '.join(investors[:5])}")  # Show first 5

    client_categories = _list_field(result, "client_categories")
    print(f" Client Categories: {len(client_categories)}")
    if client_categories:
        print(f" - {', '.join(client_categories[:3])}")  # Show first 3


async def test_parser():
    """Test the new company parser with a small sample"""
    print("🧪 Testing Manual Company JSON Parser (No LLM)\n")

    # Load the company data
    df = pd.read_csv(COMPANIES_CSV)

    # Process just the first few rows for testing
    test_df = df.head(SAMPLE_SIZE)

    processor = InvestorProcessor()

    print(f"Processing {len(test_df)} test companies...\n")
    results = await processor.parse_companies(test_df, save_to_db=False)

    print("\n" + SEPARATOR)
    print("📊 TEST RESULTS")
    print(SEPARATOR)

    for idx, result in enumerate(results, 1):
        _print_company(idx, result)

    print("\n" + SEPARATOR)
    print(f"✅ Successfully processed {len(results)}/{len(test_df)} companies")
    print("🎉 No LLM calls needed - 100% manual parsing!")
    print(SEPARATOR)


if __name__ == "__main__":
    asyncio.run(test_parser())