From d341cacb9a2bef8b79f7ffbae5608e04817ce3e1 Mon Sep 17 00:00:00 2001 From: bolade Date: Tue, 7 Oct 2025 15:24:36 +0100 Subject: [PATCH] Refactor investor and fund schemas to support new check size range - Removed deprecated `stage_focus` column from `InvestorTable` and `InvestorSchema`. - Updated `FundTable` to change `fund_size` from VARCHAR to INTEGER and added `check_size_lower` and `check_size_upper` columns. - Modified API routes to return investor-fund combinations as separate entries. - Created new `InvestorFundData` schema for combined investor-fund responses. - Implemented LLM parsing for check size range from estimated investment size. - Updated database migration script to reflect schema changes and ensure data integrity. - Removed obsolete verification and test scripts related to the old schema. --- COMPANY_PARSER_DOCS.md | 452 ---------------------------- SCHEMA_FIX.md | 237 --------------- app/db/models.py | 12 +- app/routers/investors.py | 273 ++++++++++++++--- app/schemas/router_schemas.py | 68 ++++- app/services/llm_parser.py | 90 +++++- app/services/querying.py | 2 + preprocessor/migrate_fund_schema.py | 159 ++++++++++ preprocessor/models.py | 12 +- preprocessor/version_two.db | Bin 5857280 -> 5976064 bytes test_company_parser.py | 78 ----- verify_schema.py | 57 ---- 12 files changed, 556 insertions(+), 884 deletions(-) delete mode 100644 COMPANY_PARSER_DOCS.md delete mode 100644 SCHEMA_FIX.md create mode 100644 preprocessor/migrate_fund_schema.py delete mode 100644 test_company_parser.py delete mode 100644 verify_schema.py diff --git a/COMPANY_PARSER_DOCS.md b/COMPANY_PARSER_DOCS.md deleted file mode 100644 index 3d874c4..0000000 --- a/COMPANY_PARSER_DOCS.md +++ /dev/null @@ -1,452 +0,0 @@ -# Company Parser Documentation - -## Overview - -The company CSV parser has been updated to use **100% manual JSON parsing** with **zero LLM calls**. This makes it extremely fast, cost-effective, and reliable. 
- -## Key Features - -### 🚀 No LLM Required - -- **Manual JSON parsing** extracts all data directly from CSV -- **No AI calls** needed for structure parsing -- **Instant processing** - no API delays -- **Zero cost** - no LLM API fees - -### 📊 Data Extracted - -**Basic Information:** - -- Company name -- Website -- Location/geographic focus -- Industry/sector description -- Founded year (auto-extracted from description) - -**People:** - -- Key executives/senior leadership -- Titles and roles -- Source URLs - -**Relationships:** - -- Investor names (from CSV column) -- Automatic linking to investors in database - -**Additional Data:** - -- Client categories -- Product descriptions -- Linked documents -- Researcher notes -- Missing fields tracking -- Data sources - -## CSV Format - -### Required Columns - -| Column Name | Description | Required | -| ------------------------ | ------------------------------ | -------- | -| `Name` | Company name | Yes | -| `Website` | Company website URL | No | -| `Investor` | Comma-separated investor names | No | -| `Final Investor Profile` | JSON string with company data | Yes | - -### JSON Profile Structure - -The `Final Investor Profile` column should contain a JSON object with: - -```json -{ - "companyDescription": "Company description text...", - "geographicFocus": "Location/HQ and sales focus", - "sectorDescription": "Industry/sector description", - "keyExecutives": [ - { - "name": "John Doe", - "title": "CEO", - "sourceUrl": "https://company.com/team" - } - ], - "clientCategories": ["Category 1", "Category 2"], - "productDescription": "Product/service description", - "linkedDocuments": ["https://doc1.com", "https://doc2.com"], - "researcherNotes": "Research notes...", - "missingImportantFields": ["field1", "field2"], - "sources": { - "companyDescription": "https://source1.com", - "keyExecutives": "https://source2.com" - } -} -``` - -## Usage - -### Via API - -```bash -curl -X POST "http://localhost:8585/parse-csv" \ - -F 
"file=@data/300 Companies data.csv" \ - -F "is_investor=0" -``` - -### Programmatically - -```python -import pandas as pd -from services.llm_parser import InvestorProcessor - -# Load CSV -df = pd.read_csv('companies.csv') - -# Create processor -processor = InvestorProcessor() - -# Parse and save to database (no LLM needed!) -results = await processor.parse_companies(df, save_to_db=True) -``` - -### Testing (Dry Run) - -```bash -python3 test_company_parser.py -``` - -## Processing Output - -### Console Example - -``` -🚀 Starting to process 100 companies... - -📊 Processing 1/100: Mammaly - ✓ Parsed successfully - - Location: Berlin, Germany - - Industry: Pet health and nutrition - - Founded: 2020 - - Executives: 3 - - Investors: 3 - ✅ Saved to database (ID: 1234) - -📊 Processing 2/100: Ljusgarda - ✓ Parsed successfully - - Location: Sweden - - Industry: Indoor agriculture - - Founded: 2018 - - Executives: 1 - - Investors: 4 - ✅ Saved to database (ID: 1235) - -💾 Committed batch at row 10 - -... - -🎉 Completed! Processed 100/100 companies -``` - -## Database Schema - -### CompanyTable - -```python -class CompanyTable: - id: int - name: str - website: str | None - location: str | None - description: str | None - industry: str | None - founded_year: int | None - created_at: datetime - updated_at: datetime | None - - # Relationships - members: List[CompanyMember] # Key executives - investors: List[InvestorTable] # Linked investors - sectors: List[SectorTable] -``` - -### CompanyMember - -```python -class CompanyMember: - id: int - name: str - role: str | None # Job title - linkedin: str | None # Source URL - company_id: int -``` - -### Investor Linking - -Companies are automatically linked to investors: - -```python -# If investor exists in database -investor = db.query(InvestorTable).filter_by(name="Five Seasons Ventures").first() -if investor: - investor.portfolio_companies.append(company) -``` - -## Features - -### 1. 
Automatic Founding Year Extraction - -The parser automatically extracts founding years from company descriptions: - -**Patterns Recognized:** - -- "founded in 2020" -- "founded 2020" -- "Gegründet 2020" (German) -- "established in 2020" -- "since 2020" -- "(2020)" - year in parentheses - -**Example:** - -``` -Description: "mammaly is a leading European pet health startup founded in 2020..." -→ Founded Year: 2020 -``` - -### 2. Executive Name Extraction - -Extracts from multiple possible field names: - -- `keyExecutives` -- `seniorLeadership` - -### 3. Investor Relationship Management - -- Parses comma-separated investor names -- Links to existing investors in database -- Adds company to investor's portfolio -- Skips non-existent investors (logs warning) - -### 4. Upsert Logic - -- Updates existing companies with same name -- Preserves existing data if new data is null -- Replaces team members on update -- Maintains investor relationships - -## Performance - -### Speed - -| Metric | Value | -| ---------------------- | ------------ | -| Processing per company | ~1-2 seconds | -| 100 companies | ~2-3 minutes | -| 300 companies | ~6-9 minutes | - -### Comparison with Old LLM Parser - -| Metric | Old LLM Parser | New Manual Parser | Improvement | -| --------- | -------------- | ----------------- | ----------------- | -| Speed | 30-60s/company | 1-2s/company | **95%+ faster** | -| Cost | $0.02/company | $0.00/company | **100% savings** | -| API calls | 10-20/company | 0/company | **No LLM needed** | -| Accuracy | Variable | Consistent | **More reliable** | - -## Error Handling - -### Graceful Failures - -```python -# Missing required fields -if not name or not profile_json: - print("⚠️ Skipping - missing name or profile") - continue - -# JSON parsing errors -try: - profile = json.loads(profile_json) -except json.JSONDecodeError: - print("❌ Invalid JSON") - continue - -# Database errors -try: - db.commit() -except Exception as e: - db.rollback() - print(f"❌ Database 
error: {e}") -``` - -### Batch Commits - -Commits every 10 companies to avoid memory issues and ensure data persistence even if later errors occur. - -## Query Examples - -### Get Companies by Industry - -```python -companies = db.query(CompanyTable).filter( - CompanyTable.industry.like('%agriculture%') -).all() -``` - -### Get Companies Founded After 2018 - -```python -companies = db.query(CompanyTable).filter( - CompanyTable.founded_year >= 2018 -).all() -``` - -### Get Companies with Specific Investor - -```python -investor = db.query(InvestorTable).filter_by(name="Five Seasons Ventures").first() -companies = investor.portfolio_companies -``` - -### Get Companies by Location - -```python -companies = db.query(CompanyTable).filter( - CompanyTable.location.like('%Germany%') -).all() -``` - -## Benefits - -### 1. Speed ⚑ - -- **95%+ faster** than LLM-based parsing -- No API call delays -- Instant JSON parsing - -### 2. Cost 💰 - -- **$0 per company** (vs $0.02 with LLM) -- No LLM API fees -- 100% savings on large datasets - -### 3. Reliability 🎯 - -- **Consistent parsing** every time -- No LLM hallucinations -- Predictable results - -### 4. Simplicity 🧩 - -- **Zero configuration** needed -- No API keys required for companies -- Straightforward JSON parsing - -### 5. Completeness 📋 - -- Extracts **all available fields** -- No data loss -- Preserves source references - -## Integration with Investors - -Companies can reference investors, and investors can have companies in their portfolio: - -```python -# Query investors of a company -company = db.query(CompanyTable).filter_by(name="Mammaly").first() -investors = company.investors - -# Query companies of an investor -investor = db.query(InvestorTable).filter_by(name="Five Seasons Ventures").first() -companies = investor.portfolio_companies -``` - -## Troubleshooting - -### Issue: Company not saved - -**Check:** - -1. Valid JSON in `Final Investor Profile` column -2. Company `name` is not empty -3. 
No database constraint violations - -### Issue: Investors not linked - -**Possible causes:** - -1. Investor doesn't exist in database yet -2. Investor name spelling doesn't match exactly -3. Parse investors CSV first, then companies - -**Solution:** - -```python -# Always parse investors first -await processor.parse_investors(investors_df, save_to_db=True) -# Then parse companies -await processor.parse_companies(companies_df, save_to_db=True) -``` - -### Issue: Founded year not extracted - -**Reason:** Description doesn't contain recognizable year pattern - -**Solution:** Year patterns are best-effort. Add more patterns if needed or set manually: - -```python -company.founded_year = 2020 -db.commit() -``` - -## Extending the Parser - -### Add New Fields - -```python -# In process_company_profile method -company_data = { - # ... existing fields ... - "new_field": profile.get("newFieldName"), -} -``` - -### Add New Year Patterns - -```python -year_patterns = [ - # ... existing patterns ... - r'started in (\d{4})', - r'launched (\d{4})', -] -``` - -### Custom Post-Processing - -```python -async def parse_companies(self, df, save_to_db=True): - # ... existing code ... - - for company_data in results: - # Custom processing here - if company_data['industry'] == 'agriculture': - company_data['category'] = 'agtech' -``` - -## Best Practices - -1. **Parse investors first** - ensures investor relationships work -2. **Test on small sample** - use `save_to_db=False` first -3. **Check data quality** - review first few results -4. **Commit in batches** - default 10 companies per commit -5. 
**Monitor console** - watch for errors and warnings - -## Summary - -✅ **100% manual parsing** - No LLM needed -✅ **Instant processing** - 1-2s per company -✅ **Zero cost** - No API fees -✅ **Reliable** - Consistent results -✅ **Complete** - All fields extracted -✅ **Integrated** - Auto-links to investors - -The company parser is now as efficient as the investor parser, with the added benefit of requiring **zero LLM calls**! diff --git a/SCHEMA_FIX.md b/SCHEMA_FIX.md deleted file mode 100644 index 91abd5d..0000000 --- a/SCHEMA_FIX.md +++ /dev/null @@ -1,237 +0,0 @@ -# Schema Mismatch Fix - Summary - -## Problem - -When trying to parse the investor CSV, the following error occurred: - -``` -sqlite3.OperationalError: no such column: investors.stage_focus -``` - -## Root Cause - -The application models still referenced `stage_focus` column which was removed from the preprocessor database schema. The `stage_focus` was deprecated in favor of fund-level stage tracking (each fund has its own `investment_stage_focus`). - -## Files Fixed - -### 1. ✅ `app/db/models.py` - -**Removed:** `stage_focus` column from `InvestorTable` - -```python -# BEFORE: -stage_focus = Column(Enum(InvestmentStage), nullable=True) - -# AFTER: -# Removed completely -``` - -### 2. ✅ `app/schemas/py_schemas.py` - -**Removed:** `stage_focus` field from `InvestorSchema` - -```python -# BEFORE: -stage_focus: InvestmentStage = Field( - default=InvestmentStage.SEED, - description="Investment stage focus..." -) - -# AFTER: -# Removed completely -``` - -### 3. ✅ `app/services/llm_parser.py` - -**Removed:** `stage_focus` parameter from `_save_investor_to_db()` method - -```python -# BEFORE: -investor = InvestorTable( - ... - stage_focus=investor_data.investor.stage_focus, - ... -) - -# AFTER: -investor = InvestorTable( - ... - # stage_focus removed - ... -) -``` - -### 4. 
✅ `app/db/db.py` - -**Fixed:** Database path to use absolute path to preprocessor database - -```python -# BEFORE: -DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///./investors.db") - -# AFTER: -APP_DIR = Path(__file__).parent.parent -PREPROCESSOR_DB = APP_DIR.parent / "preprocessor" / "version_two.db" -DATABASE_URL = os.getenv("DATABASE_URL", f"sqlite:///{PREPROCESSOR_DB}") -``` - -## Verification - -Created `verify_schema.py` to check database schema: - -```bash -python3 verify_schema.py -``` - -**Results:** - -``` -✅ 'stage_focus' column not in database (as expected) -✅ All required enriched columns present -✅ aum column is INTEGER type (correct) -``` - -## Architecture Decision - -**Stage Focus Tracking:** - -- ❌ **Old:** Single `stage_focus` at investor level -- ✅ **New:** Multiple stages tracked per fund via `investment_stage_focus` JSON array - -This allows investors with multiple funds targeting different stages. - -**Example:** - -```python -# Investor: Alumni Ventures -funds = [ - { - "fund_name": "Seed Fund", - "investment_stage_focus": ["Seed", "Early Stage"] - }, - { - "fund_name": "Growth Fund", - "investment_stage_focus": ["Series B", "Series C", "Growth"] - } -] -``` - -## Database Schema Status - -### InvestorTable (Current) - -``` -✅ aum: INTEGER (for numerical filtering) -✅ investment_thesis: JSON (array) -✅ portfolio_highlights: JSON (array) -✅ linked_documents: JSON (array) -✅ researcher_notes: TEXT -✅ missing_important_fields: JSON (array) -✅ sources: JSON (object) -❌ stage_focus: REMOVED (moved to fund level) -``` - -### FundTable (Current) - -``` -✅ fund_name: VARCHAR -✅ fund_size: VARCHAR (USD integer as string) -✅ estimated_investment_size: VARCHAR (USD integer as string) -✅ geographic_focus: JSON (array) -✅ investment_stage_focus: JSON (array) ⭐ REPLACES investor.stage_focus -✅ sector_focus: JSON (array) -``` - -## Testing - -### Before Fix - -``` -❌ Error: no such column: investors.stage_focus -❌ 
Failed to save to database -``` - -### After Fix - -```bash -# Test with API -curl -X POST "http://localhost:8585/parse-csv" \ - -F "file=@data/300 Investors data.csv" \ - -F "is_investor=1" - -# Expected: Successfully parses and saves investors -``` - -## Migration Notes - -**For existing code that queries stage_focus:** - -```python -# OLD CODE (will break): -investors = db.query(InvestorTable).filter( - InvestorTable.stage_focus == InvestmentStage.SEED -).all() - -# NEW CODE (correct): -from sqlalchemy import func - -investors = db.query(InvestorTable).join(FundTable).filter( - func.json_extract(FundTable.investment_stage_focus, '$').contains('Seed') -).all() - -# Or better yet, use JSON operations: -investors = db.query(InvestorTable).join(FundTable).filter( - FundTable.investment_stage_focus.like('%Seed%') -).all() -``` - -## Benefits of This Change - -1. **Accurate Representation:** Investors can have multiple funds with different stage focuses -2. **No Data Loss:** Stage information preserved at fund level -3. **Better Queries:** Can filter by specific fund characteristics -4. **Scalability:** Supports complex investor portfolios - -## Next Steps - -1. ✅ Schema fixed -2. ✅ Database path corrected -3. ✅ Verification script created -4. 🔄 Ready to parse investor CSV -5. 
πŸ“ Update any existing queries that used `stage_focus` - -## Quick Reference - -**Correct Database Path:** - -``` -/home/oluwasanmi/Documents/Work/MKD/anton_wireframe/preprocessor/version_two.db -``` - -**Access Fund Stage Info:** - -```python -for investor in investors: - for fund in investor.funds: - print(f"{fund.fund_name}: {fund.investment_stage_focus}") -``` - -**Query by Stage:** - -```python -# Get all seed-stage funds -seed_funds = db.query(FundTable).filter( - FundTable.investment_stage_focus.contains('Seed') -).all() - -# Get investors with seed funds -seed_investors = db.query(InvestorTable).join(FundTable).filter( - FundTable.investment_stage_focus.contains('Seed') -).distinct().all() -``` - -## Status - -βœ… **FIXED:** All schema mismatches resolved -βœ… **VERIFIED:** Database schema validated -βœ… **READY:** Can now parse investor CSV without errors diff --git a/app/db/models.py b/app/db/models.py index 3ae0253..86acced 100644 --- a/app/db/models.py +++ b/app/db/models.py @@ -160,11 +160,15 @@ class FundTable(Base, TimestampMixin): # Fund details fund_name = Column(String, nullable=True) - fund_size = Column(String, nullable=True) # Store as string to preserve currency + fund_size = Column( + Integer, nullable=True + ) # Store as integer for numerical filtering fund_size_source_url = Column(String, nullable=True) - estimated_investment_size = Column( - String, nullable=True - ) # e.g., "EUR 1,000 to 2,000" + + # Check size range (parsed from estimated_investment_size by LLM) + check_size_lower = Column(Integer, nullable=True) + check_size_upper = Column(Integer, nullable=True) + source_url = Column(String, nullable=True) source_provider = Column(String, nullable=True) # e.g., "Perplexity" diff --git a/app/routers/investors.py b/app/routers/investors.py index 3b1b20b..b26ebcb 100644 --- a/app/routers/investors.py +++ b/app/routers/investors.py @@ -4,7 +4,11 @@ from db.db import get_db from db.models import InvestorTable, SectorTable from fastapi 
import APIRouter, Depends, HTTPException, Query from pydantic import BaseModel -from schemas.router_schemas import InvestmentStage, InvestorData +from schemas.router_schemas import ( + InvestmentStage, + InvestorData, + InvestorFundData, +) from sqlalchemy.orm import Session, selectinload router = APIRouter(tags=["Investor Routes"]) @@ -33,34 +37,95 @@ class InvestorUpdate(BaseModel): number_of_investments: Optional[int] = None -@router.get("/investors", response_model=List[InvestorData]) +@router.get("/investors", response_model=List[InvestorFundData]) def read_investors(db: Session = Depends(get_db)): - """Get all investors with their related data""" + """Get all investors with their funds as separate entries + + Each investor-fund combination is returned as a separate row. + An investor with 3 funds will appear as 3 entries. + """ investors = ( db.query(InvestorTable) .options( selectinload(InvestorTable.portfolio_companies), selectinload(InvestorTable.team_members), selectinload(InvestorTable.sectors), + selectinload(InvestorTable.funds), ) .all() ) - # Transform InvestorTable objects to InvestorData format - investor_data_list = [] + # Transform to InvestorFundData format (one row per investor-fund combination) + investor_fund_list = [] for investor in investors: - investor_data = InvestorData( - investor=investor, # This maps to InvestorSchema - portfolio_companies=investor.portfolio_companies, - team_members=investor.team_members, - sectors=investor.sectors, - ) - investor_data_list.append(investor_data) + # If investor has funds, create one entry per fund + if investor.funds: + for fund in investor.funds: + investor_fund_data = InvestorFundData( + # Investor fields + investor_id=investor.id, + investor_name=investor.name, + investor_description=investor.description, + investor_website=investor.website, + investor_headquarters=investor.headquarters, + aum=investor.aum, + aum_as_of_date=investor.aum_as_of_date, + aum_source_url=investor.aum_source_url, + 
investment_thesis=investor.investment_thesis, + portfolio_highlights=investor.portfolio_highlights, + number_of_investments=investor.number_of_investments, + # Fund fields + fund_id=fund.id, + fund_name=fund.fund_name, + fund_size=fund.fund_size, + fund_size_source_url=fund.fund_size_source_url, + check_size_lower=fund.check_size_lower, + check_size_upper=fund.check_size_upper, + geographic_focus=fund.geographic_focus, + investment_stage_focus=fund.investment_stage_focus, + sector_focus=fund.sector_focus, + # Related data (same for all funds of this investor) + portfolio_companies=investor.portfolio_companies, + team_members=investor.team_members, + sectors=investor.sectors, + ) + investor_fund_list.append(investor_fund_data) + else: + # If no funds, create one entry with null fund fields + investor_fund_data = InvestorFundData( + # Investor fields + investor_id=investor.id, + investor_name=investor.name, + investor_description=investor.description, + investor_website=investor.website, + investor_headquarters=investor.headquarters, + aum=investor.aum, + aum_as_of_date=investor.aum_as_of_date, + aum_source_url=investor.aum_source_url, + investment_thesis=investor.investment_thesis, + portfolio_highlights=investor.portfolio_highlights, + number_of_investments=investor.number_of_investments, + # Fund fields (null) + fund_id=None, + fund_name=None, + fund_size=None, + fund_size_source_url=None, + check_size_lower=None, + check_size_upper=None, + geographic_focus=None, + investment_stage_focus=None, + sector_focus=None, + # Related data + portfolio_companies=investor.portfolio_companies, + team_members=investor.team_members, + sectors=investor.sectors, + ) + investor_fund_list.append(investor_fund_data) - return investor_data_list + return investor_fund_list -@router.get("/investors/filter", response_model=List[InvestorData]) +@router.get("/investors/filter", response_model=List[InvestorFundData]) def filter_investors( stage: Optional[InvestmentStage] = Query( None, 
description="Filter by investment stage" @@ -75,13 +140,18 @@ def filter_investors( max_aum: Optional[int] = Query(None, description="Maximum AUM"), db: Session = Depends(get_db), ): - """Filter investors based on various criteria""" + """Filter investors based on various criteria + + Returns investor-fund combinations as separate rows. + An investor with 3 funds will appear as 3 entries. + """ # Start with base query query = db.query(InvestorTable).options( selectinload(InvestorTable.portfolio_companies), selectinload(InvestorTable.team_members), selectinload(InvestorTable.sectors), + selectinload(InvestorTable.funds), ) # Apply filters @@ -111,29 +181,86 @@ def filter_investors( investors = query.all() - # Transform to InvestorData format - investor_data_list = [] + # Transform to InvestorFundData format (one row per investor-fund combination) + investor_fund_list = [] for investor in investors: - investor_data = InvestorData( - investor=investor, - portfolio_companies=investor.portfolio_companies, - team_members=investor.team_members, - sectors=investor.sectors, - ) - investor_data_list.append(investor_data) + # If investor has funds, create one entry per fund + if investor.funds: + for fund in investor.funds: + investor_fund_data = InvestorFundData( + # Investor fields + investor_id=investor.id, + investor_name=investor.name, + investor_description=investor.description, + investor_website=investor.website, + investor_headquarters=investor.headquarters, + aum=investor.aum, + aum_as_of_date=investor.aum_as_of_date, + aum_source_url=investor.aum_source_url, + investment_thesis=investor.investment_thesis, + portfolio_highlights=investor.portfolio_highlights, + number_of_investments=investor.number_of_investments, + # Fund fields + fund_id=fund.id, + fund_name=fund.fund_name, + fund_size=fund.fund_size, + fund_size_source_url=fund.fund_size_source_url, + check_size_lower=fund.check_size_lower, + check_size_upper=fund.check_size_upper, + 
geographic_focus=fund.geographic_focus, + investment_stage_focus=fund.investment_stage_focus, + sector_focus=fund.sector_focus, + # Related data + portfolio_companies=investor.portfolio_companies, + team_members=investor.team_members, + sectors=investor.sectors, + ) + investor_fund_list.append(investor_fund_data) + else: + # If no funds, create one entry with null fund fields + investor_fund_data = InvestorFundData( + # Investor fields + investor_id=investor.id, + investor_name=investor.name, + investor_description=investor.description, + investor_website=investor.website, + investor_headquarters=investor.headquarters, + aum=investor.aum, + aum_as_of_date=investor.aum_as_of_date, + aum_source_url=investor.aum_source_url, + investment_thesis=investor.investment_thesis, + portfolio_highlights=investor.portfolio_highlights, + number_of_investments=investor.number_of_investments, + # Fund fields (null) + fund_id=None, + fund_name=None, + fund_size=None, + fund_size_source_url=None, + check_size_lower=None, + check_size_upper=None, + geographic_focus=None, + investment_stage_focus=None, + sector_focus=None, + # Related data + portfolio_companies=investor.portfolio_companies, + team_members=investor.team_members, + sectors=investor.sectors, + ) + investor_fund_list.append(investor_fund_data) - return investor_data_list + return investor_fund_list @router.get("/investors/{investor_id}", response_model=InvestorData) def read_investor(investor_id: int, db: Session = Depends(get_db)): - """Get a specific investor by ID""" + """Get a specific investor by ID with all their funds""" investor = ( db.query(InvestorTable) .options( selectinload(InvestorTable.portfolio_companies), selectinload(InvestorTable.team_members), selectinload(InvestorTable.sectors), + selectinload(InvestorTable.funds), ) .filter(InvestorTable.id == investor_id) .first() @@ -142,12 +269,13 @@ def read_investor(investor_id: int, db: Session = Depends(get_db)): if not investor: raise 
HTTPException(status_code=404, detail="Investor not found") - # Transform to InvestorData format + # Transform to InvestorData format (includes funds array) return InvestorData( investor=investor, portfolio_companies=investor.portfolio_companies, team_members=investor.team_members, sectors=investor.sectors, + funds=investor.funds, ) @@ -166,6 +294,7 @@ def create_investor(investor: InvestorCreate, db: Session = Depends(get_db)): selectinload(InvestorTable.portfolio_companies), selectinload(InvestorTable.team_members), selectinload(InvestorTable.sectors), + selectinload(InvestorTable.funds), ) .filter(InvestorTable.id == db_investor.id) .first() @@ -177,6 +306,7 @@ def create_investor(investor: InvestorCreate, db: Session = Depends(get_db)): portfolio_companies=investor_with_relations.portfolio_companies, team_members=investor_with_relations.team_members, sectors=investor_with_relations.sectors, + funds=investor_with_relations.funds, ) @@ -205,6 +335,7 @@ def update_investor( selectinload(InvestorTable.portfolio_companies), selectinload(InvestorTable.team_members), selectinload(InvestorTable.sectors), + selectinload(InvestorTable.funds), ) .filter(InvestorTable.id == investor_id) .first() @@ -216,6 +347,7 @@ def update_investor( portfolio_companies=investor_with_relations.portfolio_companies, team_members=investor_with_relations.team_members, sectors=investor_with_relations.sectors, + funds=investor_with_relations.funds, ) @@ -233,13 +365,16 @@ def delete_investor(investor_id: int, db: Session = Depends(get_db)): return {"message": "Investor deleted successfully"} -@router.get("/investors/{investor_id}/similar", response_model=List[InvestorData]) +@router.get("/investors/{investor_id}/similar", response_model=List[InvestorFundData]) def find_similar_investors( investor_id: int, limit: int = Query(10, description="Maximum number of similar investors to return"), db: Session = Depends(get_db), ): - """Find investors similar to a given investor based on 
characteristics""" + """Find investors similar to a given investor based on characteristics + + Returns investor-fund combinations as separate rows. + """ # Get the target investor target_investor = ( @@ -248,6 +383,7 @@ def find_similar_investors( selectinload(InvestorTable.portfolio_companies), selectinload(InvestorTable.team_members), selectinload(InvestorTable.sectors), + selectinload(InvestorTable.funds), ) .filter(InvestorTable.id == investor_id) .first() @@ -266,6 +402,7 @@ def find_similar_investors( selectinload(InvestorTable.portfolio_companies), selectinload(InvestorTable.team_members), selectinload(InvestorTable.sectors), + selectinload(InvestorTable.funds), ) .filter(InvestorTable.id != investor_id) .all() @@ -338,13 +475,71 @@ def find_similar_investors( scored_investors.sort(key=lambda x: x[0], reverse=True) similar_investors = [inv for score, inv in scored_investors[:limit]] - # Transform to InvestorData format - return [ - InvestorData( - investor=inv, - portfolio_companies=inv.portfolio_companies, - team_members=inv.team_members, - sectors=inv.sectors, - ) - for inv in similar_investors - ] + # Transform to InvestorFundData format (one row per investor-fund combination) + investor_fund_list = [] + for investor in similar_investors: + # If investor has funds, create one entry per fund + if investor.funds: + for fund in investor.funds: + investor_fund_data = InvestorFundData( + # Investor fields + investor_id=investor.id, + investor_name=investor.name, + investor_description=investor.description, + investor_website=investor.website, + investor_headquarters=investor.headquarters, + aum=investor.aum, + aum_as_of_date=investor.aum_as_of_date, + aum_source_url=investor.aum_source_url, + investment_thesis=investor.investment_thesis, + portfolio_highlights=investor.portfolio_highlights, + number_of_investments=investor.number_of_investments, + # Fund fields + fund_id=fund.id, + fund_name=fund.fund_name, + fund_size=fund.fund_size, + 
fund_size_source_url=fund.fund_size_source_url, + check_size_lower=fund.check_size_lower, + check_size_upper=fund.check_size_upper, + geographic_focus=fund.geographic_focus, + investment_stage_focus=fund.investment_stage_focus, + sector_focus=fund.sector_focus, + # Related data + portfolio_companies=investor.portfolio_companies, + team_members=investor.team_members, + sectors=investor.sectors, + ) + investor_fund_list.append(investor_fund_data) + else: + # If no funds, create one entry with null fund fields + investor_fund_data = InvestorFundData( + # Investor fields + investor_id=investor.id, + investor_name=investor.name, + investor_description=investor.description, + investor_website=investor.website, + investor_headquarters=investor.headquarters, + aum=investor.aum, + aum_as_of_date=investor.aum_as_of_date, + aum_source_url=investor.aum_source_url, + investment_thesis=investor.investment_thesis, + portfolio_highlights=investor.portfolio_highlights, + number_of_investments=investor.number_of_investments, + # Fund fields (null) + fund_id=None, + fund_name=None, + fund_size=None, + fund_size_source_url=None, + check_size_lower=None, + check_size_upper=None, + geographic_focus=None, + investment_stage_focus=None, + sector_focus=None, + # Related data + portfolio_companies=investor.portfolio_companies, + team_members=investor.team_members, + sectors=investor.sectors, + ) + investor_fund_list.append(investor_fund_data) + + return investor_fund_list diff --git a/app/schemas/router_schemas.py b/app/schemas/router_schemas.py index 1d1a685..942f2b1 100644 --- a/app/schemas/router_schemas.py +++ b/app/schemas/router_schemas.py @@ -32,6 +32,25 @@ class InvestorMemberSchema(BaseModel): from_attributes = True +class FundSchema(BaseModel): + id: int + fund_name: str | None + fund_size: int | None # Changed to int for numerical filtering + fund_size_source_url: str | None + check_size_lower: int | None # NEW: Lower bound of check size range + check_size_upper: int | None # 
NEW: Upper bound of check size range + source_url: str | None + source_provider: str | None + geographic_focus: List[str] | None + investment_stage_focus: List[str] | None + sector_focus: List[str] | None + created_at: Optional[datetime] = None + updated_at: Optional[datetime] = None + + class Config: + from_attributes = True + + class CompanyMemberSchema(BaseModel): id: int name: Optional[str] @@ -76,12 +95,53 @@ class InvestorSchema(BaseModel): class InvestorData(BaseModel): - """Comprehensive investor data schema for LLM processing""" + """Comprehensive investor data schema - used for individual investor requests""" investor: InvestorSchema portfolio_companies: List[CompanySchema] team_members: List[InvestorMemberSchema] sectors: List[SectorSchema] + funds: List[FundSchema] + + class Config: + from_attributes = True + + +class InvestorFundData(BaseModel): + """Investor-Fund combined data - used for list/filter requests + + Each row represents one investor-fund combination. + An investor with 3 funds will appear as 3 separate entries. 
+ """ + + # Investor fields + investor_id: int + investor_name: str + investor_description: Optional[str] + investor_website: Optional[str] + investor_headquarters: Optional[str] + aum: int | None + aum_as_of_date: str | None + aum_source_url: str | None + investment_thesis: List[str] | None + portfolio_highlights: List[str] | None + number_of_investments: int | None + + # Fund fields + fund_id: int | None + fund_name: str | None + fund_size: int | None # Changed to int for numerical filtering + fund_size_source_url: str | None + check_size_lower: int | None # NEW: Lower bound of check size range + check_size_upper: int | None # NEW: Upper bound of check size range + geographic_focus: List[str] | None + investment_stage_focus: List[str] | None + sector_focus: List[str] | None + + # Related data + portfolio_companies: List[CompanySchema] + team_members: List[InvestorMemberSchema] + sectors: List[SectorSchema] class Config: from_attributes = True @@ -99,3 +159,9 @@ class CompanyData(BaseModel): # Renamed from CompaniesData for consistency class InvestorList(BaseModel): investors: List[InvestorData] + + +class InvestorFundList(BaseModel): + """List of investor-fund combinations""" + + investor_funds: List[InvestorFundData] diff --git a/app/services/llm_parser.py b/app/services/llm_parser.py index 4111434..7fbd46d 100644 --- a/app/services/llm_parser.py +++ b/app/services/llm_parser.py @@ -27,6 +27,15 @@ class CurrencyConversion(BaseModel): notes: str = "" +class CheckSizeRange(BaseModel): + """Schema for LLM check size range parsing from estimated investment size""" + + lower_bound_usd: int = 0 + upper_bound_usd: int = 0 + confidence: str = "high" # high, medium, low + notes: str = "" + + class InvestorProcessor: def __init__(self): self.llm = ChatOpenAI( @@ -36,10 +45,12 @@ class InvestorProcessor: temperature=0, ) - # Only use structured LLM for currency conversion + # Structured LLMs for specific parsing tasks self.currency_converter_llm = 
self.llm.with_structured_output( CurrencyConversion ) + self.check_size_parser_llm = self.llm.with_structured_output(CheckSizeRange) + # Keep legacy structured LLMs for backward compatibility self.investor_structured_llm = self.llm.with_structured_output(InvestorData) self.company_structured_llm = self.llm.with_structured_output(CompanyData) @@ -77,6 +88,57 @@ Return only the USD integer amount with current exchange rates.""" print(f"Error converting currency '{amount_str}': {e}") return None + async def parse_check_size_range( + self, estimated_investment_str: str + ) -> tuple[Optional[int], Optional[int]]: + """ + Use LLM to parse check size range from estimated investment size string. + Returns tuple of (lower_bound_usd, upper_bound_usd). + + Handles formats like: + - "EUR 1,000 to 2,000" + - "$100K-$500K" + - "Between $1M and $5M" + - "Up to EUR 10 million" + - "$2M typical" + """ + if ( + not estimated_investment_str + or estimated_investment_str == "Not Available" + or estimated_investment_str == "0" + ): + return None, None + + try: + prompt = f"""Parse this check size/investment range into lower and upper bounds in USD as integers. 
+ +Input: {estimated_investment_str} + +Instructions: +- If it's a range (e.g., "EUR 1M to 5M"), extract both bounds +- If it's a single amount (e.g., "$2M typical"), use it as both lower and upper +- If it says "up to X", use 0 as lower and X as upper +- Convert all currencies to USD using current exchange rates +- Return integers (whole numbers, no decimals) + +Examples: +- "EUR 1,000 to 2,000" -> lower: 1100, upper: 2200 +- "$100K-$500K" -> lower: 100000, upper: 500000 +- "Between $1M and $5M" -> lower: 1000000, upper: 5000000 +- "Up to EUR 10 million" -> lower: 0, upper: 11000000 +- "$2M typical" -> lower: 2000000, upper: 2000000 +- "GBP 500K-2M" -> lower: 600000, upper: 2400000 + +Return the lower and upper bounds in USD.""" + + result = await self.check_size_parser_llm.ainvoke(prompt) + lower = result.lower_bound_usd if result.lower_bound_usd > 0 else None + upper = result.upper_bound_usd if result.upper_bound_usd > 0 else None + return lower, upper + except Exception as e: + print(f"Error parsing check size range '{estimated_investment_str}': {e}") + return None, None + def parse_json_profile(self, json_str: str) -> Optional[dict]: """ Manually parse the JSON profile from the CSV. 
@@ -157,7 +219,8 @@ Return only the USD integer amount with current exchange rates.""" "fund_name": fund.get("fundName"), "fund_size": None, "fund_size_source_url": fund.get("fundSizeSourceUrl"), - "estimated_investment_size": None, + "check_size_lower": None, + "check_size_upper": None, "source_url": fund.get("sourceUrl"), "source_provider": fund.get("sourceProvider"), "geographic_focus": fund.get("geographicFocus", []), @@ -165,19 +228,23 @@ Return only the USD integer amount with current exchange rates.""" "sector_focus": fund.get("sectorFocus", []), } - # Convert fund size to USD + # Convert fund size to USD integer fund_size_str = fund.get("fundSize") if fund_size_str and fund_size_str != "Not Available": fund_size_usd = await self.convert_to_usd(fund_size_str) if fund_size_usd: - fund_data["fund_size"] = str(fund_size_usd) + fund_data["fund_size"] = fund_size_usd # Store as integer - # Convert estimated investment size + # Parse check size range from estimated investment size est_size_str = fund.get("estimatedInvestmentSize") if est_size_str and est_size_str != "Not Available": - est_size_usd = await self.convert_to_usd(est_size_str) - if est_size_usd: - fund_data["estimated_investment_size"] = str(est_size_usd) + check_lower, check_upper = await self.parse_check_size_range( + est_size_str + ) + if check_lower is not None: + fund_data["check_size_lower"] = check_lower + if check_upper is not None: + fund_data["check_size_upper"] = check_upper investor_data["funds"].append(fund_data) @@ -430,11 +497,10 @@ Return only the USD integer amount with current exchange rates.""" fund = FundTable( investor_id=investor.id, fund_name=fund_data.get("fund_name"), - fund_size=fund_data.get("fund_size"), + fund_size=fund_data.get("fund_size"), # Now an integer fund_size_source_url=fund_data.get("fund_size_source_url"), - estimated_investment_size=fund_data.get( - "estimated_investment_size" - ), + check_size_lower=fund_data.get("check_size_lower"), # NEW + 
check_size_upper=fund_data.get("check_size_upper"), # NEW source_url=fund_data.get("source_url"), source_provider=fund_data.get("source_provider"), geographic_focus=fund_data.get("geographic_focus"), diff --git a/app/services/querying.py b/app/services/querying.py index 3078e18..27df87a 100644 --- a/app/services/querying.py +++ b/app/services/querying.py @@ -95,6 +95,7 @@ class QueryProcessor: selectinload(InvestorTable.portfolio_companies), selectinload(InvestorTable.team_members), selectinload(InvestorTable.sectors), + selectinload(InvestorTable.funds), ) .filter(InvestorTable.id.in_(investor_ids)) ) @@ -109,6 +110,7 @@ class QueryProcessor: portfolio_companies=investor.portfolio_companies, team_members=investor.team_members, sectors=investor.sectors, + funds=investor.funds, ) investor_data_list.append(investor_data) diff --git a/preprocessor/migrate_fund_schema.py b/preprocessor/migrate_fund_schema.py new file mode 100644 index 0000000..dae12bf --- /dev/null +++ b/preprocessor/migrate_fund_schema.py @@ -0,0 +1,159 @@ +""" +Migration script to update FundTable schema: +- Change fund_size from VARCHAR to INTEGER +- Remove estimated_investment_size column +- Add check_size_lower INTEGER column +- Add check_size_upper INTEGER column +""" + +import sys +from pathlib import Path + +# Add preprocessor to path +sys.path.insert(0, str(Path(__file__).parent)) + +from models import engine +from sqlalchemy import text + + +def migrate_fund_table(): + """ + Migrate the funds table to add check_size fields and update fund_size type. + + SQLite doesn't support ALTER COLUMN directly, so we need to: + 1. Create new table with correct schema + 2. Copy data from old table + 3. Drop old table + 4. 
Rename new table + """ + + print("πŸ”„ Starting fund table migration...") + + with engine.connect() as conn: + # Start transaction + trans = conn.begin() + + try: + # Check if migration is needed + result = conn.execute(text("PRAGMA table_info(funds)")) + columns = {row[1]: row[2] for row in result} + + if "check_size_lower" in columns and "check_size_upper" in columns: + print("βœ… Migration already applied - check_size columns exist") + return + + print("πŸ“Š Current columns:", list(columns.keys())) + + # Create new table with updated schema + print("\n1️⃣ Creating new funds table with updated schema...") + conn.execute( + text(""" + CREATE TABLE IF NOT EXISTS funds_new ( + id INTEGER PRIMARY KEY, + investor_id INTEGER NOT NULL, + fund_name VARCHAR, + fund_size INTEGER, + fund_size_source_url VARCHAR, + check_size_lower INTEGER, + check_size_upper INTEGER, + source_url VARCHAR, + source_provider VARCHAR, + geographic_focus JSON, + investment_stage_focus JSON, + sector_focus JSON, + created_at DATETIME DEFAULT CURRENT_TIMESTAMP NOT NULL, + updated_at DATETIME, + FOREIGN KEY (investor_id) REFERENCES investors(id) + ) + """) + ) + + # Copy data from old table to new table + print("2️⃣ Copying data from old table...") + + # Check if old estimated_investment_size column exists + if "estimated_investment_size" in columns: + # We have estimated_investment_size but it's a string + # We'll set check_size fields to NULL for now - they'll be repopulated when re-parsing + conn.execute( + text(""" + INSERT INTO funds_new ( + id, investor_id, fund_name, fund_size, fund_size_source_url, + check_size_lower, check_size_upper, + source_url, source_provider, + geographic_focus, investment_stage_focus, sector_focus, + created_at, updated_at + ) + SELECT + id, investor_id, fund_name, + CAST(fund_size AS INTEGER) as fund_size, + fund_size_source_url, + NULL as check_size_lower, + NULL as check_size_upper, + source_url, source_provider, + geographic_focus, investment_stage_focus, 
sector_focus, + created_at, updated_at + FROM funds + """) + ) + else: + # No estimated_investment_size column (fresh install or already migrated partially) + conn.execute( + text(""" + INSERT INTO funds_new ( + id, investor_id, fund_name, fund_size, fund_size_source_url, + check_size_lower, check_size_upper, + source_url, source_provider, + geographic_focus, investment_stage_focus, sector_focus, + created_at, updated_at + ) + SELECT + id, investor_id, fund_name, + CAST(fund_size AS INTEGER) as fund_size, + fund_size_source_url, + NULL as check_size_lower, + NULL as check_size_upper, + source_url, source_provider, + geographic_focus, investment_stage_focus, sector_focus, + created_at, updated_at + FROM funds + """) + ) + + rows_copied = conn.execute( + text("SELECT COUNT(*) FROM funds_new") + ).fetchone()[0] + print(f" βœ… Copied {rows_copied} rows") + + # Drop old table + print("3️⃣ Dropping old funds table...") + conn.execute(text("DROP TABLE funds")) + + # Rename new table + print("4️⃣ Renaming funds_new to funds...") + conn.execute(text("ALTER TABLE funds_new RENAME TO funds")) + + # Commit transaction + trans.commit() + + print("\nβœ… Migration completed successfully!") + print("\nπŸ“ Summary:") + print(" - fund_size: VARCHAR β†’ INTEGER") + print(" - estimated_investment_size: REMOVED") + print(" - check_size_lower: ADDED (INTEGER)") + print(" - check_size_upper: ADDED (INTEGER)") + print(f" - {rows_copied} fund records migrated") + + print( + "\n⚠️ Note: check_size_lower and check_size_upper are NULL for existing records." 
+ ) + print(" Run the investor CSV parser again to populate these fields.") + + except Exception as e: + trans.rollback() + print(f"\n❌ Migration failed: {e}") + raise + + +if __name__ == "__main__": + migrate_fund_table() diff --git a/preprocessor/models.py b/preprocessor/models.py index bf0073b..d768803 100644 --- a/preprocessor/models.py +++ b/preprocessor/models.py @@ -223,11 +223,15 @@ class FundTable(Base, TimestampMixin): # Fund details fund_name = Column(String, nullable=True) - fund_size = Column(String, nullable=True) # Store as string to preserve currency + fund_size = Column( + Integer, nullable=True + ) # Store as integer for numerical filtering fund_size_source_url = Column(String, nullable=True) - estimated_investment_size = Column( - String, nullable=True - ) # e.g., "EUR 1,000 to 2,000" + + # Check size range (parsed from estimated_investment_size by LLM) + check_size_lower = Column(Integer, nullable=True) + check_size_upper = Column(Integer, nullable=True) + source_url = Column(String, nullable=True) source_provider = Column(String, nullable=True) # e.g., "Perplexity" diff --git a/preprocessor/version_two.db b/preprocessor/version_two.db index 815c5db5288235a06a226de52117452155c3fe9e..174cc40850958370c4245a8a30b963ff8fe86a4c 100644 GIT binary patch delta 13991 zcmaia3tUvy_WwRJXJ*dKnHk;!A~FM_ARr>5A}Syv9%hDlKgb6nqT&M(#AhkzU=K6% zbz_&6rj=TjT3XXnu`J87$E|+V%~G?vX+C@1+x!1LXSD3ze!st;&)TwQuf6u(Yp=ET zdYsOl&sCi#&#Bg>#N17YW*;FW8IUIbzdpB+xm%q2#8hUh)2ay(Bl#DJ+G~E%bkd|T z=I~<;6ZP%7A9br(4g=@!up}K@@8VY6WKPm0CMK#@FIiAOZEoX?j^^p@N~56_p2~7x zrNvi1)?R5zSAI;lWEm`f@vmvRMX2#rR#kc|E|1_S_taW!m9@EljoZAiv3)^HTb(?K zzER6)>D|?gq2Vu)*qCE0_C2H3FV%@rm3K{H!W?vx{bd^qj>=)HQ_H8qPN#D z$t(VHs9{!P!<;&qdR<3rYh&C0y+(UWM_U6h+vfh?TfTzV+SanLX*!6?=NtFG9&l!3 z%gna=)>%yrbu(HTI@&GOUT4k!a>(ZbOrKGId6M?V2AT2yUkW`wW_a~qQs`)%UcaDm zdR_ej%Qy&!PjFQJ^)ALcJ(WULjVuNhIyK0o_sVLTp8a1mv%{Q^aWO0*s)BG#G?S+a zuL?4YlESf_HO)-e63yIYUeT8jb`t<1&u#|n0z3(L0dNfPA>b3hMZgb$-=G>=Kr8^N 
zp~(c~0SW=50OJ65fFDo?XackXIshVIHJ}?Rl?1@QaAGvGG-^M?M&Hcd#{g6SHGl015#^0Y!jefZ>1*Es+&dZ|ed-UwdSEe@|$Gf8y+RS8(s2 z&Y&5nX`-oJ5c9&Fy_F*Xl?L!jOSF=+xpxq%ZM!Q{{i}pcr z650pE8npL|rD(T_L(py&`=Py0OhCIuWYBJIM{{r4G_?1)(cC>2&8DGf?vfX{Gung= zN0SXCYMyzAX`yj3f0F0)qV79gopu7dO}$^OQKd60X=|x_E-$u3M-vscM+lv{PUuU| z2OTClMx#ogd)5g%4JlDkgz+oGQ`ZZ*Gt;^^b+g$cT;X3Tbw?hV3mU9KsL zsN95pxp6~S9t&@JS#Wb1)4+7d5R&@w(`p>K8zi*<2&ELBdZg9>X6dJZ${SzG}9 zoC-r(UHpO0?p&cr2Db> zt|U}*xA3+uL4#xG6MFn^VZC7)iv@6xSYY8jLT2~Q5KnjABOJC2QpXTg3M;};S6Hou z;s99kTPo^XC$zLRYj0oF8QyrW5X0yyRG1G=JiJ*rI53*Q)=Ok!L1Hb*QD(HYESfI6 z3Qf&3EyBEcEz_FjHZ``<`|c5n>6ka|M*8s z-8Nmawp(k`T+hCtepfY3@kl6mcojni}5ThUqqjm--> z+8W#GQ(J{){Ot4?!eq130331jHX)JKwtPeA`pwQhH1~uspI+W3JY!g71yU*zt%UBs zU$~T*V*$hPjuBEyteGN?(pKKuxR}Q55G?f61HvMmISo6=BXr7x!mS2vUvzS0bb9$g zA(8gl0dX3+T`1v1lBx{RW!r`8b=p4YWE1+!c0tnVQqUPq=#4wjY3Yql8lk40!ff5L z_0dzj;JzN5!?az#U)}Q*m`Sy zGyJ1^OF4X(@^!l)txzjjX$m83Td23Xz(rJK|@_3*b~w9b8id% zwSvW4;ccaT|1LaYm@))oUgh>MmJ|7LashsX3BvC1W2WX0d|mrsaMwxblkW%@4Tbr@ zi|G9BcdsbWh!fRG^w)QV>HL5}(9UdDEC89g?+Hn)RvOz$OY5pVk!HmI2q$#*JdnsF zH1&Pqh;C^vI(|aWzK^?~nuC=Z2%Yf(Y*58Oj06dt@S$+fP>_w0;Vc%W(f<(ovYHK# zPo<-xs#7D);(rKl7-nPvCvpVmM>yAg9-E%t`jN0p*LOgKRLOB+lWt~zpiL(9qvOJ9 zU9U`xI0!xbv9KXMwI8&?s;G`MAj4WD>sh(o>l11$f*=^0g`R8l=&DA2U@|^5P`?gl@Xa%JOm z`_}5ShN?2Ku$Yxn3B7AuwUySsWqlshUmX-n;E+rui8P-%WSV$(&0$SaR z-t<6qoMO)&xwAT{``&7fzWP9Qx6UyJhjNo}J&-H}Q zZLhX4y3wP6D3EaM&gx=DH>gC}Ep(QBA z=r~AtlG|orbRlPCXs*ZhuA!j@)JH3<7rZu0g!N+_Hjdutwe2v>c3}UNtf(fm$Y<-% zGU^#+bhFP^A~(zX2J9G4W<^+fvBcV5G`Wxr8xIq232>L*?xbVbGI7 z6c!ga0-Enm5xO;Cd)6?v8YplJz|O{?%^KOx=H<3zx<6=JV5k?sz*JU@fgK9j z;#l(+k9X2jH`@l#6SHk8;k6+f!{}-zfJQx`-L5otm0R&U zR&dH+Vx=(96iT9abFpcv@vt$GA7!v|<8|Y;yEK1i=c#w76ICmihpDTRuadp-EKZ7^ zhWn@@;nmT0Bct=wE6*^!H^%;kE@3J*46UCZYhSN3)uDr@n~Srr(p5~sBsZZn&VJA^ zZ8El%&x(nJ*2UZVuu*QQjLu(d?-RZ+-k!!70+TQ^V%ALEpIxM$ulhwbgfRkzh?Cnf3(iT_TE_>Rl+mX2!k_ejI|C-n$HMt+ z5Wb4FJRdKXaYSKFOWT53mVyE_vOFigzmiaKg(HcUyr3`l-PpOHiT6D5W%~i0W)3FcrT^$KL^|GUEa{p 
zIM)&i2>lNX>!jil`&U`Bns7@wtXL1UdXknS#{G^LfWcM{brm}!{Ol`s3!~4RC2tH5 z2|fL)eITnFU?#NGk3p7H=Uj3`e%60S=h5m}LlcZ*f>Cf6RV8FP1c3MW)AIwUEgX?plhewHMHlL-Ta^3Uyj-DG)!;6F54C6Zhpf)T4t^>eEto4 zB4aU=qa-TXJk4Y?b{SsO8+C7MKh?afA?glQ2XySHY&f@D!~{`xT@hjsQcm(JoQ0&{ z%qqC9S0sV1f7%^S?>^yZG)ZQWz%$Nd#Z(A;eW#-rJGA{dLOVMh6J)d$dbZQCN_WEw z90=08M0EUM;Fe=XJuA*5beZHRViQjM&_my9cUZz7N{%eXP;mnoGhErfVI^44xE(1} zSm{`)OTHdt%1QX0m5u~PH+PxRqZe*;e5o6;6rIr|eEcTIP$p{V60iVEtRTcotQGXs zD$psp#SzOUjDS=7_G(bv-On*4^4CMRI^wBzmE#ycdNJk+$~^kTv*s~q>o>SjnuLogtp)27#y+At{wKF^yF=h?Yf~I7=@zT zw#KnnH+=y*vQFRbIMMBLu>GQ$T4Js=Z8Ey~5BV~~VS`FPmFwZ6wGV5KvXXkas*I@( zz-*$YRm@6V^R4M*miM7-LMP$;Z4OvL{x@^7SY#8b2AvHYJzT=q1^%2cL z6$tHk$8jguSCm6YD0KY0j=TDaV?;R?%vJ;ro|TwdNch`r^$Y4-JLWF1I2#)4o9Wiu zb#ZjcEjk^2^M4#Gxg{bZLp|J}>)v;Ca0w!Z9vsF0dEc>)>kxJ5We_^=1IK)0rKrWN zj%G#36g~3+ZcFv!CD@2ku;h7rP5Cu{z4)PHo?aXvs&To6g#P=uTA<8g-}YR%&TYJYeeTC*rYG<*_2-E9zYB?9DRH$2#f!)lMPYv)Wn2 z@NHs-a?{`r{p~hqq2k5Ob~}^lq&3dJ@p>^G!x3Zp;~M7`$F}!TrwM}PJj5*%tr%}0 z)Uek1yCPu0A@O&0s22dDbMPisEp=ZLB3MO@NbPS^CO z4=s1*(i5j^HgGK>LN{4T7JpXr6&Dm!fCvX;_UAS4@Uz9<=z&|mpE~a}(%t{8xszWgCILx_o2l!|nk)WKNwG>#*M3>^ z6gHU{S*_+w&4=7{u@`#vBs}PBO&!yHsfO)qCg+HGg=x02oWIU6PCuF}(v8%nvy;?% zRRVJu4oAvSz33B%U>-b)7tT80P*8x68MA(F6&O~57aEo(LBrZ|LFWusdjbJK&4PND3Wt46ZGl5?s`@yo%C&Su0 zIhK93>ZFX?CyZt&(|yvZNo5X^s3B$>9LXRr7#v)a*3JH+ep&S|m5n(h8^bNIT3WFb z_Xn41?nIZwXNhCbi^LfJp6I%w5#dFyXj(DJHJP)CqcL6r(>=*`B)w2XWTS#Z9dReD zjfk1Z4~hp5FTwQ0jh)o`xHE%XVFOG!yNMR+olTx15+9<_bLthCIuFmOD77AmwjWJFYi!sm5O0r!@IPJ`fcpJ9U z&Q(-g<0_*mpE*C_7KiW zGekRv<`DWrs{7M;y@;3!$Hu=RV$H9F`lQo~Msshuhrl=0S(E0yi@Wmnnn<4b6Ltt)7>h-E;kVkHOweX-UxfMxd2xRS6i zU*~Fzq%M@d;wQRX-Gf~$G8Z|-JjFB+*`=}!X@0c(NPLMH0-JD4ge)TksqOgdefo*y&ZT9s@Iu$exaagw6obG~ zB(R0#=V!|}`C+BkMAP;JqUr1{?h&*k(cR6B6%pCV8t_4)`v5mvL;?fPCA~Mv{RKBz zLeicS!1V#O3(=%L>35p3$QMnc__Rbw^9gIcH02)k3-Q4TCUE0-}N7zTyU#mx}Ml(C8D_U)rWATATNhc!mQdKC~v`uzT zX2c#xCfG%Z-rIJbQPa=4Kx@11h@@DoLZYETE5 zak}MGHqIqjYXm$eaO5mc-^?M`C&R1skrZWaIN_RrkIWLY-R zHHjXct~-cE`?>phye$nN+)n7x&)rA3IuUsd`BZm);r@u5CCYgXyvHdscC{9HW 
z@qOc$?yvbVVjawVg?yc~{)~G7t6FF239mcjHZuGWaSF!Zz$Refv+jOu`moiV6k|oy z{*^n5CVu5U$Xmq8vXHTI(*ERE?rb*x_{J+pRp;DKc3V7bA0x35(<8?5h64s2w^Vyc zv!5+c?^M0Q{6K7S=$DhxHBelvJXa#bRTQk&MWU>6JFjR&?0ZL$5n1a^qB*7I7i4dX?ugrm-{@T9Trk2%Ze z8keUJEgR=)rv1iw{tmZzK9GiMt|IAhpvOF)_gsUwztZz}u24iGL*B}o@t!?KR&2p6 zn^NeZRaKsBR`<2&N1_e1`3WA+?|;37*r*wtDr7|zE&ee9yU^Jll~Zz%$`#d~bUIq_ z{4&rb&I1yX5b%cq^}&@ku=DluwgvTbEsn~Xa>X(csYqEPIgFXns_y$dY*s8eO`=${ z+BC>Gk#93d+-5FAcdPb{)~z|Hv9hbw7u4fb&ydsdy@mUAc_jlDf)}GWCMgw@B|O;V z*{hZ;?q84f&aq&zD@IcmsO4H}Qz)l)))p4_d=nKb(hGa2HBs_Vzh1!u(e}t2wub zT!H-7mc8mZ#@!$ySs;4^@kc#BWG@z%U}}MSnf$;M=QM(Cw5D;9rM!9O+@^X9ihNDD ztch#j2Il)S>6F(zkM@)6u4eKii87g9H{Q%w>fvMQ0@_WQk!+4?6LSH%ttaJ+0s9gp zRIFrv-1eMzJ(nUS%8L=&?|CofvLw{3U{UA`&wC#*nxq6=Aqt{$oI3x2H=or^uI{8e z4|x5NruLwBN4msIC}*iMW$jo(itA)Qj_kXVo#}6}k9E?;7Viaak|c-uIV3#K>J2ez zBcxa?rKEJ4NO2)z46<)ROLI$`#okii+)lqt_uk8yq!`SB19@A9cNN3irD)8FgllP; z-hoQsIWg0_&S;RLuvMfR5L(dhGrdD2d5*&V-T*yS0j}@r@BPp?MlyrkWI1zA=-L6^ zA*|x=9v$E<`5kL0U2gK}>6!lCLQ1o|Bl&X4gyThGl8>^ySCXrnmU)ddGu!(;H(D}+ zC<+nj1HHuz-y+FX=E&RHnB&coEsdEvrl2<0X9|*v`*OTn`pVbEGm}VE$c%{F@Uvdz z5_Q8gud3@+JD5qb8#4xOOqC?Z*%244yTo@TZHwA*GCg17yT}cbPyw**^J=LN#v?ZBhOjq{t(}!m!rQ~%T9t9Ql!7t5DzXD% zc*dI%Ie|FaTSb-TwP(Cbxn2@Vv9jjapY?8lp+GGb0zyB3*1IKFW@{{Q5%YDXamK&# zpBfJ97jmt-R_&80Qa7-B)!(QWs@_-iXJ!$XtPTB)QoJMwA%YV3U3kWq#WMf8wTzZO z>+3^3$9%o$iX0zD>z?)P;6_Sv!Ez>Qz|Z-fHd!Y$&uFV}U(nXkAlJpK z#p^q1=MZ0p+*HxJ0$)aIe`yeACH`qv6+QuM&v`48kinGy9@0=e#FxVUiC(kydI=RyRWgKyj3)V3agn^;h-wpZVr9o48;zmk3w^uN zRgxU-r{UuZQcUuN$ONxk^_LC5lCe)5>idkVmgGDDd`DH0Z#)D0Eyw+k1m`dxyw^d} z0PG5p{oLWchq+v-KYB3#sUv(l`B+H~)FbNn<_O=F6v0*BHJ0WT`<~#_rG5~lVvQJu z@^G;)T}yOlZ+{n#Or0-O)XL=8~vHbBXa7zgK@J_Zg~pOSFeI zEo!RT$oR=zSx_eMVxv@nU6io!q%Kzb@fci;@1&+?e;S=_^T$)0&3`jLQW}fBMd~oe zZGOd1Fw#DD{|^Qtm1BAl3mNI_@b_i4-{khtlL`JLy4>N{(-{tbILarLfhogT334o% z3aQoM?`04y_Cc;e+7(6kETG2U8EHaJ|4x(SmP&!|kRN=!5PN+CS*2zt?)8PJ(u{b_ zH-{^b&Ci&k;T5{jsDns)t@z66GwEy-d3OcH)H?3=;xA!#_ecsQ}K)Tezzl+W959sMlU-|FsTP``VdBypzC;25S5PnNztH%HC 
zrtiWBMpv;1&|A*=;aE#@pbxL$@;U#Vu&WOA;5x25?|+;hF4^%7MvMGtQ2Pb{7}hw# zR7PVy_NRvTUhunMr*kA5HqF{#@EPkd`4VHDV zA}Bvp?k{DP?{qGg`%@XdNfJOJA~{JF5y|1GP#Q4Nb)^AAcus{sf$3+LCV+yK6@dt_ zmdwG~){LzRM5;LfrLuKsj6XAcY@A=m@JppC;1{!EKB1pfUKLvRWWR+j9`7%pq4EA- zxbf0>oECC9smlKXtel(@m`}o)6a2$uw+tm%=w5i5;O}B0cckh^qEb!C{PTt?y@Q*A zTK6DCZd83lbxze!e!EmEqc~2Qtng5lfR5V&*@^`0y)AG{R#2LR6<|mRX&`wv`GXdw zb!=lBzCdcMxA+Hn2Pvm%r{!A$@LbmfHgR@I4%%@&mfHib7-A$jIb>$YK*Z z1|kNyf*ea7YXkeZ6;dshhimrkN&hl#gcQOIF9`=u`G+z&tx^!(0v1o0I|G>{c?UPj z9+s=EWkF+8bA+3Ml9WIBsXv{f*}>0|0x~ldv0Hx{Vkh4&QVZN^whj&5;(cw}hc*1#ZE z`^e9P?pFKpjnrL%q;T}=z;(<3*d($n>V10(TT#|!Tl%IiCZndVmK`rq6 zoBQ8WlBoG|U?ev|Lit_Qf;abapaNE`LqY)_#scm&z4z;Yh1$;sGO6oqz#5MFCcrVA zAk7B?UX8xr243Q?lUmUOqk(?@+dz?`;OET2WLooGUzK&0slv z!tLJ&p5yAJc^E4swCP`gkGTwKF2*YGMdiN(TR4j(CvR}K-~2mp4>v=ajUH@M_YZ+j zIhQ1-WRQTEiE2Y4K}?awPnk8ToZ&m*!gBekvg# z58P(z>eIzSQ1JdjP3cr~C?^th4Bi*)L-(3PRrHU)rbUG|^Gmwa*l{E@I8YWGps>ub zKWL(w^576Ixl4tGU=hZY2NM{+p^HJ+FQ)|Q6nf!TJYISfZt1eYd2-j8ziy8B5X)&fL>=4yfYOM|?(Xb8N zI8_}ip&EN|MR#YA?QJBxi1|B{U`#fg(Lc@2(VWq+>QySr+)XW=y;nmG#kpN^*iJSp zM?SkkNi4@-?4dooLPxp9U9ngLmU;anp>O%-F1eN*xq!8ghOTJ-CMgs{KYTQ_HBVa9 z6%B+!<;zU6Y?&y(uT^8~+Zzi8TU_;R3z{3-P?`{glsP<1+U`((7)=@~1@&D~$~t#^ z5;|^NxxCA)SP-NR=u1o}KVqYPVM60ne07%z9En6vHZ~MVK!`himDr;FN36b`4?Sa% zjiFKPAW;KNPnx2Q<%U>&Di_keqAh1Hv18Q^*)7OZBL&ta$Ce19W=^R+&kgLthq-dP zVPakFgL=u)l?w7GownZ>5>Pqpl54t9{uj4~R`ABI6bv>hUuoF4g@&_ZZbE|e5bN$6 z-nA{1%W&mgy+Il_=9BwF&vWy;msn~SvV};ugc_+h!jxbmM#q3D4v3JsrM?2U(H5PoYQ>vKo!(q?qKGcP3 zQ#HkcQ$4`{!fy%vQ)sJF<%x`7KMC=u8rfQQMR5!ln-I&2JRF|&=22hGHPvP>g*mxre#-FTXg)SJmc8D#vUA?jZtn+g5$A8; zXfLiT^)`UIMm8RvYh}m6`Z`ufHw$W+vVD2yvTk#;y}o3Qy*@4Qa-iqh|9A=is$-4u z__cdrWzVwCmEH5(dzP=l%Chd|E8Asi*we~RfJ3b;5BuxcNO-QAjhb5CP*W}Q98J5j zb76P;f)!nZ1LnZzf$pxAEBZ&hgr^(XL8uVfWY3lsc6FRK6h#{!-i~GH2xvRRTt#@h zEKFngQ0UfSBfh4?;aCn1hhW(&t;DiNnvZ3>Bx2blRbW{u6=69`nu6tY$&95E(VbpPh%rV|7^E zSB%B3Y%F#rU~yN3z%cAiWwM9gq)@TybJ`cQ$Fz@Y4}+O1ephKf)V`%Xp#Ae>4@H?- z7}HVnIh@muReEL}97mO&kMAF6U_CEvQM{*s6ZemcfJe3|lHoi)dA{7HhzfDJS(jVk 
za^r*2tK#7?*pj%N^x%N4wlH zE_bZU9p`fEUG8x%x54F(ce#x&_js4v?L8R=L(G_DfURhiaYFAG=^zq9E4ZmyUd}sq)g;2< z%X88p^;+?Lu93s(ltRQh@f9vA6h)E&pRE&jsE3r;RSCny!ti==QfOwTJd59cXJg^$ zdhzjzSs@XO!eF93Oo_!}qe(V~xuK-9cg~9bSz`x zQy>p%&5&s3e55|M4-APd$=jGPkM4Kv4B5DNlAvn1WOCe!6x>~hDJxV8Asos}S93ZO4mTY( zUnky}s2PvXsVIsfCZi|=+m$$K%njX3(eQa&`}@SkY2b8;1>BHPZUDPn;(9JL9-mXd za=j>VoB^K;;h_Ayd>lTTz;=VUn7dZ*rQdghxQbg7=cC^u4szkK_*?{cZxOe0y)pQl z1Q|Dq%Q!X~pEF_ajpD|H*-@2S~Itb8LG^{ zB;^|H+A1DPEgU`}d#Ra;t6OXk1PO}}&dT@hy)v-$D7q8WW$~lu& z=Z?Ysk|Ar5;bzgm`}^~0?!24DdFt7bNHEE?!3bw=7Gu;|Ik`5k8u&;mw~Ja;mQUUN z`i`$2_b3lLu9!x?VU>xIOf3`Eq0uG$KZL<2G>;3fVh9E2Mu=o&e)- z7v~FEQ&726Of=H~H{CAAsWYwlSw0~YwU^iP{uUrtplU??fU8JHmXp8+;wf%z8a_7x zdk5}}_EhY)3pUynkygDGc2Clor zGwP+uNH5O}uGuN3235xX8E(e|JH^{MW0IHotUrl&aSIZWxD7u3llT!AJqh1e!P&dT zTN0BeqHbAa?Ua%dEIG15`?b^IaAf+|^ncULpsvE*yTyeb<%f@1Ah@6tlC+KJZnr;2PgN7Y3l62JUy59i)NOaJ{@Tt9^(Nq zpXJgE~CT^9v{A4Wd$$&%YPGp;&ge)z5-4k5^v-d<>E9NpzVI~W3C_v-#38c0r4qRr1C4f z6Xo+boY~HVRcY_iP~mN%UA)OpwYsl(ziPFIW1 zRUYX^2d#bUILU{-EJuI4ezoCI04~tK$msR0BN{uuGS{@M-Rhj-% z7w9A66B9Zs@uS{E3k;MJAl!|Dmc}A@COnXL*L&A~(;oKwtcfwe9~J zQFc^Zd0^z6$W7SHgloeJbY19z#$#k+R?Xn^xVuA-D4RmuA*^CCI|Alo5=g^icXv4u} zm}o4lIU&ZQ!4526-z=h}0hGh9P0qRy%#=!WK;>}#F z)7!zrC&gO@NlCB=^uUc!cS_W&eC7|=dixI*aUh)%pI5hw*e}{^q^D}% zdv?AcUeBsct+IZ|6lpJtX;Pl6am=X~ymoz;s&N&&4(2 zvmQ>LM$^{Rh$BgXMK6o5a@7s^Tmtqp;!9j#JwDgK*t6od9;LbR%5J|G`z8~1o9=q; zbDBQ{m--#m64aXWTtui{SrRfs@e+H5c~jP#2Y6{hKQhSoM7<)KSXE^o_LyO6f$6V` zpSHRlW#6MK)mgD;>(+Z&H#T2#A z%|sqxeXP;|-#mgM{@}cLtGaR-lGVs-u>N&1C3uifQ*|7eOx0oV((7U`XJ3kplc4ww z@d&3}f-Q=Ge^aEBbc<11CKI(W_?MB2Zp}D*S6BB^b8EB1>)6VphAVH14RUdZdT9@? zP^yV`!mhV)Qb7{W({G`^s8SZmgE5tX>K$>iIy1P$MY<{@G`u4=s#h(<4y#O3G(0Gi z_*ESHuzyO7o-6O50ZpULL|q3u0! zLz2Dw|Jfq*|7@||Bnk6TB4bT$hBn5MO1o@KqP;o$s)^mvTNw>sUl7M)MBugW5O{N+ zx~B{0Fd&~l``#A|IiEY^x%j@Azp{zwHF98Q|tG#3(pYLDVCkizfuf8f@zDHa+{fn552`8ZhnNrh!cm zYtZ;(`GXh_Q!a}eghezWl2PEUF~DP&#iF319)GWTGMN4?ekWv6jBtISX#7uNBJcN! 
zAv3Ew7Q~;#MZAHAA%z3}{*!nIZ=`aNTLe8ni+3soEBk>5hxIP2p(RufSX1Qv8TN}9 z=ii^O=VpE^Tyq1@!^~gA&HNgQajMz~r+*O#`6#NwN)tGK72W(w%3~z~F8(U^X=*5k zYgcHbdFUu!!4owoXj)Vq1ieS0W5!Ix4KR%)P=BhT`4>3FQPKq^n( z7FJLrR?roVg>V%~4E34<`0K`+H0V^3TlnP^<32?v{Hh{f@XgeK#0~J3n!G0T(s5Wp zz{3Ei1u~xZtB)MSPJ@XWvQ>zrdZh4L_9rzYl@A1NhtJgHz&I`0Cv2c`NP*G05&o_v zmaC`Xlzc_Zh>kptgN^lec}z#%=DTPNRysXVNYk&nFSJcc1Y+d;oo{k|y{3@D;gKmM6Sn@p|LQ|1P>Af@J2+J$zWc-p2v-RW; zK~GcgOTH|b+HtaE{09~EjU%56iz&uFiWd3n0t3lV<>vdg9h}zJ#K9p0xm}n-laQp) z*rJE3crrni?LWTZ@OTmphvG>@AnXXdI3mPTjFJ>hCZ_>b8OfAU{=Jvx86z1JwC2G# zaM?(u z#WSJbD}M@6GoQ$sbs}lAiB3c1H<+Y!gOqNubW4#ohWXj+|6|A)v@Vv1Y7DSFkqq+L zeqcp@<1rEzyGY zJtBbaO%etl=*d{nN0C{YaykY1wHPE@Ec8T?jG&NExh6nuH0kqOuR#6P{omAd_`}9U zOxOY4GVN~7CCz-bN7c=b4}CAxrd*)-jXl6lU>3=1TO3PssRL(&(O0Z!5hvMjVK3MR zRzlzPBqr4BMZqgC)EZ$Rhb%>X#n=jA#!qv|kh+eR;mmt170{bY@`a!qX7wD(C2Cga zrn7PI4YI}_&m&XN()l-qT~{#?S})Xgz`#`U7v4cjkxvPj^T|nm5}k!T> zfzDrR>fxqBvKF45PO5}zYRBf8*c`?c5~Dghh`9v_&uLQVOr$9@N-28inL(0NSyOYp zy6VF%56>X|T%LDYS6{&4B2ufOp|l7mVuu$kL_*6n11sK%v`j$N3~Yh>SJ&!c=LFKm zm(xO|C>Ti~rxNohf)NUgcD)Y1rMqR6vN^K+ZLJW{HWRFxNc1p$B6)UVBQ3zD7^W{| z@Xt~(s%hYCxY{t%Q$C6C>_|kd9Ie)(3v3mYXqtMM@C9M-=)Ton(RK-sswLI!{5fuK z=v~S=ikp+>}}5lrWRF;tX)8`jJw!dww~6E7j@Y@3To1I7-P9 zG`B5CgsP^8qou^a*}P7`a_qiY6GzCbx{#DndP>=DJ^xPV^y0FEQa0 z!@{(`Vg6>Px>?o6_jB&hXO%lc)+=sfw}JT^{kQjgcy!LQm=<8 zjbx)nQqqM;gX?92#~VrfC^*YYbg7XX6XsJ4{ZL63>f!MwlBf!}^;&kMQq8YC+;(P((6jZ9SMdib;CV zw)Yj2xA<0yzzZHLbtUBd$l*HKQLDRBW0sd!%j{voE5l~$_G-5a`Kq&g0e7A9$B+f+ zTJMn+ld7bR6hkM4(=%;8S;wNEIDiAfj*M`uTQNC{LkcJQrdbq05gc;FV)B3{g04XN3d~&@ z;ibhWu1uT%A7r-H$HQe+eZ4B+Ib3}O=a-OytOayAE{V-V3mN=NZ=yOhE2r)Km^m_6 z4fc1=>s-|7Oa5f}ydbO0K@Z<7BNO28Qu13`Jza(zQVleR!N1Oy%@w0Klvdk)AxDmP zNZz&{Y#X7_ND&l47u+;udN@!^CSCQ-Ua2KU&Igx*{0y2pa(9ZJA}%9)9vIVE zV5L3SX%5V8Ab;CmTi=@|?}{-96Nj$}yH)qRR-@4hTJ=;_C%=}vQF*sAA!MuK3q>RQ z9Gi$>aptC&`TSUkLl#9ac@Ssvz)CW}>m|%fDk|hRBfL&hVkIA57$i3$Erx;E9a%-( ze7vN!3l_W@ALsn{f=nyV{fl+GH!+mSena}klPOWfm z4LQL1qGazgn6D*W{1UnmMbZLWuO*-H^XUex)WN!SUDH2HqJDxgYY2|A4E|s8BLR5;|4MY 
zx`)Wlq~Tn;26qc4S{+8}uvk{p0bG&;e)rav=|6tJzP%w9uInIWqgLlHL=1CTS-CCoCeq&6lP00lop1N zdbo8P$p|FIcdSDv;LUC1GCy6?B6o-9^_xgOE3A~{TL$Pf=s~}oBnLLFeLFdtJbaBL z;3UvTTWG+)k~S$6XZ)0Z%Q$R-!vWqo(6EdT*Z{c#7yHONJTJ)?4jk}YKiSMDNC@O8 z%AjU|{F#rD5XeEz{PzI4WxusylgVrH70FCkx9&+@ymqtZf@YTbcfLRLJLOQw9STom zM~zw`NJ}L%j<(y<0OxNfd4eBO#3RZRIYJUyp-YnS5U&eb4CqS-U>bCRI^YbDa$$uu z9%qjf7D%{*qzi#_99h)fL9$q3y=25W=U8YC+;k@~3&AXo)$`z;q>>d>QalpaEVK|* zJLOX(a70=?sXK|N&=gAsB*3%QU;y{sBw^H=(ZvmAd;pG=D^R+NT+2sGm~2PgKD7&x znFI;5?dSk??k2lcG)lsJJKCkAo5&7+oD_?dOu%y!_#_E4?uu^sHzoTtu~IbliB~}o z47*oC?YRKz%(!!83@BO?lQd8H4t!DF>CM_aubqaR#*{j8AWFA z?H#x(JujJhnuG~s3}+hP_>cx>?TrRFyS6dWbM}6c$!erZNd`J_AL?Pz4wAwuMFL`nAt$LxqpjKdQ&@f8n+$`$LX~XcW!tJxuWqdz@C9D;1$s zs?ZU^OwFSNvm<_=&k6@0C9T0YPOboXj4a`;k{sz3z{`)3Lwu$r2X%#VWdiK;kZt&R zdf@W}AdlnWl_6oW8IRgakCVUQQ9DhRr)SR-=#21j5+;@rtb5`q63y~Gl8lBd^t|yj zS-=YIQXbaY(DFms5t4QFL|S`9z$M`f!hF3Zv;~w%?)Kk2@fdSQr_n@1xR+51f6Kvc| zR`F9M8AvIFD|^YKso4@<&QPR`))~k01^t}^EBgn#Kv3e-%R0-`T$s%EiRE~=?Xrfi{>Pb>GDl$5IeUlM{%}pgRc#1qD zESDVElYF5C3SS^oxLmK11A9?(v}e-`#Kb1oN^_8v#YCGJ{Ie9uNr{_^%bL9lgbDUQ z!VZqlK)78(%8T_R4(l zG(zF|l8mm9TNhL^GU>tzehetC!dzjb&i|d6VsW>p3 z&IfTezC7Bu%8g zOKa3j7aCP{+~v?g<@ykpVhdZ%yexZ3f8{qyA`Ynq9(kRt{$}6v%g|*z2mhSDS15SFG&rS?*f^N8l8L4`w&VYZsvL9LVs8sqz~*|0W{; z{Zg~M7_vexehXz6uoOTOTV}xD-@=f6P-?>7>Y({;@||XbBm)!;xabCGdj4n4maL4F65H zg*Qktg5iLz&yqXP8dqVZ3R0g#Yg`~z;=QyL=%2x7&ygAGEWg`tg~$s{Mo-xZQp@iD zrpY~C_G&cD$4uC#VR5=V?P8%^y_x@7c{HSneVzGOUbs^lx>S9QCzXeMeyFJw$wfbh&7nrN-KHD-5&VEng;Ol)N z2Y>v4cDYQ#w6J1@r{G6Y!fJ*Y3GZ?!@=chAxbZSEk2& z&iNiceptS4;qY|wP6Nx=OGDUa7i?BJ&uAo0l92-pYK?G>+NoFhcZ(Iyu52;GPQiIZ zBk__9D5T3Yoq}`Rf72Z06_N}yl)*fWbBHgL)?uXq{-tp~DXfxYK*5_n=DE<@l{48VnK$GWeN06=hQpCL z-ouY}zNfiPF&yT#NKza8812lsY63mUG0s^Ep>9}*0zhZo2&?0q=~qo|$4jm8a4gRG zizaVai!Acxm>o{)op_%!=tTc9ODqWPSGfY67P-T6IF7Cm_K_Ix zw5S3B26(#&n0Rj&$K#z_gtTEbHb5Yu1e%Oa#2|uJduB?j6JFS-&_Pv7Yc>4)5iuD$ zhVfd+wd0-Z1>LX;=i4J|4{LJf2T=z6DZxn%wGZ?1!k{#6-|9>ZBI>t4=)6goIn3d= z65qE;|M|^69D23gM8VPQbE~XYY`IN4oII?=!D1NFV1(xt&eTzeiS^-@c!&sbA|@3u 
z(ZN^nWQcRu{)krhh`U;bl$J1I*XlNFw`gt`o={K2tj?*>o0WHkyrHnOPccj6lthNzOEu zpE*1Z=aK`R$;S4Sz=1v0J#}xtNc=j$Rn+Buqng2 zDV6pPk42IcQyabH=CEQo0tsxUC59!;x^-0c{>$V(GOgy?&i<9lyZZ;s zcr!aYcn0IoTrz@MgOyB(iE@55($R`%@!{4hyGM@UPu>(^?M!$>nD^IO4Ph#rh>ZU% Wgysb2B|c>se;0uONyS9xU;YQd(EgwR diff --git a/test_company_parser.py b/test_company_parser.py deleted file mode 100644 index 515c41a..0000000 --- a/test_company_parser.py +++ /dev/null @@ -1,78 +0,0 @@ -#!/usr/bin/env python3 -""" -Test script for the company parser with manual JSON parsing. -""" - -import asyncio -import os -import sys - -sys.path.insert(0, "/home/oluwasanmi/Documents/Work/MKD/anton_wireframe/app") - -import pandas as pd -from dotenv import load_dotenv -from services.llm_parser import InvestorProcessor - -# Load environment variables from root directory -load_dotenv("/home/oluwasanmi/Documents/Work/MKD/anton_wireframe/.env") - -# Also check if API key is set (not needed for companies now but for consistency) -if not os.getenv("OPENROUTER_API_KEY"): - print("⚠️ WARNING: OPENROUTER_API_KEY not found in environment") - print("This is OK for companies (no LLM needed), but will fail for investors") - - -async def test_parser(): - """Test the new company parser with a small sample""" - print("πŸ§ͺ Testing Manual Company JSON Parser (No LLM)\n") - - # Load the company data - df = pd.read_csv( - "/home/oluwasanmi/Documents/Work/MKD/anton_wireframe/data/300 Companies data.csv" - ) - - # Process just the first 3 rows for testing - test_df = df.head(3) - - processor = InvestorProcessor() - - print(f"Processing {len(test_df)} test companies...\n") - results = await processor.parse_companies(test_df, save_to_db=False) - - print("\n" + "=" * 80) - print("πŸ“Š TEST RESULTS") - print("=" * 80) - - for idx, result in enumerate(results, 1): - print(f"\n{idx}. 
{result.get('name')}") - print(f" Website: {result.get('website')}") - print(f" Location: {result.get('location')}") - print(f" Industry: {result.get('industry')}") - print( - f" Founded: {result.get('founded_year')}" - if result.get("founded_year") - else " Founded: Unknown" - ) - print(f" Executives: {len(result.get('key_executives', []))}") - if result.get("key_executives"): - for exec_member in result.get("key_executives", [])[:3]: # Show first 3 - print(f" - {exec_member.get('name')} ({exec_member.get('title')})") - print(f" Investors: {len(result.get('investor_names', []))}") - if result.get("investor_names"): - print( - f" - {', '.join(result.get('investor_names', [])[:5])}" - ) # Show first 5 - print(f" Client Categories: {len(result.get('client_categories', []))}") - if result.get("client_categories"): - print( - f" - {', '.join(result.get('client_categories', [])[:3])}" - ) # Show first 3 - - print("\n" + "=" * 80) - print(f"βœ… Successfully processed {len(results)}/{len(test_df)} companies") - print("πŸŽ‰ No LLM calls needed - 100% manual parsing!") - print("=" * 80) - - -if __name__ == "__main__": - asyncio.run(test_parser()) diff --git a/verify_schema.py b/verify_schema.py deleted file mode 100644 index cbe20e4..0000000 --- a/verify_schema.py +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env python3 -""" -Quick test to verify the database schema matches between app and preprocessor. 
-""" - -import sys - -sys.path.insert(0, "/home/oluwasanmi/Documents/Work/MKD/anton_wireframe/app") - -from db.db import engine -from sqlalchemy import inspect - -# Get table info -inspector = inspect(engine) - -print("πŸ” Checking database schema...") -print(f"Database: {engine.url}\n") - -# Check investors table -if "investors" in inspector.get_table_names(): - print("βœ… 'investors' table exists") - columns = inspector.get_columns("investors") - - print("\nColumns in 'investors' table:") - for col in columns: - print(f" - {col['name']}: {col['type']}") - - # Check for stage_focus - column_names = [col["name"] for col in columns] - if "stage_focus" in column_names: - print("\n⚠️ WARNING: 'stage_focus' column still exists in database!") - print(" This should be removed as it's deprecated.") - else: - print("\nβœ… Good: 'stage_focus' column not in database (as expected)") - - # Check for required columns - required_columns = [ - "aum", - "investment_thesis", - "portfolio_highlights", - "linked_documents", - "researcher_notes", - "sources", - ] - missing = [col for col in required_columns if col not in column_names] - - if missing: - print(f"\n❌ Missing columns: {', '.join(missing)}") - else: - print("\nβœ… All required enriched columns present") - -else: - print("❌ 'investors' table not found!") - -print("\n" + "=" * 60) -print("Schema verification complete!") -print("=" * 60)