bbf6af58f0
- Added FastAPI application with a simple root endpoint. - Developed LLMInvestorParser class for processing investor data from CSV files. - Integrated OpenAI API for LLM enhancements and JSON cleaning. - Implemented structured data extraction and saving to SQL database. - Added functionality to save investor descriptions to ChromaDB for vector similarity search. - Created command-line interface for processing files and searching investors. - Added schema definitions for Investor and related data models using SQLAlchemy and Pydantic. - Implemented logging for better traceability and error handling. - Included requirements.txt for dependency management.
83 lines
2.7 KiB
Python
83 lines
2.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
Quick demonstration of the LLM Investor Parser functionality.

This script shows how to use the system programmatically.
"""
|
|
|
|
from sqlalchemy import func, select
|
|
|
|
from db import get_session
|
|
from investor_parser import InvestorParser
|
|
from schema import Investor
|
|
|
|
|
|
def main():
    """Run the LLM Investor Parser demo from start to finish.

    Prints, in order: the number of investors currently stored, the top
    matches for a handful of sample search queries, a detailed view of one
    investor that has a description, and usage hints for the command-line
    parser. Takes no arguments and returns nothing.
    """
    print("🚀 LLM Investor Parser Demo")
    print("=" * 50)

    # Initialize parser (without LLM for demo)
    parser = InvestorParser(use_llm=False)

    _print_database_stats()
    _print_search_examples(parser)
    _print_investor_sample()

    print("\n✅ Demo complete!")
    print("\nTo run the full parser:")
    print(" python investor_parser.py --file 'your_file.csv' --limit 50")
    print("\nTo search investors:")
    print(" python investor_parser.py --search 'your search query'")


def _print_database_stats():
    """Print how many investor rows the database currently holds."""
    with get_session() as session:
        count = session.scalar(select(func.count(Investor.id)))
    print(f"📊 Current database: {count} investors")


def _first_query_hits(results):
    """Return ``(metadata, distance)`` pairs for the first query in *results*.

    *results* is expected to map ``"documents"``, ``"metadatas"`` and
    ``"distances"`` to one list per query (presumably a ChromaDB query
    result -- confirm against ``InvestorParser.search_investors``). A falsy
    *results*, a missing key, or an empty batch list yields ``[]`` instead of
    raising.
    """
    if not results:
        return []

    def first_batch(key):
        batches = results.get(key) or []
        return (batches[0] if batches else None) or []

    if not first_batch("documents"):
        return []
    return list(zip(first_batch("metadatas"), first_batch("distances")))


def _print_search_examples(parser, limit=3):
    """Run the sample queries through *parser* and print up to *limit* hits each."""
    print("\n🔍 Search Examples:")

    search_queries = [
        "circular bioeconomy sustainable",
        "venture capital early stage",
        "fintech financial technology",
        "healthcare biotechnology",
        "climate sustainability",
    ]

    for query in search_queries:
        print(f"\n🔎 Searching for: '{query}'")
        hits = _first_query_hits(parser.search_investors(query, limit=limit))

        if not hits:
            print(" No results found")
            continue

        # NOTE(review): the value printed as "score" comes from the
        # "distances" field, so a lower number presumably means a closer
        # match -- confirm with the vector store in use.
        for rank, (metadata, score) in enumerate(hits, 1):
            print(f" {rank}. {metadata['name']} (score: {score:.3f})")


def _preview(text, limit=200):
    """Return *text* cut to *limit* characters, adding "..." only if it was cut."""
    if len(text) <= limit:
        return text
    return f"{text[:limit]}..."


def _print_investor_sample(max_focus_areas=3):
    """Print details of one investor that has a non-null description, if any."""
    print("\n📋 Detailed Investor Sample:")

    with get_session() as session:
        investor = session.execute(
            select(Investor)
            .where(Investor.investor_description.isnot(None))
            .limit(1)
        ).scalar_one_or_none()

        if not investor:
            return

        # Attributes are read while the session is still open; ORM instances
        # may become detached or expired once it closes (depends on how
        # get_session is configured -- TODO confirm).
        print(f"\n🏢 {investor.name}")
        print(f"🌐 Website: {investor.website}")
        print(f"📍 HQ: {investor.headquarters or 'Not specified'}")
        print(f"📝 Description: {_preview(investor.investor_description)}")

        focus_areas = investor.investment_thesis_focus
        if focus_areas:
            print(f"\n🎯 Investment Focus ({len(focus_areas)} areas):")
            for i, focus in enumerate(focus_areas[:max_focus_areas], 1):
                print(f" {i}. {focus}")
            remaining = len(focus_areas) - max_focus_areas
            if remaining > 0:
                print(f" ... and {remaining} more")

        if investor.aum_amount:
            print(f"\n💰 AUM: {investor.aum_amount}")
|
|
|
|
|
|
# Script entry point: run the demo only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|