from typing import Optional

import chromadb
from langchain_openai import ChatOpenAI

from pydantic_schemas import Investor, InvestorList
from settings import settings

# Add these imports for your databases
# from sqlalchemy.ext.asyncio import AsyncSession
# from your_vector_db import VectorDBClient


class QueryProcessor:
    """Answer investor questions via an LLM, a SQL session, or a Chroma collection.

    The processor owns three back ends:

    * ``structured_llm`` - a chat model configured to return ``InvestorList``.
    * ``sql_session`` - an optional, caller-supplied database session.
    * ``collection`` - the ``investor_descriptions`` Chroma collection, opened
      on ``vector_db_client``.
    """

    def __init__(
        self,
        sql_session: Optional[object] = None,
        vector_db_client: Optional[object] = None,
    ):
        """Set up the LLM and the database handles.

        Args:
            sql_session: Session used by ``query_sql_database``. When ``None``,
                SQL queries are skipped.
            vector_db_client: Chroma-compatible client (must provide
                ``get_or_create_collection``). When ``None``, a persistent
                client rooted at ``./chroma_db`` is created.
        """
        self.llm = ChatOpenAI(
            api_key=settings.OPENROUTER_API_KEY,
            base_url="https://openrouter.ai/api/v1",
            model="openai/gpt-oss-120b:free",
            temperature=0,
        )
        self.structured_llm = self.llm.with_structured_output(InvestorList)

        self.sql_session = sql_session

        # Honour an injected client; previously it was unconditionally
        # overwritten by a new PersistentClient, so callers could never
        # supply their own (e.g. an in-memory client for tests).
        if vector_db_client is None:
            vector_db_client = chromadb.PersistentClient(path="./chroma_db")
        self.vector_db_client = vector_db_client

        self.collection = self.vector_db_client.get_or_create_collection(
            name="investor_descriptions",
            metadata={
                "description": "Investor descriptions and investment thesis focus"
            },
        )

    def query_sql_database(self, query: str) -> Optional[InvestorList]:
        """Query the SQL database for investor information.

        Args:
            query: Statement passed verbatim to ``sql_session.execute``.

        Returns:
            An ``InvestorList`` built from the scalar results, or ``None`` when
            no SQL session was configured.
        """
        if self.sql_session is None:
            return None

        # NOTE(review): ``query`` is executed as-is. If it can contain
        # untrusted input, switch to a parameterized statement. SQLAlchemy 2.x
        # sessions also expect ``text(query)`` rather than a bare string, and
        # an AsyncSession would need ``await`` - confirm the session type.
        result = self.sql_session.execute(query)
        investors = result.scalars().all()
        return InvestorList(investors=investors)

    def query_vector_database(
        self, query: str, n_results: int = 10
    ) -> Optional[InvestorList]:
        """Query the vector database for investor information.

        Args:
            query: Free-text query run against the ``investor_descriptions``
                collection.
            n_results: Maximum number of matches to request from the
                collection.

        Returns:
            An ``InvestorList`` built from the metadata of the matching
            records, or ``None`` when no vector client is available.
        """
        if self.vector_db_client is None:
            return None

        # Chroma queries run on the collection, not the client, and take a
        # list of query texts.
        results = self.collection.query(query_texts=[query], n_results=n_results)

        # The result holds one list of metadata dicts per query text; only one
        # query text is sent, so only the first batch is relevant.
        metadata_batches = results.get("metadatas") or []
        matches = metadata_batches[0] if metadata_batches else []

        # Assumes each stored metadata dict carries the Investor fields -
        # TODO confirm against the ingestion code. Records without metadata
        # are skipped.
        investors = [Investor(**metadata) for metadata in matches if metadata]
        return InvestorList(investors=investors)

    def process_query(self, question: str) -> InvestorList:
        """Process a query using the LLM and return structured investor data.

        Args:
            question: Natural-language question sent to the structured LLM.

        Returns:
            The ``InvestorList`` produced by the structured-output model.
        """
        # ``with_structured_output`` returns a Runnable, which is driven with
        # ``invoke``; it has no ``predict`` method.
        return self.structured_llm.invoke(question)