Add initial project structure with configuration, utilities, and API endpoints
This commit is contained in:
@@ -0,0 +1,70 @@
|
||||
import json
|
||||
from typing import List, Dict, Tuple
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
import os
|
||||
import numpy as np
|
||||
from langchain_community.docstore.in_memory import InMemoryDocstore
|
||||
from langchain_community.vectorstores import FAISS
|
||||
from langchain_cohere import CohereEmbeddings
|
||||
import faiss
|
||||
from langchain_core.documents import Document
|
||||
from config import COHERE_API_KEY, EMBEDDING_MODEL, EMBEDDING_DIMENSION
|
||||
|
||||
class VectorDB:
|
||||
def __init__(self):
|
||||
self._executor = ThreadPoolExecutor(max_workers=10)
|
||||
self.COHERE_API_KEY = COHERE_API_KEY
|
||||
os.environ["COHERE_API_KEY"] = self.COHERE_API_KEY
|
||||
self.embeddings = CohereEmbeddings(model=EMBEDDING_MODEL)
|
||||
self.index = faiss.IndexFlatL2(EMBEDDING_DIMENSION)
|
||||
self.vector_score = FAISS(
|
||||
embedding_function=self.embeddings,
|
||||
index=self.index,
|
||||
docstore=InMemoryDocstore(),
|
||||
index_to_docstore_id={},
|
||||
)
|
||||
|
||||
def load_embeddings(self, file_id: str, file_path: str):
|
||||
"""
|
||||
Load embeddings from file
|
||||
"""
|
||||
try:
|
||||
if not os.path.isdir(file_path):
|
||||
raise Exception(f"{file_path} is not a valid directory.")
|
||||
print("Files in directory: ", os.listdir(file_path))
|
||||
print("Current working directory: ", os.getcwd())
|
||||
|
||||
os.chdir("/home/kowshik/work/ds_tjc/index/faiss_index")
|
||||
print("Changed directory to: ", os.getcwd())
|
||||
|
||||
new_vector_store = FAISS.load_local(
|
||||
folder_path=file_path,
|
||||
index_name="index",
|
||||
embeddings=self.embeddings,
|
||||
allow_dangerous_deserialization=True,
|
||||
)
|
||||
return new_vector_store
|
||||
except Exception as e:
|
||||
raise Exception(f"Error loading embeddings: {str(e)}")
|
||||
|
||||
def search(self, new_vector_store, query: str, top_k: int = 5) -> List[Dict]:
|
||||
"""
|
||||
Search for similar documents and return serializable results
|
||||
"""
|
||||
try:
|
||||
raw_results = new_vector_store.similarity_search_with_score(query, k=top_k)
|
||||
|
||||
# Convert results to serializable format
|
||||
processed_results = []
|
||||
for doc, score in raw_results:
|
||||
processed_result = {
|
||||
'content': doc.page_content,
|
||||
'metadata': doc.metadata,
|
||||
'score': float(score) # Convert numpy.float32 to Python float
|
||||
}
|
||||
processed_results.append(processed_result)
|
||||
|
||||
return processed_results
|
||||
|
||||
except Exception as e:
|
||||
raise Exception(f"Error during search: {str(e)}")
|
||||
Reference in New Issue
Block a user