Add initial project structure with configuration, utilities, and API endpoints
This commit is contained in:
@@ -0,0 +1 @@
|
||||
# This file is intentionally left blank.
|
||||
@@ -0,0 +1,18 @@
|
||||
# Project-wide configuration constants.
import os
from datetime import datetime, timezone

# Cohere API Configuration
# NOTE(security): the literal below is a credential that was committed to
# source control. The COHERE_API_KEY environment variable now takes
# precedence; the literal is kept only as a fallback so existing setups keep
# working. Rotate the key and delete the fallback.
COHERE_API_KEY = (
    os.environ.get("COHERE_API_KEY")
    or "ZlABLjvSsT86iObp9cgIgNkx2BLPs62pZiXBczw9"
)
EMBEDDING_MODEL = "embed-english-v3.0"  # Cohere model name
EMBEDDING_DIMENSION = 1024  # Dimension for Cohere embeddings

# FAISS Configuration
# Both paths are empty strings here; nothing in this module fills them in.
FAISS_INDEX_PATH = ""
METADATA_PATH = ""

# API Configuration
API_HOST = "0.0.0.0"
API_PORT = 5125

# Logging Configuration
# Evaluated once, at import time, so this is the module load time in UTC and
# not a live clock. datetime.utcnow() is deprecated since Python 3.12; the
# timezone-aware call formats to the same "YYYY-MM-DD HH:MM:SS" string.
CURRENT_TIME = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
CURRENT_USER = "tjc"
|
||||
@@ -0,0 +1,16 @@
|
||||
# utils/data_validator.py
|
||||
from typing import Dict, Any
|
||||
import pandas as pd
|
||||
|
||||
class DataValidator:
    """Static presence checks for the required keys of incoming data."""

    @staticmethod
    def validate_crm_data(data: pd.DataFrame) -> bool:
        """Check that a CRM frame carries every required column.

        Only the presence of the column labels is checked; the values in
        the columns are not inspected.

        Args:
            data: Frame whose ``columns`` are tested.

        Returns:
            True when 'customer_id', 'interaction_date' and
            'interaction_type' are all present, otherwise False.
        """
        for column in ('customer_id', 'interaction_date', 'interaction_type'):
            if column not in data.columns:
                return False
        return True

    @staticmethod
    def validate_training_data(data: Dict[str, Any]) -> bool:
        """Check that a training-material record has every required field.

        Only key membership is tested; the associated values are not
        inspected.

        Args:
            data: Mapping describing one training item.

        Returns:
            True when 'content', 'category' and 'level' are all keys of
            ``data``, otherwise False.
        """
        for field_name in ('content', 'category', 'level'):
            if field_name not in data:
                return False
        return True
|
||||
@@ -0,0 +1,28 @@
|
||||
# utils/security.py
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Optional
|
||||
from jose import JWTError, jwt
|
||||
from passlib.context import CryptContext
|
||||
from config.settings import settings
|
||||
|
||||
# Module-level passlib context, configured with bcrypt as its only scheme;
# Security.verify_password delegates to it.
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
|
||||
|
||||
class Security:
    """Static helpers for issuing JWT access tokens and checking passwords."""

    @staticmethod
    def create_access_token(data: dict, expires_delta: Optional[timedelta] = None):
        """Build a signed JWT carrying ``data`` plus an ``exp`` claim.

        Args:
            data: Claims to embed. The dict is shallow-copied, so the
                caller's object is not modified.
            expires_delta: Token lifetime. When omitted (``None``) the
                token expires 15 minutes from now. An explicit zero
                ``timedelta`` is honoured and is not replaced by the
                default.

        Returns:
            The value returned by ``jwt.encode`` for the claims, signed with
            ``settings.SECRET_KEY`` using ``settings.ALGORITHM``.
        """
        # Local import: the surrounding module only imports datetime and
        # timedelta from the datetime package.
        from datetime import timezone

        # "is None" rather than truthiness: timedelta(0) is falsy and would
        # otherwise silently fall back to the 15-minute default.
        if expires_delta is None:
            expires_delta = timedelta(minutes=15)

        to_encode = data.copy()
        # Timezone-aware "now" replaces datetime.utcnow(), which is
        # deprecated since Python 3.12.
        to_encode.update({"exp": datetime.now(timezone.utc) + expires_delta})
        return jwt.encode(
            to_encode,
            settings.SECRET_KEY,
            algorithm=settings.ALGORITHM,
        )

    @staticmethod
    def verify_password(plain_password: str, hashed_password: str) -> bool:
        """Check a plaintext password against a stored hash.

        Args:
            plain_password: Password as supplied by the user.
            hashed_password: Previously stored hash to compare against.

        Returns:
            The result of ``pwd_context.verify`` for the two values.
        """
        return pwd_context.verify(plain_password, hashed_password)
|
||||
@@ -0,0 +1,70 @@
|
||||
import json
|
||||
from typing import List, Dict, Tuple
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
import os
|
||||
import numpy as np
|
||||
from langchain_community.docstore.in_memory import InMemoryDocstore
|
||||
from langchain_community.vectorstores import FAISS
|
||||
from langchain_cohere import CohereEmbeddings
|
||||
import faiss
|
||||
from langchain_core.documents import Document
|
||||
from config import COHERE_API_KEY, EMBEDDING_MODEL, EMBEDDING_DIMENSION
|
||||
|
||||
class VectorDB:
    """Wrapper around a LangChain FAISS vector store using Cohere embeddings."""

    def __init__(self):
        """Create the embedding client and an empty in-memory FAISS store."""
        # NOTE(review): no method visible in this class submits work to this
        # executor or shuts it down; kept because other code may rely on it.
        self._executor = ThreadPoolExecutor(max_workers=10)
        self.COHERE_API_KEY = COHERE_API_KEY
        # The key is exported before CohereEmbeddings is constructed without
        # an explicit key argument; presumably the client reads it from the
        # environment -- confirm against the langchain_cohere docs.
        os.environ["COHERE_API_KEY"] = self.COHERE_API_KEY
        self.embeddings = CohereEmbeddings(model=EMBEDDING_MODEL)
        self.index = faiss.IndexFlatL2(EMBEDDING_DIMENSION)
        # NOTE(review): "vector_score" looks like a typo for "vector_store";
        # the attribute name is kept so existing callers do not break.
        self.vector_score = FAISS(
            embedding_function=self.embeddings,
            index=self.index,
            docstore=InMemoryDocstore(),
            index_to_docstore_id={},
        )

    def load_embeddings(self, file_id: str, file_path: str):
        """Load a saved FAISS store named ``index`` from a directory.

        Args:
            file_id: Not used by this method; kept for interface
                compatibility.
            file_path: Directory that contains the saved index files.

        Returns:
            The vector store returned by ``FAISS.load_local``.

        Raises:
            Exception: If ``file_path`` is not a directory or loading fails.
                The message is prefixed with "Error loading embeddings: "
                and the original error is chained as ``__cause__``.
        """
        # Local import so the block does not depend on a module-level logger.
        import logging

        logger = logging.getLogger(__name__)
        try:
            if not os.path.isdir(file_path):
                raise Exception(f"{file_path} is not a valid directory.")

            # Resolve once against the current working directory. The earlier
            # version chdir'ed into a hard-coded developer home directory,
            # which failed on other machines and made a relative file_path
            # point somewhere else than the directory validated above.
            folder = os.path.abspath(file_path)
            logger.debug("Loading FAISS index from %s (files: %s)",
                         folder, os.listdir(folder))

            # SECURITY: allow_dangerous_deserialization=True lets load_local
            # unpickle the stored docstore. Only load indexes from trusted
            # locations.
            return FAISS.load_local(
                folder_path=folder,
                index_name="index",
                embeddings=self.embeddings,
                allow_dangerous_deserialization=True,
            )
        except Exception as e:
            raise Exception(f"Error loading embeddings: {str(e)}") from e

    def search(self, new_vector_store, query: str, top_k: int = 5) -> List[Dict]:
        """Run a similarity search and return JSON-serializable results.

        Args:
            new_vector_store: Object exposing
                ``similarity_search_with_score(query, k=...)``, e.g. the
                value returned by ``load_embeddings``.
            query: Text to search for.
            top_k: Value passed to the store as ``k``.

        Returns:
            One dict per hit, in the order the store returned them, with the
            keys 'content', 'metadata' and 'score' (score as a plain float).

        Raises:
            Exception: If the search or the conversion fails. The message is
                prefixed with "Error during search: " and the original error
                is chained as ``__cause__``.
        """
        try:
            raw_results = new_vector_store.similarity_search_with_score(query, k=top_k)
            return [
                {
                    'content': doc.page_content,
                    'metadata': doc.metadata,
                    # Plain float so that e.g. numpy.float32 scores serialize.
                    'score': float(score),
                }
                for doc, score in raw_results
            ]
        except Exception as e:
            raise Exception(f"Error during search: {str(e)}") from e
|
||||
Reference in New Issue
Block a user