Add initial project structure with configuration, utilities, and API endpoints

This commit is contained in:
2025-02-07 19:24:57 +06:00
parent 480f6f06c2
commit 87e7b99daa
21 changed files with 513 additions and 159 deletions
+1
View File
@@ -0,0 +1 @@
# This file is intentionally left blank.
+18
View File
@@ -0,0 +1,18 @@
import os
from datetime import datetime, timezone

# Cohere API Configuration
# NOTE(review): an API key literal is committed to source control here. It is
# kept only as a backward-compatible fallback; supply COHERE_API_KEY through
# the environment instead, then rotate this key and delete the literal.
COHERE_API_KEY = (
    os.environ.get("COHERE_API_KEY")
    or "ZlABLjvSsT86iObp9cgIgNkx2BLPs62pZiXBczw9"
)
EMBEDDING_MODEL = "embed-english-v3.0"  # Cohere model name
EMBEDDING_DIMENSION = 1024  # Dimension for Cohere embeddings

# FAISS Configuration
# Both paths are empty strings in this revision; callers must provide real
# locations themselves.
FAISS_INDEX_PATH = ""
METADATA_PATH = ""

# API Configuration
API_HOST = "0.0.0.0"
API_PORT = 5125

# Logging Configuration
# Evaluated once, when this module is first imported, so this is the module
# load time in UTC rather than a live clock. datetime.utcnow() is deprecated;
# an aware "now" formatted with the same pattern yields the same string.
CURRENT_TIME = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
CURRENT_USER = "tjc"
+16
View File
@@ -0,0 +1,16 @@
# utils/data_validator.py
from typing import Dict, Any
import pandas as pd
class DataValidator:
    """Presence checks for the fields expected in CRM and training inputs."""

    @staticmethod
    def validate_crm_data(data: pd.DataFrame) -> bool:
        """Return True only if ``data`` has every mandatory CRM column.

        The mandatory columns are ``customer_id``, ``interaction_date`` and
        ``interaction_type``; extra columns are ignored and cell values are
        not inspected.
        """
        for column_name in ('customer_id', 'interaction_date', 'interaction_type'):
            if column_name not in data.columns:
                return False
        return True

    @staticmethod
    def validate_training_data(data: Dict[str, Any]) -> bool:
        """Return True only if ``data`` has every mandatory training key.

        The mandatory keys are ``content``, ``category`` and ``level``; only
        key presence is checked, not the associated values.
        """
        for field_name in ('content', 'category', 'level'):
            if field_name not in data:
                return False
        return True
+28
View File
@@ -0,0 +1,28 @@
# utils/security.py
from datetime import datetime, timedelta
from typing import Optional
from jose import JWTError, jwt
from passlib.context import CryptContext
from config.settings import settings
# Shared passlib context used for password verification; bcrypt is the only
# configured scheme.
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")


class Security:
    """Helpers for issuing JWT access tokens and verifying passwords."""

    @staticmethod
    def create_access_token(data: dict, expires_delta: Optional[timedelta] = None) -> str:
        """Encode ``data`` plus an ``exp`` claim into a signed JWT.

        Args:
            data: Claims to embed. The dict is shallow-copied, so the caller's
                object is not modified.
            expires_delta: Token lifetime measured from now. When ``None``,
                the token expires after 15 minutes.

        Returns:
            The value returned by ``jwt.encode`` using ``settings.SECRET_KEY``
            and ``settings.ALGORITHM``.
        """
        # Local import: the file's top-level imports only bring in
        # ``datetime`` and ``timedelta``.
        from datetime import timezone

        # Compare against None explicitly: a zero timedelta is falsy and was
        # previously replaced by the 15-minute default.
        if expires_delta is not None:
            lifetime = expires_delta
        else:
            lifetime = timedelta(minutes=15)

        # datetime.utcnow() is deprecated; use a timezone-aware UTC timestamp.
        expire = datetime.now(timezone.utc) + lifetime

        to_encode = data.copy()
        to_encode.update({"exp": expire})
        return jwt.encode(
            to_encode,
            settings.SECRET_KEY,
            algorithm=settings.ALGORITHM,
        )

    @staticmethod
    def verify_password(plain_password: str, hashed_password: str) -> bool:
        """Return whether ``plain_password`` matches ``hashed_password``.

        Delegates to the module-level ``pwd_context``.
        """
        return pwd_context.verify(plain_password, hashed_password)
+70
View File
@@ -0,0 +1,70 @@
import json
from typing import List, Dict, Tuple
from concurrent.futures import ThreadPoolExecutor
import os
import numpy as np
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS
from langchain_cohere import CohereEmbeddings
import faiss
from langchain_core.documents import Document
from config import COHERE_API_KEY, EMBEDDING_MODEL, EMBEDDING_DIMENSION
class VectorDB:
    """Cohere-embedded FAISS vector store with load and search helpers."""

    def __init__(self):
        """Build the embedding client and an empty in-memory FAISS store."""
        # NOTE(review): this executor is not used or shut down by any method
        # of this class; kept because other code may reach for it.
        self._executor = ThreadPoolExecutor(max_workers=10)
        self.COHERE_API_KEY = COHERE_API_KEY
        # The key is exported to the process environment before the client is
        # constructed; presumably CohereEmbeddings reads it from there -
        # confirm against the langchain_cohere documentation.
        os.environ["COHERE_API_KEY"] = self.COHERE_API_KEY
        self.embeddings = CohereEmbeddings(model=EMBEDDING_MODEL)
        self.index = faiss.IndexFlatL2(EMBEDDING_DIMENSION)
        # NOTE(review): the attribute name ``vector_score`` looks like a typo
        # for "vector_store"; it is left unchanged to stay compatible.
        self.vector_score = FAISS(
            embedding_function=self.embeddings,
            index=self.index,
            docstore=InMemoryDocstore(),
            index_to_docstore_id={},
        )

    def load_embeddings(self, file_id: str, file_path: str):
        """Load a saved FAISS index named ``index`` from a directory.

        Args:
            file_id: Accepted for interface compatibility; not used here.
            file_path: Directory that contains the saved index files.

        Returns:
            The vector store returned by ``FAISS.load_local``.

        Raises:
            Exception: If ``file_path`` is not a directory or loading fails.
                The message is prefixed with "Error loading embeddings: " and
                the original error is attached as ``__cause__``.
        """
        try:
            if not os.path.isdir(file_path):
                raise Exception(f"{file_path} is not a valid directory.")

            # Resolve to an absolute path instead of changing the process
            # working directory. The earlier os.chdir() to a developer's home
            # directory was a process-wide side effect that failed on any
            # other machine.
            index_dir = os.path.abspath(file_path)
            print("Files in directory: ", os.listdir(index_dir))

            # SECURITY: allow_dangerous_deserialization=True permits
            # pickle-based loading. Only point this at index directories
            # produced by a trusted source.
            return FAISS.load_local(
                folder_path=index_dir,
                index_name="index",
                embeddings=self.embeddings,
                allow_dangerous_deserialization=True,
            )
        except Exception as e:
            raise Exception(f"Error loading embeddings: {str(e)}") from e

    def search(self, new_vector_store, query: str, top_k: int = 5) -> List[Dict]:
        """Run a similarity search and return JSON-serializable results.

        Args:
            new_vector_store: Object exposing
                ``similarity_search_with_score(query, k=...)``, such as the
                store returned by ``load_embeddings``.
            query: Text to search for.
            top_k: Passed to the store as ``k``.

        Returns:
            One dict per hit with keys ``content``, ``metadata`` and
            ``score``; the score is converted to a built-in ``float``.

        Raises:
            Exception: If the search or the conversion fails. The message is
                prefixed with "Error during search: " and the original error
                is attached as ``__cause__``.
        """
        try:
            raw_results = new_vector_store.similarity_search_with_score(query, k=top_k)
            return [
                {
                    'content': doc.page_content,
                    'metadata': doc.metadata,
                    'score': float(score),  # e.g. numpy.float32 -> Python float
                }
                for doc, score in raw_results
            ]
        except Exception as e:
            raise Exception(f"Error during search: {str(e)}") from e