fix: Correct data paths and embeddings fallback for production deployment

This commit is contained in:
Aherobo Ovie Victor
2025-07-07 20:49:42 +01:00
parent aaf9b7fcec
commit 762f8a8b25
4 changed files with 4147 additions and 7 deletions
+4 -4
View File
@@ -31,10 +31,10 @@ class Settings(BaseSettings):
port: int = int(os.getenv("PORT", "8000"))
debug: bool = os.getenv("DEBUG", "true").lower() == "true"
# Data Storage
raw_news_dir: str = os.getenv("RAW_NEWS_DIR", "data/raw_news")
processed_news_dir: str = os.getenv("PROCESSED_NEWS_DIR", "data/processed_news")
vector_index_path: str = os.getenv("VECTOR_INDEX_PATH", "data/news_vectors.faiss")
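# Data Storage (defaults are relative paths one directory above the working directory; NOTE(review): they are only project-root-relative if the process starts from a direct subdirectory of the project root — confirm)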
raw_news_dir: str = os.getenv("RAW_NEWS_DIR", "../data/raw_news")
processed_news_dir: str = os.getenv("PROCESSED_NEWS_DIR", "../data/processed_news")
vector_index_path: str = os.getenv("VECTOR_INDEX_PATH", "../data/news_vectors.faiss")
# Embedding Model
embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2"
Binary file not shown.
File diff suppressed because it is too large Load Diff
+7 -3
View File
@@ -155,10 +155,14 @@ class EmbeddingGenerator:
return np.array(response.embeddings[0])
except Exception as e:
print(f"Cohere query embedding error: {e}")
# Fallback to sentence transformer
return self.sentence_model.encode([query], convert_to_numpy=True)[0]
# Fallback to simple embeddings
return self._simple_text_to_vector(query)
else:
return self.sentence_model.encode([query], convert_to_numpy=True)[0]
if self.sentence_model is not None:
return self.sentence_model.encode([query], convert_to_numpy=True)[0]
else:
# Use simple hash-based embeddings
return self._simple_text_to_vector(query)
def compute_similarity(self, embedding1: np.ndarray, embedding2: np.ndarray) -> float:
"""Compute cosine similarity between two embeddings"""