fix: Correct data paths and embeddings fallback for production deployment

This commit is contained in:
Aherobo Ovie Victor
2025-07-07 20:49:42 +01:00
parent aaf9b7fcec
commit 762f8a8b25
4 changed files with 4147 additions and 7 deletions
+4 -4
View File
@@ -31,10 +31,10 @@ class Settings(BaseSettings):
  port: int = int(os.getenv("PORT", "8000"))
  debug: bool = os.getenv("DEBUG", "true").lower() == "true"
- # Data Storage
+ # Data Storage (paths relative to project root)
- raw_news_dir: str = os.getenv("RAW_NEWS_DIR", "data/raw_news")
+ raw_news_dir: str = os.getenv("RAW_NEWS_DIR", "../data/raw_news")
- processed_news_dir: str = os.getenv("PROCESSED_NEWS_DIR", "data/processed_news")
+ processed_news_dir: str = os.getenv("PROCESSED_NEWS_DIR", "../data/processed_news")
- vector_index_path: str = os.getenv("VECTOR_INDEX_PATH", "data/news_vectors.faiss")
+ vector_index_path: str = os.getenv("VECTOR_INDEX_PATH", "../data/news_vectors.faiss")
  # Embedding Model
  embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2"
Binary file not shown.
File diff suppressed because it is too large Load Diff
+7 -3
View File
@@ -155,10 +155,14 @@ class EmbeddingGenerator:
  return np.array(response.embeddings[0])
  except Exception as e:
  print(f"Cohere query embedding error: {e}")
- # Fallback to sentence transformer
+ # Fallback to simple embeddings
- return self.sentence_model.encode([query], convert_to_numpy=True)[0]
+ return self._simple_text_to_vector(query)
  else:
- return self.sentence_model.encode([query], convert_to_numpy=True)[0]
+ if self.sentence_model is not None:
+ return self.sentence_model.encode([query], convert_to_numpy=True)[0]
+ else:
+ # Use simple hash-based embeddings
+ return self._simple_text_to_vector(query)
  def compute_similarity(self, embedding1: np.ndarray, embedding2: np.ndarray) -> float:
  """Compute cosine similarity between two embeddings"""