fix: Correct data paths and embeddings fallback for production deployment
This commit is contained in:
+4
-4
@@ -31,10 +31,10 @@ class Settings(BaseSettings):
|
|||||||
port: int = int(os.getenv("PORT", "8000"))
|
port: int = int(os.getenv("PORT", "8000"))
|
||||||
debug: bool = os.getenv("DEBUG", "true").lower() == "true"
|
debug: bool = os.getenv("DEBUG", "true").lower() == "true"
|
||||||
|
|
||||||
# Data Storage
|
# Data Storage (paths relative to project root)
|
||||||
raw_news_dir: str = os.getenv("RAW_NEWS_DIR", "data/raw_news")
|
raw_news_dir: str = os.getenv("RAW_NEWS_DIR", "../data/raw_news")
|
||||||
processed_news_dir: str = os.getenv("PROCESSED_NEWS_DIR", "data/processed_news")
|
processed_news_dir: str = os.getenv("PROCESSED_NEWS_DIR", "../data/processed_news")
|
||||||
vector_index_path: str = os.getenv("VECTOR_INDEX_PATH", "data/news_vectors.faiss")
|
vector_index_path: str = os.getenv("VECTOR_INDEX_PATH", "../data/news_vectors.faiss")
|
||||||
|
|
||||||
# Embedding Model
|
# Embedding Model
|
||||||
embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2"
|
embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2"
|
||||||
|
|||||||
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@@ -155,10 +155,14 @@ class EmbeddingGenerator:
|
|||||||
return np.array(response.embeddings[0])
|
return np.array(response.embeddings[0])
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Cohere query embedding error: {e}")
|
print(f"Cohere query embedding error: {e}")
|
||||||
# Fallback to sentence transformer
|
# Fallback to simple embeddings
|
||||||
return self.sentence_model.encode([query], convert_to_numpy=True)[0]
|
return self._simple_text_to_vector(query)
|
||||||
else:
|
else:
|
||||||
return self.sentence_model.encode([query], convert_to_numpy=True)[0]
|
if self.sentence_model is not None:
|
||||||
|
return self.sentence_model.encode([query], convert_to_numpy=True)[0]
|
||||||
|
else:
|
||||||
|
# Use simple hash-based embeddings
|
||||||
|
return self._simple_text_to_vector(query)
|
||||||
|
|
||||||
def compute_similarity(self, embedding1: np.ndarray, embedding2: np.ndarray) -> float:
|
def compute_similarity(self, embedding1: np.ndarray, embedding2: np.ndarray) -> float:
|
||||||
"""Compute cosine similarity between two embeddings"""
|
"""Compute cosine similarity between two embeddings"""
|
||||||
|
|||||||
Reference in New Issue
Block a user