"""Configuration settings for DS Task AI News""" import os from typing import List from pydantic_settings import BaseSettings from dotenv import load_dotenv load_dotenv() class Settings(BaseSettings): # API Keys cohere_api_key: str = os.getenv("COHERE_API_KEY", "") groq_api_key: str = os.getenv("GROQ_API_KEY", "") # Vector Database vector_db_type: str = os.getenv("VECTOR_DB_TYPE", "faiss") vector_dimension: int = int(os.getenv("VECTOR_DIMENSION", "384")) # RSS Feeds @property def rss_feeds(self) -> List[str]: feeds_str = os.getenv( "RSS_FEEDS", "https://feeds.bbci.co.uk/news/technology/rss.xml," "https://techcrunch.com/feed/," "https://www.wired.com/feed/rss" ) return [feed.strip() for feed in feeds_str.split(",") if feed.strip()] # Server Settings host: str = os.getenv("HOST", "0.0.0.0") port: int = int(os.getenv("PORT", "8000")) debug: bool = os.getenv("DEBUG", "true").lower() == "true" # Data Storage (paths relative to project root) @property def raw_news_dir(self) -> str: base_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) return os.getenv("RAW_NEWS_DIR", os.path.join(base_path, "data", "raw_news")) @property def processed_news_dir(self) -> str: base_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) return os.getenv("PROCESSED_NEWS_DIR", os.path.join(base_path, "data", "processed_news")) @property def vector_index_path(self) -> str: base_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) return os.getenv("VECTOR_INDEX_PATH", os.path.join(base_path, "data", "news_vectors.faiss")) # Embedding Model (will download automatically on first use) embedding_model: str = "all-MiniLM-L6-v2" # News Processing max_articles_per_feed: int = 50 similarity_threshold: float = 0.1 # Very low threshold for maximum recall settings = Settings()