# DS_TASK_AI_VIEWS/backend/config.py

"""Configuration settings for DS Task AI News"""
import os
from typing import List
from pydantic_settings import BaseSettings
from dotenv import load_dotenv

# Load variables from a .env file (if one is found) into the process
# environment so the settings defined below can pick them up.
load_dotenv()

class Settings(BaseSettings):
    """Application configuration for the DS Task AI News backend.

    Every annotated field is filled in by ``BaseSettings`` from the
    environment variable of the same name (matched case-insensitively, e.g.
    ``PORT`` for ``port``) and validated against its annotation; the literal
    on the right-hand side is only the fallback when the variable is unset.

    The defaults are plain literals on purpose: calling ``os.getenv()`` /
    ``int()`` in the class body would run at import time, duplicate the
    lookup ``BaseSettings`` already performs, and turn a malformed value
    (e.g. ``PORT=abc``) into a bare ``ValueError`` during import instead of
    a descriptive validation error.

    ``rss_feeds`` and the storage paths are properties rather than fields;
    they read their environment variables each time they are accessed.
    """

    # API Keys (empty string when not configured)
    cohere_api_key: str = ""
    groq_api_key: str = ""

    # Vector Database
    vector_db_type: str = "faiss"
    vector_dimension: int = 384

    # RSS Feeds
    @property
    def rss_feeds(self) -> List[str]:
        """Return the feed URLs from ``RSS_FEEDS`` (comma-separated).

        Falls back to a built-in list of three technology feeds when the
        variable is unset. Surrounding whitespace is stripped and empty
        entries (e.g. from a trailing comma) are dropped.
        """
        raw_feeds = os.getenv(
            "RSS_FEEDS",
            "https://feeds.bbci.co.uk/news/technology/rss.xml,"
            "https://techcrunch.com/feed/,"
            "https://www.wired.com/feed/rss"
        )
        stripped = (url.strip() for url in raw_feeds.split(","))
        return [url for url in stripped if url]

    # Server Settings
    host: str = "0.0.0.0"  # listens on all interfaces by default
    port: int = 8000
    # NOTE(review): debug is on unless DEBUG is explicitly set to a false
    # value -- confirm this is the intended default for deployments.
    debug: bool = True

    # Data Storage (paths relative to project root)
    def _data_path(self, env_var: str, *default_parts: str) -> str:
        """Return ``env_var`` if set, else ``<project root>/data/<default_parts>``.

        The project root is taken to be the parent of the directory that
        contains this file. The override is returned verbatim, even when it
        is an empty string.
        """
        override = os.getenv(env_var)
        if override is not None:
            return override
        project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        return os.path.join(project_root, "data", *default_parts)

    @property
    def raw_news_dir(self) -> str:
        """Directory for raw news data (``RAW_NEWS_DIR`` overrides the default)."""
        return self._data_path("RAW_NEWS_DIR", "raw_news")

    @property
    def processed_news_dir(self) -> str:
        """Directory for processed news data (``PROCESSED_NEWS_DIR`` overrides)."""
        return self._data_path("PROCESSED_NEWS_DIR", "processed_news")

    @property
    def vector_index_path(self) -> str:
        """Path of the vector index file (``VECTOR_INDEX_PATH`` overrides)."""
        return self._data_path("VECTOR_INDEX_PATH", "news_vectors.faiss")

    # Embedding Model (will download automatically on first use)
    embedding_model: str = "all-MiniLM-L6-v2"

    # News Processing
    max_articles_per_feed: int = 50
    similarity_threshold: float = 0.1  # Very low threshold for maximum recall

# Shared module-level instance, built once when this module is imported.
settings = Settings()