feat: Implement complete RSS news fetching system with multi-source support
This commit is contained in:
@@ -0,0 +1,46 @@
|
||||
"""Configuration settings for DS Task AI News"""
|
||||
import os
|
||||
from typing import List
|
||||
from pydantic_settings import BaseSettings
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Run python-dotenv at import time, before Settings is defined, so that
# values from a local .env file (if one exists) are available to the
# environment lookups performed below.
load_dotenv()
|
||||
|
||||
class Settings(BaseSettings):
    """Application configuration for DS Task AI News.

    Each annotated field is filled by pydantic-settings from the environment
    variable with the same name (matched case-insensitively by default, e.g.
    ``PORT`` -> ``port``). The value assigned here is only the fallback used
    when that variable is not set, so no explicit ``os.getenv`` call is
    needed for the fields. Values are validated against the annotation when
    the class is instantiated; a malformed value (for example a non-numeric
    ``PORT``) is reported as a validation error naming the offending field.
    """

    # API keys: empty string when the corresponding variable is not set.
    cohere_api_key: str = ""
    groq_api_key: str = ""

    # Vector database backend name and embedding vector size.
    vector_db_type: str = "faiss"
    vector_dimension: int = 384

    # RSS feeds are exposed as a property rather than a field: the
    # RSS_FEEDS variable is a plain comma-separated string, which is split
    # by hand here on every access.
    @property
    def rss_feeds(self) -> List[str]:
        """Return the feed URLs listed in ``RSS_FEEDS``.

        The variable is read on each access and split on commas; surrounding
        whitespace is stripped and empty entries are dropped. When the
        variable is unset, a built-in list of three technology feeds is used.
        """
        feeds_str = os.getenv(
            "RSS_FEEDS",
            "https://feeds.bbci.co.uk/news/technology/rss.xml,"
            "https://techcrunch.com/feed/,"
            "https://www.wired.com/feed/rss"
        )
        return [feed.strip() for feed in feeds_str.split(",") if feed.strip()]

    # Server settings.
    # NOTE(review): the fallbacks bind to all interfaces and enable debug
    # mode -- confirm these are overridden in production deployments.
    host: str = "0.0.0.0"
    port: int = 8000
    debug: bool = True

    # Data storage locations (relative paths by default).
    raw_news_dir: str = "data/raw_news"
    processed_news_dir: str = "data/processed_news"
    vector_index_path: str = "data/news_vectors.faiss"

    # Embedding model identifier.
    embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2"

    # News processing limits.
    max_articles_per_feed: int = 50
    similarity_threshold: float = 0.7
|
||||
|
||||
# Module-level Settings instance, constructed once when this module is
# imported; any invalid configuration value therefore surfaces at import time.
settings = Settings()
|
||||
Reference in New Issue
Block a user