2025-07-08 19:57:35 +01:00
|
|
|
from fastapi import FastAPI, HTTPException
|
|
|
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
|
|
|
from .news_fetcher import fetch_all_news, save_raw_news, save_processed_news
|
2025-07-24 16:35:04 +01:00
|
|
|
from .recommender import recommend_similar, process_articles_for_vector_db, news_recommender
|
2025-07-08 19:57:35 +01:00
|
|
|
from .recommender import analyze_article_with_groq
|
|
|
|
|
from .recommender import get_personalized_recommendations, vector_db
|
|
|
|
|
from .vector_store import VectorDB
|
|
|
|
|
from .config import Config
|
|
|
|
|
import os
|
|
|
|
|
|
|
|
|
|
# ASGI application object; the route handlers in this module register on it.
app = FastAPI(title="DS Task AI News", version="1.0.0")

# Cross-origin access: every origin, method and header is accepted.
# NOTE(review): wildcard origins together with allow_credentials=True lets
# any site issue credentialed requests -- confirm this is intended before
# exposing the service publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
    allow_credentials=True,
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.on_event("startup")
async def startup_event():
    """Prepare on-disk storage and restore the vector index at startup.

    Creates the raw-news, processed-news and ``data`` directories when they
    are missing, then loads the vector database from
    ``Config.VECTOR_DB_PATH`` if that path already exists.
    """
    # Make sure every storage location exists before any endpoint writes to it.
    for directory in (Config.RAW_NEWS_PATH, Config.PROCESSED_NEWS_PATH, "data"):
        os.makedirs(directory, exist_ok=True)

    # Nothing to restore on a first run.
    if not os.path.exists(Config.VECTOR_DB_PATH):
        return

    vector_db.load_index(Config.VECTOR_DB_PATH)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.get("/")
async def root():
    """Return the API name and version as a small JSON banner."""
    banner = {
        "message": "DS Task AI News API",
        "version": "1.0.0",
    }
    return banner
|
2025-07-07 22:08:02 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.get("/fetch-news")
async def fetch_news():
    """Fetch news from the RSS feeds, persist it, and index it.

    Pipeline: fetch articles, save the raw payload, process the articles
    into the vector database, save the processed payload, then persist the
    vector index to ``Config.VECTOR_DB_PATH``.
    (Duplicate detection is presumably done inside the fetch/processing
    helpers -- it is not visible in this module.)

    Returns:
        dict with a status message, the number of articles, the raw and
        processed file values returned by the save helpers, and the
        articles themselves.

    Raises:
        HTTPException: 404 when no articles were fetched; 500 for any other
            failure in the pipeline.
    """
    try:
        articles = fetch_all_news()

        if not articles:
            raise HTTPException(status_code=404, detail="No articles found")

        # Save raw news
        raw_file = save_raw_news(articles)

        # Process articles for vector database. Kept before the processed
        # save, as in the original ordering (the helper may enrich the
        # articles in place -- TODO confirm).
        process_articles_for_vector_db(articles)

        # Save processed news
        processed_file = save_processed_news(articles)

        # Save vector database
        vector_db.save_index(Config.VECTOR_DB_PATH)

        return {
            "message": "News fetched successfully",
            "articles_count": len(articles),
            "raw_file": raw_file,
            "processed_file": processed_file,
            "articles": articles,
        }

    except HTTPException:
        # Deliberate HTTP errors (the 404 above) must reach the client
        # unchanged instead of being re-wrapped as a 500 below.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Error fetching news: {str(e)}"
        ) from e
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.get("/recommend-news")
async def recommend_news(article_id: str):
    """Retrieve news similar to the selected article (backward compatibility).

    Args:
        article_id: Identifier of the article to find neighbours for.

    Returns:
        dict with the requested ``article_id``, the ``recommendations``
        returned by ``recommend_similar`` and their ``count``.

    Raises:
        HTTPException: 404 when no recommendations are found; 500 for any
            other failure.
    """
    try:
        recommendations = recommend_similar(article_id)

        if not recommendations:
            raise HTTPException(status_code=404, detail="No recommendations found")

        return {
            "article_id": article_id,
            "recommendations": recommendations,
            "count": len(recommendations),
        }

    except HTTPException:
        # Let the intentional 404 through; without this it was swallowed by
        # the generic handler and reported as a 500.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Error getting recommendations: {str(e)}"
        ) from e
|
|
|
|
|
|
|
|
|
|
|
2025-07-24 16:35:04 +01:00
|
|
|
@app.get("/recommend-by-text")
async def recommend_by_text(text_description: str, top_n: int = 3):
    """Recommend articles based on a free-text description.

    Args:
        text_description: Text to match articles against.
        top_n: Value forwarded to ``news_recommender.recommend_by_text``
            (presumably the maximum number of results); defaults to 3.

    Returns:
        dict with the original ``text_description``, the
        ``recommendations`` and their ``count``.

    Raises:
        HTTPException: 404 when no recommendations are found; 500 for any
            other failure.
    """
    try:
        recommendations = news_recommender.recommend_by_text(text_description, top_n)

        if not recommendations:
            raise HTTPException(status_code=404, detail="No recommendations found")

        return {
            "text_description": text_description,
            "recommendations": recommendations,
            "count": len(recommendations),
        }

    except HTTPException:
        # Preserve the intended status code (404) instead of converting it
        # to a 500 in the generic handler.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Error getting recommendations: {str(e)}"
        ) from e
|
|
|
|
|
|
|
|
|
|
|
2025-07-08 19:57:35 +01:00
|
|
|
@app.get("/analyze-article")
async def analyze_article(article_id: str):
    """Analyze a stored article using the Groq LLM helper.

    Looks the article up in the vector database, joins its title and
    content into a single string and passes that to
    ``analyze_article_with_groq``.

    Args:
        article_id: Identifier of the article to analyze.

    Returns:
        dict with the ``article_id``, the stored ``article`` and the
        ``analysis`` produced by the helper.

    Raises:
        HTTPException: 404 when the article does not exist; 500 for any
            other failure (including a stored article missing the
            ``title`` or ``content`` key).
    """
    try:
        article = vector_db.get_article_by_id(article_id)

        if not article:
            raise HTTPException(status_code=404, detail="Article not found")

        article_text = f"{article['title']} {article['content']}"
        analysis = analyze_article_with_groq(article_text)

        return {
            "article_id": article_id,
            "article": article,
            "analysis": analysis,
        }

    except HTTPException:
        # The "not found" 404 must not be re-wrapped as a 500 below.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Error analyzing article: {str(e)}"
        ) from e
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.get("/personalized-recommendations")
async def personalized_recommendations(interests: str):
    """Return recommendations matching the user's stated interests.

    Args:
        interests: Interest description forwarded unchanged to
            ``get_personalized_recommendations``.

    Returns:
        dict echoing ``interests`` together with the ``recommendations``
        and their ``count``.

    Raises:
        HTTPException: 500 when the lookup or response assembly fails.
    """
    try:
        matches = get_personalized_recommendations(interests)
        payload = {
            "interests": interests,
            "recommendations": matches,
            "count": len(matches),
        }
        return payload
    except Exception as err:
        raise HTTPException(
            status_code=500,
            detail=f"Error getting personalized recommendations: {str(err)}",
        )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.get("/health")
async def health_check():
    """Report service liveness and the number of articles in the vector DB."""
    article_count = len(vector_db.articles)
    return {"status": "healthy", "database_articles": article_count}
|
|
|
|
|
|
2025-07-24 16:35:04 +01:00
|
|
|
|
2025-07-08 19:57:35 +01:00
|
|
|
if __name__ == "__main__":
    # Development entry point: serve the app on all interfaces, port 8000.
    # uvicorn is imported here so it is only required when run as a script.
    # NOTE(review): this module uses relative imports, so it presumably has
    # to be started as part of its package (e.g. ``python -m <pkg>.<module>``)
    # for this guard to work -- confirm against how the project is launched.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
|