Initial project setup
This commit is contained in:
@@ -0,0 +1,13 @@
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Load variables from a .env file into the process environment. This must run
# before Config is defined, because Config calls os.getenv while its class
# body is being executed.
load_dotenv()
|
||||
|
||||
class Config:
    """Static application settings, resolved once when the class is defined."""

    # API credentials read from the environment; each one is None when the
    # corresponding variable is not set.
    COHERE_API_KEY = os.getenv("COHERE_API_KEY")
    GROQ_API_KEY = os.getenv("GROQ_API_KEY")

    # RSS feeds to fetch. Every feed is addressed over HTTPS so the XML cannot
    # be tampered with in transit before it reaches the parser.
    RSS_FEEDS = [
        "https://rss.nytimes.com/services/xml/rss/nyt/Technology.xml",
        "https://feeds.bbci.co.uk/news/technology/rss.xml",
    ]

    # File path for the vector index.
    # NOTE(review): nothing in the visible code reads this value yet.
    VECTOR_DB_PATH = "data/vector_db.index"
|
||||
@@ -0,0 +1,8 @@
|
||||
import cohere
|
||||
from backend.config import Config
|
||||
|
||||
# Module-level Cohere client, created once at import time with the API key
# taken from Config.
co = cohere.Client(Config.COHERE_API_KEY)
|
||||
|
||||
def get_embeddings(texts, input_type="search_document"):
    """Embed a batch of texts with Cohere's ``embed-english-v3.0`` model.

    Args:
        texts: List of strings to embed.
        input_type: Purpose of the embeddings as understood by the Cohere API
            (for example ``"search_document"`` or ``"search_query"``). The v3
            embedding models require this field, so it is always sent.

    Returns:
        The ``embeddings`` attribute of the Cohere response (one vector per
        input text), or an empty list when ``texts`` is empty.
    """
    # Nothing to embed: skip the API round-trip entirely.
    if not texts:
        return []
    response = co.embed(
        texts=texts,
        model="embed-english-v3.0",
        input_type=input_type,
    )
    return response.embeddings
|
||||
@@ -0,0 +1,20 @@
|
||||
from fastapi import FastAPI
|
||||
from backend.news_fetcher import fetch_news
|
||||
from backend.recommender import recommend_similar
|
||||
from backend.config import Config
|
||||
|
||||
# Application object; the @app.get decorators in this module register their
# handlers on it.
app = FastAPI()
|
||||
|
||||
@app.get("/fetch-news")
async def get_latest_news():
    """Fetch every configured RSS feed and return the combined articles.

    Returns:
        A dict ``{"news": [...]}`` holding the articles of all feeds in
        ``Config.RSS_FEEDS``, concatenated in feed order.
    """
    import asyncio

    # fetch_news is a plain synchronous function; calling it directly inside
    # this coroutine would stall the event loop for the duration of each
    # download. Hand every feed to the default executor instead so the feeds
    # are fetched concurrently.
    loop = asyncio.get_running_loop()
    pending = [
        loop.run_in_executor(None, fetch_news, feed)
        for feed in Config.RSS_FEEDS
    ]
    # gather() returns results in argument order, so feed order is preserved.
    per_feed = await asyncio.gather(*pending)
    all_news = [article for articles in per_feed for article in articles]
    return {"news": all_news}
|
||||
|
||||
|
||||
@app.get("/recommend")
async def recommend_news(article_id: str):
    """Return the vector-store positions most similar to a sample query.

    Args:
        article_id: Identifier supplied by the client.
            NOTE(review): it is not used yet; the query text is a fixed
            placeholder.

    Returns:
        A dict ``{"similar_articles": [...]}`` whose values are plain ``int``
        positions.
    """
    sample_text = "AI breakthroughs in 2024"
    similar_ids = recommend_similar(sample_text)
    # The search result is a NumPy integer array, which the JSON response
    # encoder cannot serialise. Convert to built-in ints, and drop the -1
    # placeholders FAISS emits when fewer than k neighbours exist.
    similar_articles = [int(idx) for idx in similar_ids if int(idx) >= 0]
    return {"similar_articles": similar_articles}
|
||||
@@ -0,0 +1,26 @@
|
||||
# backend/news_fetcher.py
|
||||
from datetime import datetime
|
||||
import feedparser
|
||||
|
||||
def fetch_news(rss_url):
    """Parse an RSS feed and return its entries as plain dicts.

    Args:
        rss_url: URL of the feed, passed to ``feedparser.parse``.

    Returns:
        A list with one dict per feed entry, with the keys ``"title"``,
        ``"content"`` (the entry description), ``"published"`` (always a
        timezone-aware ``datetime``) and ``"source"`` (``rss_url``). A missing
        title or description becomes an empty string.
    """
    from datetime import timezone
    from email.utils import parsedate_to_datetime

    feed = feedparser.parse(rss_url)
    articles = []
    for entry in feed.entries:
        try:
            # Handles RFC 2822 dates with either a numeric offset ("+0000")
            # or a zone name ("GMT").
            pub_date = parsedate_to_datetime(entry.get("published"))
        except (TypeError, ValueError):
            # Missing or unparseable date: fall back to the current time.
            pub_date = datetime.now(timezone.utc)
        if pub_date.tzinfo is None:
            # Never hand out naive datetimes; mixing naive and aware values
            # breaks comparison and sorting downstream.
            pub_date = pub_date.replace(tzinfo=timezone.utc)

        articles.append({
            "title": entry.get("title", ""),
            "content": entry.get("description", ""),
            "published": pub_date,
            "source": rss_url,
        })
    return articles
|
||||
@@ -0,0 +1,8 @@
|
||||
from backend.embeddings import get_embeddings
|
||||
from backend.vector_store import VectorDB
|
||||
|
||||
# Module-level vector store instance used by recommend_similar.
db = VectorDB()
|
||||
|
||||
def recommend_similar(article_text, top_k=3):
    """Search the vector store for neighbours of ``article_text``.

    Args:
        article_text: Text to embed and use as the search query.
        top_k: Number of neighbours to request from the vector store.

    Returns:
        Whatever ``db.search`` returns for the query embedding.
    """
    # get_embeddings works on batches, so wrap the text and unwrap the result.
    embeddings = get_embeddings([article_text])
    query_vector = embeddings[0]
    return db.search(query_vector, k=top_k)
|
||||
@@ -0,0 +1,7 @@
|
||||
fastapi
|
||||
uvicorn
|
||||
feedparser
|
||||
cohere
|
||||
python-dotenv
|
||||
groq
|
||||
numpy
faiss-cpu
|
||||
@@ -0,0 +1,14 @@
|
||||
import numpy as np
|
||||
import faiss
|
||||
from backend.config import Config
|
||||
|
||||
class VectorDB:
    """In-memory FAISS index searched by exact L2 distance."""

    def __init__(self, dim=1024):
        """Create an empty index.

        Args:
            dim: Dimensionality of the stored vectors. Defaults to 1024, the
                output size of Cohere's ``embed-english-v3.0`` model.
        """
        self.dim = dim
        self.index = faiss.IndexFlatL2(dim)

    def add_vectors(self, ids, embeddings):
        """Append embeddings to the index.

        Args:
            ids: Identifiers for the embeddings.
                NOTE(review): these are not stored; results from ``search``
                are insertion positions, so callers must keep their own
                position-to-id mapping.
            embeddings: Sequence of vectors, each of length ``dim``.
        """
        # FAISS only accepts C-contiguous float32 matrices.
        vectors = np.ascontiguousarray(embeddings, dtype="float32")
        if vectors.size == 0:
            # An empty batch has no valid 2-D shape to hand to FAISS.
            return
        self.index.add(vectors)

    def search(self, query_embedding, k=5):
        """Return the positions of the ``k`` vectors nearest to the query.

        Args:
            query_embedding: A single vector of length ``dim``.
            k: Number of neighbours to request.

        Returns:
            A 1-D integer array of index positions. FAISS fills missing
            neighbours with -1 when the index holds fewer than ``k`` vectors.
        """
        # Embeddings arrive as Python floats (float64); cast for FAISS.
        query = np.ascontiguousarray([query_embedding], dtype="float32")
        distances, indices = self.index.search(query, k)
        return indices[0]
|
||||
Reference in New Issue
Block a user