Initial project setup

This commit is contained in:
Ayomide
2025-07-07 22:08:02 +01:00
parent c158262a49
commit b76a3e75f3
11 changed files with 208 additions and 0 deletions
+13
View File
@@ -0,0 +1,13 @@
import os
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment.
# This must run before the Config class body below, which reads the
# environment as soon as the class is defined.
load_dotenv()
class Config:
    """Static application settings, resolved once when the class is defined."""

    # API credentials come from the process environment; an unset variable
    # leaves the attribute as None.
    COHERE_API_KEY = os.environ.get("COHERE_API_KEY")
    GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

    # RSS endpoints to pull articles from.
    RSS_FEEDS = [
        "http://rss.nytimes.com/services/xml/rss/nyt/Technology.xml",
        "https://feeds.bbci.co.uk/news/technology/rss.xml",
    ]

    # File path intended for the vector index.
    VECTOR_DB_PATH = "data/vector_db.index"
+8
View File
@@ -0,0 +1,8 @@
import cohere
from backend.config import Config
# Module-level Cohere client, created at import time with the API key taken
# from Config. NOTE(review): Config.COHERE_API_KEY is None when the
# environment variable is unset - confirm how the client behaves in that case.
co = cohere.Client(Config.COHERE_API_KEY)
def get_embeddings(texts, input_type="search_document"):
    """Embed a batch of texts with Cohere's ``embed-english-v3.0`` model.

    Args:
        texts: Sequence of strings to embed. A single bare string is treated
            as a one-element batch instead of being split into characters.
        input_type: Purpose of the embeddings, forwarded to the Cohere API.
            The v3 embedding models reject requests that omit it; typical
            values are ``"search_document"`` for stored content and
            ``"search_query"`` for lookups.

    Returns:
        One embedding per input text, in input order. An empty input yields
        an empty list without contacting the API.
    """
    if isinstance(texts, str):
        texts = [texts]
    else:
        texts = list(texts)

    if not texts:
        # Nothing to embed: avoid a pointless (and rejected) API request.
        return []

    response = co.embed(
        texts=texts,
        model="embed-english-v3.0",
        input_type=input_type,
    )
    return response.embeddings
+20
View File
@@ -0,0 +1,20 @@
from fastapi import FastAPI
from backend.news_fetcher import fetch_news
from backend.recommender import recommend_similar
from backend.config import Config
# Application object; the @app.get decorators below register their handlers
# on this instance.
app = FastAPI()
@app.get("/fetch-news")
async def get_latest_news():
    """Fetch every configured RSS feed and return the combined articles.

    ``fetch_news`` does blocking network I/O. Calling it directly inside a
    coroutine would stall the event loop for the duration of every download,
    so each feed is fetched on the loop's default thread pool and all feeds
    are downloaded concurrently.

    Returns:
        ``{"news": [...]}`` where the list holds the articles of all feeds,
        grouped in the order the feeds appear in ``Config.RSS_FEEDS``.
    """
    import asyncio

    loop = asyncio.get_running_loop()
    pending = [
        loop.run_in_executor(None, fetch_news, feed)
        for feed in Config.RSS_FEEDS
    ]
    # gather() returns results in argument order, so the output ordering is
    # deterministic even though the downloads overlap.
    per_feed = await asyncio.gather(*pending)

    all_news = [article for articles in per_feed for article in articles]
    return {"news": all_news}
@app.get("/recommend")
async def recommend_news(article_id: str):
    """Return identifiers of stored articles similar to a query text.

    Args:
        article_id: Identifier supplied by the client.
            NOTE(review): it is accepted but not used yet - the query below
            is a fixed placeholder text. Wire this to a real article lookup.

    Returns:
        ``{"similar_articles": [...]}`` with a plain Python list so the
        response can be JSON-encoded.
    """
    import asyncio

    sample_text = "AI breakthroughs in 2024"

    # recommend_similar performs a blocking embedding request; run it on the
    # default thread pool so the event loop stays responsive.
    loop = asyncio.get_running_loop()
    similar_ids = await loop.run_in_executor(None, recommend_similar, sample_text)

    # The vector search may hand back a NumPy array, which the JSON encoder
    # cannot serialise; convert to built-in types first.
    if hasattr(similar_ids, "tolist"):
        similar_ids = similar_ids.tolist()
    else:
        similar_ids = list(similar_ids)

    return {"similar_articles": similar_ids}
+26
View File
@@ -0,0 +1,26 @@
# backend/news_fetcher.py
from datetime import datetime
import feedparser
def _parse_published(raw):
    """Turn an RSS date string into a timezone-aware datetime (UTC fallback)."""
    from datetime import timezone
    from email.utils import parsedate_to_datetime

    if not raw:
        return datetime.now(timezone.utc)
    try:
        # Handles RFC 822/2822 dates with numeric offsets as well as zone
        # names such as "GMT".
        parsed = parsedate_to_datetime(raw)
    except (TypeError, ValueError, IndexError):
        # Unparseable date: best effort, stamp the article with "now".
        return datetime.now(timezone.utc)
    if parsed.tzinfo is None:
        # A date without usable zone information is taken to be UTC so that
        # every returned value is aware and mutually comparable.
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed


def fetch_news(rss_url):
    """Download an RSS feed and return its entries as plain dictionaries.

    Args:
        rss_url: URL of the feed to parse.

    Returns:
        A list with one dict per entry, holding ``"title"``, ``"content"``
        (the entry description), ``"published"`` (timezone-aware datetime)
        and ``"source"`` (the feed URL). Missing title or description fields
        become empty strings; a missing or unparseable publication date
        becomes the current UTC time.
    """
    feed = feedparser.parse(rss_url)
    return [
        {
            # .get() keeps one incomplete entry from aborting the whole feed.
            "title": entry.get("title", ""),
            "content": entry.get("description", ""),
            "published": _parse_published(entry.get("published")),
            "source": rss_url,
        }
        for entry in feed.entries
    ]
+8
View File
@@ -0,0 +1,8 @@
from backend.embeddings import get_embeddings
from backend.vector_store import VectorDB
# Module-level vector store, created at import time and used by
# recommend_similar below.
db = VectorDB()
def recommend_similar(article_text, top_k=3):
    """Embed *article_text* and look up its ``top_k`` nearest stored vectors.

    Args:
        article_text: Text used as the similarity query.
        top_k: Number of neighbours requested from the vector store.

    Returns:
        Whatever ``db.search`` yields for the query embedding.
    """
    # The embedding helper works on batches, so wrap the text and unwrap
    # the single result.
    embedded_batch = get_embeddings([article_text])
    query_vector = embedded_batch[0]
    nearest = db.search(query_vector, k=top_k)
    return nearest
+7
View File
@@ -0,0 +1,7 @@
fastapi
uvicorn
feedparser
cohere
python-dotenv
groq
numpy
faiss-cpu
+14
View File
@@ -0,0 +1,14 @@
import numpy as np
import faiss
from backend.config import Config
class VectorDB:
    """In-memory FAISS index (exact L2 search) that maps hits back to ids.

    Args:
        dim: Dimensionality of the stored vectors. Defaults to 1024, the
            output size of Cohere's ``embed-english-v3.0`` model.
    """

    def __init__(self, dim=1024):
        self.dim = dim
        self.index = faiss.IndexFlatL2(dim)
        # FAISS only knows row positions; this list maps position -> caller id.
        self._ids = []

    def add_vectors(self, ids, embeddings):
        """Append vectors to the index and remember the id of each one.

        Args:
            ids: One identifier per embedding, in the same order.
            embeddings: Sequence of vectors (or a single vector) of length
                ``dim``.

        Raises:
            ValueError: If the number of ids and vectors differ, or the
                vectors do not have ``dim`` components.
        """
        ids = list(ids)
        vectors = np.asarray(embeddings, dtype="float32")
        if vectors.size == 0:
            if ids:
                raise ValueError("ids were given without any embeddings")
            return
        if vectors.ndim == 1:
            # A single vector was passed; treat it as a batch of one.
            vectors = vectors.reshape(1, -1)
        if len(ids) != len(vectors):
            raise ValueError(
                f"got {len(ids)} ids for {len(vectors)} embeddings"
            )
        if vectors.shape[1] != self.dim:
            raise ValueError(
                f"expected vectors of dimension {self.dim}, "
                f"got {vectors.shape[1]}"
            )
        self.index.add(np.ascontiguousarray(vectors))
        self._ids.extend(ids)

    def search(self, query_embedding, k=5):
        """Return the ids of the ``k`` stored vectors closest to the query.

        Args:
            query_embedding: Query vector of length ``dim``.
            k: Maximum number of neighbours to return.

        Returns:
            A list of ids (as passed to ``add_vectors``), nearest first. The
            list is shorter than ``k`` when fewer vectors are stored and
            empty when the index is empty.
        """
        stored = self.index.ntotal
        if stored == 0 or k <= 0:
            return []
        # FAISS operates on float32 row matrices.
        query = np.asarray(query_embedding, dtype="float32").reshape(1, -1)
        _, positions = self.index.search(query, min(k, stored))
        # FAISS pads missing neighbours with -1; drop those defensively.
        return [self._ids[pos] for pos in positions[0] if pos >= 0]