feat: Implement complete RSS news fetching system with multi-source support
This commit is contained in:
@@ -0,0 +1,171 @@
|
||||
"""Test the complete DS Task AI News pipeline"""
|
||||
import sys
|
||||
import os
|
||||
sys.path.append('backend')
|
||||
|
||||
def test_complete_pipeline():
    """Test the entire news processing pipeline.

    Runs five steps in order and prints a status line for each:

    1. fetch news with ``NewsFetcher.fetch_and_save_news``
    2. generate embeddings for the first three articles
    3. add those articles to a ``VectorStore`` and run a similarity search
    4. load the first five articles into a ``NewsRecommender`` and request
       query-based and trending recommendations
    5. probe ``http://localhost:8000/health`` (an unreachable server is
       only reported as a warning)

    Returns:
        bool: ``False`` when fetching, embedding generation or loading the
        recommender reports failure, or when any exception is raised inside
        the pipeline (the error and its traceback are printed, not
        propagated). ``True`` otherwise; empty search or recommendation
        results are reported as warnings in the final summary rather than
        as failures.
    """
    print("🚀 Testing Complete DS Task AI News Pipeline")
    print("=" * 60)

    try:
        # Step 1: Test News Fetching
        print("\n1️⃣ Testing News Fetching...")
        from news_fetcher import NewsFetcher

        fetcher = NewsFetcher()
        result = fetcher.fetch_and_save_news()

        if result["success"]:
            print(f"✅ Fetched {result['articles_count']} articles")
            articles = result["articles"]

            if articles:
                print(f" Sample article: {articles[0]['title'][:50]}...")
                print(f" Source: {articles[0]['source']}")
            else:
                print("❌ No articles in result")
                return False
        else:
            print(f"❌ News fetching failed: {result.get('message', 'Unknown error')}")
            return False

        # Step 2: Test Embeddings Generation
        print("\n2️⃣ Testing Embeddings Generation...")
        from embeddings import EmbeddingGenerator

        embedding_gen = EmbeddingGenerator()

        # Test with first few articles
        test_articles = articles[:3]
        embeddings = embedding_gen.generate_embeddings(test_articles)

        if embeddings is not None and len(embeddings) > 0:
            print(f"✅ Generated embeddings shape: {embeddings.shape}")
        else:
            print("❌ Embeddings generation failed")
            return False

        # Step 3: Test Vector Store
        print("\n3️⃣ Testing Vector Store...")
        from vector_store import VectorStore

        vector_store = VectorStore()
        vector_store.add_articles(test_articles, embeddings)

        stats = vector_store.get_stats()
        print(f"✅ Vector store stats: {stats['total_articles']} articles")

        # Test similarity search
        query_embedding = embedding_gen.generate_query_embedding("artificial intelligence technology")
        similar_articles = vector_store.search_similar(query_embedding, top_k=2)

        # Remember whether the search actually returned something so the
        # final summary does not claim success for an empty result.
        search_ok = False
        if similar_articles:
            search_ok = True
            print(f"✅ Found {len(similar_articles)} similar articles")
            for i, article in enumerate(similar_articles):
                print(f" {i+1}. {article['title'][:40]}... (score: {article['similarity_score']:.3f})")
        else:
            print("⚠️ No similar articles found (might be due to threshold)")

        # Step 4: Test Recommender System
        print("\n4️⃣ Testing Recommender System...")
        from recommender import NewsRecommender

        recommender = NewsRecommender()

        # Add articles to recommender's store
        store_result = recommender.add_articles_to_store(articles[:5])
        if store_result["success"]:
            print(f"✅ Added {store_result['articles_added']} articles to recommender")
        else:
            # Use .get() like the fetch step: a failure result without a
            # 'message' key must not raise KeyError and hide the real cause.
            print(f"❌ Failed to add articles: {store_result.get('message', 'Unknown error')}")
            return False

        # Test query-based recommendations
        recommendations = recommender.recommend_by_query("technology news", top_k=3)
        if recommendations:
            print(f"✅ Query recommendations: {len(recommendations)} articles")
            for i, rec in enumerate(recommendations):
                print(f" {i+1}. {rec['title'][:40]}... (score: {rec['similarity_score']:.3f})")
        else:
            print("⚠️ No query recommendations found")

        # Test trending articles
        trending = recommender.get_trending_articles(top_k=3)
        if trending:
            print(f"✅ Trending articles: {len(trending)} articles")
        else:
            print("⚠️ No trending articles found")

        # The recommender only counts as verified when both calls returned results.
        recommender_ok = bool(recommendations) and bool(trending)

        # Step 5: Test FastAPI Integration
        print("\n5️⃣ Testing FastAPI Integration...")

        # Test if server is running
        import requests
        try:
            response = requests.get("http://localhost:8000/health", timeout=5)
            if response.status_code == 200:
                print("✅ FastAPI server is running")
                health_data = response.json()
                print(f" Vector store has {health_data.get('vector_store', {}).get('total_articles', 0)} articles")
            else:
                print(f"⚠️ FastAPI server responded with status {response.status_code}")
        except requests.exceptions.RequestException:
            print("⚠️ FastAPI server not accessible (might not be running)")

        print("\n" + "=" * 60)
        print("🎉 COMPLETE PIPELINE TEST SUCCESSFUL!")
        print("✅ News fetching working")
        print("✅ Embeddings generation working")
        print("✅ Vector storage working")
        # Report only what was observed: steps that ended in a warning above
        # are repeated as warnings instead of being listed as working.
        if search_ok:
            print("✅ Similarity search working")
        else:
            print("⚠️ Similarity search returned no results")
        if recommender_ok:
            print("✅ Recommendation system working")
        else:
            print("⚠️ Recommendation system returned incomplete results")
        if search_ok and recommender_ok:
            print("✅ All components integrated successfully")

        return True

    except Exception as e:
        # Top-level boundary of the smoke test: report the error with its
        # traceback and signal failure through the return value.
        print(f"\n❌ Pipeline test failed with error: {e}")
        import traceback
        traceback.print_exc()
        return False
|
||||
|
||||
def test_api_endpoints():
    """Test API endpoints if server is running.

    Sends one request to each of a fixed list of endpoints on
    http://localhost:8000 (10 second timeout each) and prints one status
    line per endpoint: OK for HTTP 200, the status code otherwise, or a
    connection error when the request raises a ``requests`` exception.
    Nothing is returned.
    """
    print("\n🌐 Testing API Endpoints...")

    import requests

    server = "http://localhost:8000"

    # (HTTP method, path, human-readable label)
    checks = (
        ("GET", "/", "Health check"),
        ("GET", "/health", "Detailed health"),
        ("POST", "/fetch-news", "Fetch news"),
        ("GET", "/trending", "Trending articles"),
        ("GET", "/stats", "System stats"),
    )

    for verb, path, label in checks:
        # Anything that is not a GET is sent as a POST.
        send = requests.get if verb == "GET" else requests.post

        try:
            reply = send(f"{server}{path}", timeout=10)
        except requests.exceptions.RequestException:
            print(f"❌ {label}: Connection error")
            continue

        if reply.status_code == 200:
            print(f"✅ {label}: OK")
        else:
            print(f"⚠️ {label}: Status {reply.status_code}")
|
||||
|
||||
if __name__ == "__main__":
    # Run the in-process pipeline check first; the HTTP endpoint checks are
    # only attempted when it passes.
    success = test_complete_pipeline()

    if success:
        print("\n🚀 Testing API endpoints...")
        test_api_endpoints()
        print("\n✅ SYSTEM FULLY OPERATIONAL!")
    else:
        print("\n❌ Pipeline needs debugging")
        # Exit with a non-zero status so shells and CI jobs can detect the
        # failure instead of seeing a successful run.
        raise SystemExit(1)
|
||||
Reference in New Issue
Block a user