172 lines
6.5 KiB
Python
172 lines
6.5 KiB
Python
"""Test the complete DS Task AI News pipeline"""
|
|
import sys
|
|
import os
|
|
# Make the modules under the 'backend' directory importable. The path is
# relative, so Python resolves it against the current working directory:
# run this script from the directory that contains 'backend'.
sys.path.append('backend')
|
|
|
|
def test_complete_pipeline():
    """Run the news pipeline end to end and report progress on stdout.

    The steps run in order: news fetching, embedding generation, vector
    store insertion plus a similarity search, recommender queries, and a
    best-effort probe of the FastAPI ``/health`` endpoint. Project modules
    are imported inside the ``try`` block, so a missing or broken module is
    reported as a pipeline failure rather than crashing at import time.

    Returns:
        bool: ``True`` when every mandatory step succeeded. ``False`` when
        news fetching, embedding generation or adding articles to the
        recommender reported failure, or when any exception was raised
        (its traceback is printed). Empty search/recommendation/trending
        results and an unreachable API server only produce warnings.
    """
    print("🚀 Testing Complete DS Task AI News Pipeline")
    print("=" * 60)

    try:
        # Step 1: Test News Fetching
        print("\n1️⃣ Testing News Fetching...")
        from news_fetcher import NewsFetcher

        fetcher = NewsFetcher()
        result = fetcher.fetch_and_save_news()

        if not result["success"]:
            print(f"❌ News fetching failed: {result.get('message', 'Unknown error')}")
            return False

        print(f"✅ Fetched {result['articles_count']} articles")
        articles = result["articles"]
        if not articles:
            print("❌ No articles in result")
            return False

        print(f" Sample article: {articles[0]['title'][:50]}...")
        print(f" Source: {articles[0]['source']}")

        # Step 2: Test Embeddings Generation
        print("\n2️⃣ Testing Embeddings Generation...")
        from embeddings import EmbeddingGenerator

        embedding_gen = EmbeddingGenerator()

        # Test with first few articles
        test_articles = articles[:3]
        embeddings = embedding_gen.generate_embeddings(test_articles)

        # len() rather than truthiness: the result exposes .shape, and
        # array-like objects may not support a plain boolean test.
        if embeddings is None or len(embeddings) == 0:
            print("❌ Embeddings generation failed")
            return False
        print(f"✅ Generated embeddings shape: {embeddings.shape}")

        # Step 3: Test Vector Store
        print("\n3️⃣ Testing Vector Store...")
        from vector_store import VectorStore

        vector_store = VectorStore()
        vector_store.add_articles(test_articles, embeddings)

        stats = vector_store.get_stats()
        print(f"✅ Vector store stats: {stats['total_articles']} articles")

        # Test similarity search
        query_embedding = embedding_gen.generate_query_embedding("artificial intelligence technology")
        similar_articles = vector_store.search_similar(query_embedding, top_k=2)

        if similar_articles:
            print(f"✅ Found {len(similar_articles)} similar articles")
            for rank, article in enumerate(similar_articles, start=1):
                print(f" {rank}. {article['title'][:40]}... (score: {article['similarity_score']:.3f})")
        else:
            # An empty result is only a warning, not a pipeline failure.
            print("⚠️ No similar articles found (might be due to threshold)")

        # Step 4: Test Recommender System
        print("\n4️⃣ Testing Recommender System...")
        from recommender import NewsRecommender

        recommender = NewsRecommender()

        # Add articles to recommender's store
        store_result = recommender.add_articles_to_store(articles[:5])
        if not store_result["success"]:
            # .get() mirrors the step 1 failure branch: a failure dict that
            # lacks 'message' must not turn into a KeyError that hides the
            # real problem.
            print(f"❌ Failed to add articles: {store_result.get('message', 'Unknown error')}")
            return False
        print(f"✅ Added {store_result['articles_added']} articles to recommender")

        # Test query-based recommendations
        recommendations = recommender.recommend_by_query("technology news", top_k=3)
        if recommendations:
            print(f"✅ Query recommendations: {len(recommendations)} articles")
            for rank, rec in enumerate(recommendations, start=1):
                print(f" {rank}. {rec['title'][:40]}... (score: {rec['similarity_score']:.3f})")
        else:
            print("⚠️ No query recommendations found")

        # Test trending articles
        trending = recommender.get_trending_articles(top_k=3)
        if trending:
            print(f"✅ Trending articles: {len(trending)} articles")
        else:
            print("⚠️ No trending articles found")

        # Step 5: Test FastAPI Integration
        print("\n5️⃣ Testing FastAPI Integration...")

        # Test if server is running; an unreachable server is only a warning
        # because the components above do not depend on it.
        import requests

        try:
            response = requests.get("http://localhost:8000/health", timeout=5)
        except requests.exceptions.RequestException:
            print("⚠️ FastAPI server not accessible (might not be running)")
        else:
            if response.status_code == 200:
                print("✅ FastAPI server is running")
                health_data = response.json()
                print(f" Vector store has {health_data.get('vector_store', {}).get('total_articles', 0)} articles")
            else:
                print(f"⚠️ FastAPI server responded with status {response.status_code}")

        print("\n" + "=" * 60)
        print("🎉 COMPLETE PIPELINE TEST SUCCESSFUL!")
        print("✅ News fetching working")
        print("✅ Embeddings generation working")
        print("✅ Vector storage working")
        print("✅ Similarity search working")
        print("✅ Recommendation system working")
        print("✅ All components integrated successfully")

        return True

    except Exception as e:
        # Top-level boundary of the smoke test: report any failure (including
        # import errors from the project modules) and signal it to the caller.
        print(f"\n❌ Pipeline test failed with error: {e}")
        import traceback

        traceback.print_exc()
        return False
|
|
|
|
def test_api_endpoints():
    """Probe the local API's main endpoints and print one status line each.

    Each endpoint is requested once against ``http://localhost:8000`` with a
    10 second timeout. A 200 response is reported as OK, any other status
    code as a warning, and a failed request (refused connection, timeout,
    ...) as an error that includes the underlying exception text. Request
    failures are handled per endpoint, so one unreachable endpoint does not
    stop the remaining checks. Nothing is returned.
    """
    print("\n🌐 Testing API Endpoints...")

    import requests

    base_url = "http://localhost:8000"

    endpoints_to_test = [
        ("GET", "/", "Health check"),
        ("GET", "/health", "Detailed health"),
        ("POST", "/fetch-news", "Fetch news"),
        ("GET", "/trending", "Trending articles"),
        ("GET", "/stats", "System stats"),
    ]

    for method, endpoint, description in endpoints_to_test:
        # Only the HTTP call can raise RequestException, so keep the try
        # body limited to it.
        try:
            response = requests.request(method, f"{base_url}{endpoint}", timeout=10)
        except requests.exceptions.RequestException as e:
            # Include the exception: RequestException also covers timeouts
            # and invalid responses, not just refused connections.
            print(f"❌ {description}: Connection error ({e})")
            continue

        if response.status_code == 200:
            print(f"✅ {description}: OK")
        else:
            print(f"⚠️ {description}: Status {response.status_code}")
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the pipeline check first and probe the HTTP
    # API only when the pipeline itself reported success.
    if not test_complete_pipeline():
        print("\n❌ Pipeline needs debugging")
    else:
        print("\n🚀 Testing API endpoints...")
        test_api_endpoints()
        print("\n✅ SYSTEM FULLY OPERATIONAL!")
|