# File: DS_TASK_AI_VIEWS/test_complete_pipeline.py
# (172 lines, 6.5 KiB, Python)

"""Test the complete DS Task AI News pipeline"""
import sys
import os
# Extend the module search path so the project-local imports used by the
# tests in this file can be resolved.
# NOTE(review): 'backend' is a relative path, so it is resolved against the
# current working directory — presumably this script must be launched from
# the directory that contains backend/; confirm.
sys.path.append('backend')
def test_complete_pipeline():
    """Run an end-to-end smoke test of the DS Task AI News pipeline.

    Exercises, in order: news fetching, embedding generation, the vector
    store (insert, stats, similarity search), the recommender (store,
    query recommendations, trending) and finally a probe of the FastAPI
    ``/health`` endpoint. Progress is reported on stdout.

    The project modules are imported inside the ``try`` block, so a module
    that cannot be imported is reported as a pipeline failure rather than
    aborting the script.

    Returns:
        bool: ``True`` when every mandatory step succeeded. ``False`` when
        news fetching, embedding generation or adding articles to the
        recommender reports failure, or when any exception is raised (the
        traceback is printed). Empty search/recommendation/trending results
        and an unreachable FastAPI server only produce warnings.
    """
    print("🚀 Testing Complete DS Task AI News Pipeline")
    print("=" * 60)

    try:
        # Step 1: Test News Fetching
        print("\n1️⃣ Testing News Fetching...")
        from news_fetcher import NewsFetcher

        fetcher = NewsFetcher()
        result = fetcher.fetch_and_save_news()

        # Use .get() so a result without a "success" key is reported as a
        # fetch failure instead of surfacing as a bare KeyError.
        if not result.get("success"):
            print(f"❌ News fetching failed: {result.get('message', 'Unknown error')}")
            return False

        print(f"✅ Fetched {result['articles_count']} articles")
        articles = result["articles"]
        if not articles:
            print("❌ No articles in result")
            return False

        print(f" Sample article: {articles[0]['title'][:50]}...")
        print(f" Source: {articles[0]['source']}")

        # Step 2: Test Embeddings Generation
        print("\n2️⃣ Testing Embeddings Generation...")
        from embeddings import EmbeddingGenerator

        embedding_gen = EmbeddingGenerator()

        # Test with first few articles
        test_articles = articles[:3]
        embeddings = embedding_gen.generate_embeddings(test_articles)

        # Explicit None/len checks: the result exposes ``.shape``, so plain
        # truthiness may not be a valid test for it.
        if embeddings is None or len(embeddings) == 0:
            print("❌ Embeddings generation failed")
            return False
        print(f"✅ Generated embeddings shape: {embeddings.shape}")

        # Step 3: Test Vector Store
        print("\n3️⃣ Testing Vector Store...")
        from vector_store import VectorStore

        vector_store = VectorStore()
        vector_store.add_articles(test_articles, embeddings)
        stats = vector_store.get_stats()
        print(f"✅ Vector store stats: {stats['total_articles']} articles")

        # Test similarity search
        query_embedding = embedding_gen.generate_query_embedding("artificial intelligence technology")
        similar_articles = vector_store.search_similar(query_embedding, top_k=2)
        if similar_articles:
            print(f"✅ Found {len(similar_articles)} similar articles")
            for rank, article in enumerate(similar_articles, start=1):
                print(f" {rank}. {article['title'][:40]}... (score: {article['similarity_score']:.3f})")
        else:
            # Not fatal: an empty result is only warned about.
            print("⚠️ No similar articles found (might be due to threshold)")

        # Step 4: Test Recommender System
        print("\n4️⃣ Testing Recommender System...")
        from recommender import NewsRecommender

        recommender = NewsRecommender()

        # Add articles to recommender's store
        store_result = recommender.add_articles_to_store(articles[:5])
        # Same defensive lookups as in step 1, so a failure result without a
        # "message" key still prints a readable error.
        if not store_result.get("success"):
            print(f"❌ Failed to add articles: {store_result.get('message', 'Unknown error')}")
            return False
        print(f"✅ Added {store_result['articles_added']} articles to recommender")

        # Test query-based recommendations
        recommendations = recommender.recommend_by_query("technology news", top_k=3)
        if recommendations:
            print(f"✅ Query recommendations: {len(recommendations)} articles")
            for rank, rec in enumerate(recommendations, start=1):
                print(f" {rank}. {rec['title'][:40]}... (score: {rec['similarity_score']:.3f})")
        else:
            print("⚠️ No query recommendations found")

        # Test trending articles
        trending = recommender.get_trending_articles(top_k=3)
        if trending:
            print(f"✅ Trending articles: {len(trending)} articles")
        else:
            print("⚠️ No trending articles found")

        # Step 5: Test FastAPI Integration
        print("\n5️⃣ Testing FastAPI Integration...")
        # Test if server is running; an unreachable server is only a warning.
        import requests

        try:
            response = requests.get("http://localhost:8000/health", timeout=5)
            if response.status_code == 200:
                print("✅ FastAPI server is running")
                health_data = response.json()
                print(f" Vector store has {health_data.get('vector_store', {}).get('total_articles', 0)} articles")
            else:
                print(f"⚠️ FastAPI server responded with status {response.status_code}")
        except requests.exceptions.RequestException:
            print("⚠️ FastAPI server not accessible (might not be running)")

        print("\n" + "=" * 60)
        print("🎉 COMPLETE PIPELINE TEST SUCCESSFUL!")
        print("✅ News fetching working")
        print("✅ Embeddings generation working")
        print("✅ Vector storage working")
        print("✅ Similarity search working")
        print("✅ Recommendation system working")
        print("✅ All components integrated successfully")
        return True

    except Exception as e:
        # Top-level boundary of the smoke test: report the error with its
        # traceback and signal failure through the return value.
        print(f"\n❌ Pipeline test failed with error: {e}")
        import traceback

        traceback.print_exc()
        return False
def test_api_endpoints(base_url="http://localhost:8000"):
    """Probe a fixed list of API endpoints and print one result line each.

    Every endpoint is requested once with a 10 second timeout. A 200
    response is reported as OK, any other status code as a warning, and a
    ``requests`` exception (connection refused, timeout, ...) as a
    connection error. Nothing is raised for a failing endpoint and nothing
    is returned.

    Args:
        base_url: Root URL of the server under test, without a trailing
            slash. Defaults to ``http://localhost:8000``.
    """
    print("\n🌐 Testing API Endpoints...")
    import requests

    # (HTTP method, path, human-readable label)
    endpoints_to_test = [
        ("GET", "/", "Health check"),
        ("GET", "/health", "Detailed health"),
        ("POST", "/fetch-news", "Fetch news"),
        ("GET", "/trending", "Trending articles"),
        ("GET", "/stats", "System stats"),
    ]

    for method, endpoint, description in endpoints_to_test:
        # Keep the try body to the single call that can raise.
        try:
            response = requests.request(method, f"{base_url}{endpoint}", timeout=10)
        except requests.exceptions.RequestException:
            print(f"{description}: Connection error")
            continue

        if response.status_code == 200:
            print(f"{description}: OK")
        else:
            print(f"⚠️ {description}: Status {response.status_code}")
if __name__ == "__main__":
    # Run the pipeline smoke test first; the API endpoint probe runs only
    # when the pipeline test reported success.
    success = test_complete_pipeline()
    if success:
        print("\n🚀 Testing API endpoints...")
        test_api_endpoints()
        print("\n✅ SYSTEM FULLY OPERATIONAL!")
    else:
        print("\n❌ Pipeline needs debugging")
        # Exit non-zero so shells and CI jobs can detect the failed run.
        sys.exit(1)