113 lines
4.1 KiB
Python
113 lines
4.1 KiB
Python
|
|
"""Test AI features: embeddings and vector search"""
|
||
|
|
import sys
|
||
|
|
import os
|
||
|
|
# Make the backend modules importable.  The bare relative entry only resolves
# when the script is launched from the project root, so also register the
# backend directory next to this file for runs started from elsewhere.
sys.path.append('backend')
sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'backend'))
|
||
|
|
|
||
|
|
def _title_preview(article, width):
    """Return at most *width* characters of the article title, or a placeholder if it has none."""
    return str(article.get('title') or '(untitled)')[:width]


def test_ai_pipeline():
    """Run an end-to-end smoke test of the AI news pipeline.

    Stages, in order: fetch articles from the BBC RSS feed, generate
    embeddings for up to the first five, index them in the vector store, run
    a similarity search, and exercise the recommender.  Progress and a
    per-stage summary are printed to stdout.

    Returns:
        bool: True when no stage failed (stages that merely returned no
        results are reported as warnings); False when the feed yielded no
        articles, the vector store stayed empty, or the recommender refused
        the articles.

    Exceptions raised by the backend modules are not caught here; they
    propagate to the caller.
    """
    print("🤖 Testing AI Features Pipeline")
    print("=" * 50)

    # (stage label, "ok" | "warn" | "fail") in execution order; this drives
    # the closing summary so it reports what actually happened.
    stages = []

    # Step 1: Get some news articles
    print("1. Fetching news articles...")
    # Backend modules are imported right before the stage that needs them.
    from news_fetcher import NewsFetcher
    fetcher = NewsFetcher()

    # Get articles from BBC
    articles = fetcher.fetch_rss_feed("https://feeds.bbci.co.uk/news/rss.xml")
    if not articles:
        # Nothing downstream can be exercised without input articles.
        print("❌ No articles fetched; cannot test the rest of the pipeline")
        return False
    print(f"✅ Got {len(articles)} articles")
    stages.append(("News fetching", "ok"))

    # Use first 5 articles for testing
    test_articles = articles[:5]
    for i, article in enumerate(test_articles, start=1):
        print(f" {i}. {_title_preview(article, 50)}...")

    # Step 2: Test embeddings
    print("\n2. Testing embeddings generation...")
    from embeddings import EmbeddingGenerator

    embedding_gen = EmbeddingGenerator()
    print(f" Using model: {'Cohere' if embedding_gen.use_cohere else 'Sentence Transformers'}")

    # Generate embeddings
    embeddings = embedding_gen.generate_embeddings(test_articles)
    print(f"✅ Generated embeddings: {embeddings.shape}")
    stages.append(("Embeddings generation", "ok"))

    # Step 3: Test vector store
    print("\n3. Testing vector store...")
    from vector_store import VectorStore

    # Clear any existing index for clean test
    vector_store = VectorStore()
    vector_store.clear_index()

    # Add articles to vector store
    vector_store.add_articles(test_articles, embeddings)
    stats = vector_store.get_stats()
    if stats['total_articles']:
        print(f"✅ Vector store: {stats['total_articles']} articles, dimension {stats['index_dimension']}")
        stages.append(("Vector storage", "ok"))
    else:
        print(f"❌ Vector store is empty after adding {len(test_articles)} articles")
        stages.append(("Vector storage", "fail"))

    # Step 4: Test similarity search
    print("\n4. Testing similarity search...")

    # Test query
    query = "technology artificial intelligence"
    query_embedding = embedding_gen.generate_query_embedding(query)
    print(f" Query: '{query}'")

    # Search for similar articles
    similar_articles = vector_store.search_similar(query_embedding, top_k=3)

    if similar_articles:
        print(f"✅ Found {len(similar_articles)} similar articles:")
        for i, article in enumerate(similar_articles, start=1):
            score = article.get('similarity_score', 0)
            print(f" {i}. {_title_preview(article, 45)}... (score: {score:.3f})")
        stages.append(("Similarity search", "ok"))
    else:
        # An empty result depends on the day's headlines, so it is a warning
        # rather than a failure.
        print("⚠️ No similar articles found (threshold might be too high)")
        stages.append(("Similarity search", "warn"))

    # Step 5: Test recommender system
    print("\n5. Testing recommender system...")
    from recommender import NewsRecommender

    recommender = NewsRecommender()

    # Add articles to recommender
    result = recommender.add_articles_to_store(test_articles)
    if result["success"]:
        print(f"✅ Added {result['articles_added']} articles to recommender")
        got_recommendations = False

        # Test query-based recommendations
        recommendations = recommender.recommend_by_query("technology news", top_k=3)
        if recommendations:
            got_recommendations = True
            print(f"✅ Query recommendations: {len(recommendations)} articles")
            for i, rec in enumerate(recommendations, start=1):
                score = rec.get('similarity_score', 0)
                print(f" {i}. {_title_preview(rec, 45)}... (score: {score:.3f})")
        else:
            print("⚠️ No query recommendations found")

        # Test article-based recommendations
        article_id = test_articles[0].get('id')
        if article_id is None:
            print("⚠️ First article has no 'id'; skipping article-based recommendations")
        else:
            similar_recs = recommender.recommend_by_article_id(article_id, top_k=2)
            if similar_recs:
                got_recommendations = True
                print(f"✅ Article-based recommendations: {len(similar_recs)} articles")
            else:
                print("⚠️ No article-based recommendations found")

        stages.append(("Recommendation system", "ok" if got_recommendations else "warn"))
    else:
        print(f"❌ Could not add articles to recommender: {result}")
        stages.append(("Recommendation system", "fail"))

    # Summary: one line per stage, reflecting its recorded outcome.
    summary_lines = {
        "ok": "✅ {}: Working",
        "warn": "⚠️ {}: No results",
        "fail": "❌ {}: Failed",
    }
    all_clean = all(state == "ok" for _, state in stages)

    print("\n" + "=" * 50)
    if all_clean:
        print("🎉 AI FEATURES TEST COMPLETED!")
    else:
        print("⚠️ AI FEATURES TEST COMPLETED WITH ISSUES")
    for label, state in stages:
        print(summary_lines[state].format(label))

    return not any(state == "fail" for _, state in stages)
|
||
|
|
|
||
|
|
if __name__ == "__main__":
    # Script entry point: run the pipeline and exit non-zero on any failure so
    # that shells and CI jobs can detect a broken pipeline.
    import traceback

    try:
        pipeline_ok = test_ai_pipeline()
    except Exception as e:
        # Top-level boundary: report the error with its traceback, then fail.
        print(f"\n❌ Error in AI pipeline: {e}")
        traceback.print_exc()
        raise SystemExit(1)

    if not pipeline_ok:
        print("\n❌ AI pipeline finished with failed stages")
        raise SystemExit(1)

    print("\n🚀 AI-powered news system is fully operational!")
|