feat: Implement AI-powered embeddings and vector similarity search system
This commit is contained in:
@@ -0,0 +1,112 @@
|
||||
"""Test AI features: embeddings and vector search"""
|
||||
import sys
|
||||
import os
|
||||
# Make modules that live under ``backend/`` importable.
# The bare relative entry only resolves when the script is launched from the
# directory that contains ``backend``; the second entry is anchored to this
# file's own location so the imports also work from any other working
# directory. If no such directory exists next to this file the extra entry is
# simply ignored by the import system.
sys.path.append('backend')
_BACKEND_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'backend')
if _BACKEND_DIR not in sys.path:
    sys.path.append(_BACKEND_DIR)
|
||||
|
||||
def test_ai_pipeline():
    """Exercise the news -> embeddings -> vector store -> recommender pipeline.

    Steps, in order:
      1. Fetch the BBC RSS feed with ``NewsFetcher.fetch_rss_feed`` and keep
         the first five articles.
      2. Generate embeddings for them with ``EmbeddingGenerator``.
      3. Clear a ``VectorStore``, add the articles and print its stats.
      4. Embed a fixed query and run ``search_similar`` against the store.
      5. Add the articles to a ``NewsRecommender`` and request query-based and
         article-based recommendations.

    Progress is reported with ``print``. An empty similarity-search or
    recommendation result is reported as a warning (or not at all), not as a
    failure.

    Returns:
        True when every step ran to completion.

    Raises:
        RuntimeError: if the feed yields no articles, or if the recommender
            reports a falsy ``result["success"]`` when adding articles.
        Exception: anything raised by the project components propagates.
    """
    print("🤖 Testing AI Features Pipeline")
    print("=" * 50)

    # Step 1: Get some news articles
    print("1. Fetching news articles...")
    # Project modules are imported lazily, after sys.path has been extended
    # at module level.
    from news_fetcher import NewsFetcher

    fetcher = NewsFetcher()

    # Get articles from BBC
    articles = fetcher.fetch_rss_feed("https://feeds.bbci.co.uk/news/rss.xml")
    if not articles:
        # Without articles every later step would run on empty input and the
        # final summary would be meaningless, so fail early and clearly.
        raise RuntimeError(
            "No articles fetched from the RSS feed; cannot test the AI pipeline"
        )
    print(f"✅ Got {len(articles)} articles")

    # Use first 5 articles for testing
    test_articles = articles[:5]
    for position, article in enumerate(test_articles, start=1):
        print(f" {position}. {article['title'][:50]}...")

    # Step 2: Test embeddings
    print("\n2. Testing embeddings generation...")
    from embeddings import EmbeddingGenerator

    embedding_gen = EmbeddingGenerator()
    print(f" Using model: {'Cohere' if embedding_gen.use_cohere else 'Sentence Transformers'}")

    # Generate embeddings
    embeddings = embedding_gen.generate_embeddings(test_articles)
    print(f"✅ Generated embeddings: {embeddings.shape}")

    # Step 3: Test vector store
    print("\n3. Testing vector store...")
    from vector_store import VectorStore

    # Clear any existing index for clean test
    vector_store = VectorStore()
    vector_store.clear_index()

    # Add articles to vector store
    vector_store.add_articles(test_articles, embeddings)
    stats = vector_store.get_stats()
    print(f"✅ Vector store: {stats['total_articles']} articles, dimension {stats['index_dimension']}")

    # Step 4: Test similarity search
    print("\n4. Testing similarity search...")

    # Test query
    query = "technology artificial intelligence"
    query_embedding = embedding_gen.generate_query_embedding(query)
    print(f" Query: '{query}'")

    # Search for similar articles
    similar_articles = vector_store.search_similar(query_embedding, top_k=3)

    if similar_articles:
        print(f"✅ Found {len(similar_articles)} similar articles:")
        _print_scored_titles(similar_articles)
    else:
        print("⚠️ No similar articles found (threshold might be too high)")

    # Step 5: Test recommender system
    print("\n5. Testing recommender system...")
    from recommender import NewsRecommender

    recommender = NewsRecommender()

    # Add articles to recommender
    result = recommender.add_articles_to_store(test_articles)
    if not result["success"]:
        # Previously this case was skipped silently and the summary still
        # claimed the recommender was working.
        raise RuntimeError(f"Recommender failed to add articles: {result!r}")
    print(f"✅ Added {result['articles_added']} articles to recommender")

    # Test query-based recommendations
    recommendations = recommender.recommend_by_query("technology news", top_k=3)
    if recommendations:
        print(f"✅ Query recommendations: {len(recommendations)} articles")
        _print_scored_titles(recommendations)

    # Test article-based recommendations (test_articles is non-empty here
    # because an empty feed has already been rejected above).
    article_id = test_articles[0]['id']
    similar_recs = recommender.recommend_by_article_id(article_id, top_k=2)
    if similar_recs:
        print(f"✅ Article-based recommendations: {len(similar_recs)} articles")
    else:
        print("⚠️ No article-based recommendations found")

    print("\n" + "=" * 50)
    print("🎉 AI FEATURES TEST COMPLETED!")
    print("✅ News fetching: Working")
    print("✅ Embeddings generation: Working")
    print("✅ Vector storage: Working")
    print("✅ Similarity search: Working")
    print("✅ Recommendation system: Working")

    return True


def _print_scored_titles(items):
    """Print a numbered list of titles (cut to 45 chars) with similarity scores.

    Each item is read via ``item['title']`` and
    ``item.get('similarity_score', 0)``.
    """
    for position, item in enumerate(items, start=1):
        score = item.get('similarity_score', 0)
        print(f" {position}. {item['title'][:45]}... (score: {score:.3f})")
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the pipeline once and report the outcome.
    try:
        test_ai_pipeline()
    except Exception as e:
        print(f"\n❌ Error in AI pipeline: {e}")
        import traceback

        traceback.print_exc()
        # Exit non-zero so shells and CI can tell the run failed; without
        # this the process ended with status 0 even after an error.
        sys.exit(1)
    else:
        print("\n🚀 AI-powered news system is fully operational!")
|
||||
Reference in New Issue
Block a user