diff --git a/backend/ai_analyzer.py b/backend/ai_analyzer.py
new file mode 100644
index 0000000..d852bdd
--- /dev/null
+++ b/backend/ai_analyzer.py
@@ -0,0 +1,230 @@
+"""AI Analysis module for DS Task AI News using Groq LLM"""
+import os
+from typing import Dict, List, Any, Optional
+import json
+from datetime import datetime
+
+try:
+    from groq import Groq
+    GROQ_AVAILABLE = True
+except ImportError:
+    GROQ_AVAILABLE = False
+    print("⚠️  Groq not available - install with: pip install groq")
+
+from config import settings
+
+class AIAnalyzer:
+    """AI-powered article analysis using Groq LLM"""
+    
+    def __init__(self):
+        """Create the Groq client when both the library and an API key are present."""
+        self.model = "llama3-8b-8192"  # Fast Groq model
+        self.client, self.available = None, False
+        if not (GROQ_AVAILABLE and settings.groq_api_key):
+            print("⚠️  Groq AI Analyzer not available (missing API key or library)")
+            return
+        try:
+            self.client = Groq(api_key=settings.groq_api_key)
+        except Exception as e:
+            print(f"❌ Groq initialization failed: {e}")
+            return
+        self.available = True
+        print("✅ Groq AI Analyzer initialized successfully")
+    
+    def _make_groq_request(self, prompt: str, max_tokens: int = 500) -> Optional[str]:
+        """Send ``prompt`` to the Groq chat endpoint; return the stripped reply or None.
+
+        None is returned when the analyzer is unavailable or the call raises.
+        """
+        if not self.available:
+            return None
+        system_message = {"role": "system", "content": "You are an expert news analyst. Provide concise, accurate analysis."}
+        user_message = {"role": "user", "content": prompt}
+        try:
+            completion = self.client.chat.completions.create(
+                messages=[system_message, user_message],
+                model=self.model, max_tokens=max_tokens, temperature=0.3,
+            )
+            reply = completion.choices[0].message.content
+            return reply.strip()
+        except Exception as e:
+            print(f"❌ Groq API error: {e}")
+            return None
+    
+    def summarize_article(self, article: Dict[str, Any]) -> Dict[str, Any]:
+        """Generate AI summary of an article.
+
+        Returns the model's JSON fields plus ``available``/``analyzed_at``; a reply that is
+        not a JSON object is returned verbatim as ``summary``. ``available`` is False on failure.
+        """
+        if not self.available:
+            return {"summary": "AI analysis not available", "available": False}
+
+        title = article.get('title') or ''
+        content = article.get('content') or ''  # ``or ''``: a None value cannot be sliced
+        prompt = f"""
+        Analyze this news article and provide a concise summary:
+        
+        Title: {title}
+        Content: {content[:1000]}...
+        
+        Provide:
+        1. A 2-sentence summary
+        2. 3 key points
+        3. Main topic category
+        
+        Format as JSON:
+        {{
+            "summary": "Brief 2-sentence summary",
+            "key_points": ["point1", "point2", "point3"],
+            "category": "Technology/Business/Science/etc"
+        }}
+        """
+        response = self._make_groq_request(prompt, max_tokens=300)
+        if not response:
+            return {"summary": "Analysis failed", "available": False}
+        # Parse the outermost {...} span so prose or ``` fences around the JSON are tolerated
+        start, end = response.find('{'), response.rfind('}')
+        try:
+            analysis = json.loads(response[start:end + 1] if 0 <= start < end else response)
+        except json.JSONDecodeError:
+            analysis = None
+        if not isinstance(analysis, dict):  # e.g. a bare string or list: keep the raw reply
+            analysis = {"summary": response}
+        analysis["available"] = True
+        analysis["analyzed_at"] = datetime.now().isoformat()
+        return analysis
+    
+    def extract_keywords(self, article: Dict[str, Any]) -> List[str]:
+        """Extract key terms and entities from article.
+
+        Returns non-empty keyword strings, or [] when unavailable or when the call fails.
+        """
+        if not self.available:
+            return []
+        title = article.get('title') or ''
+        content = article.get('content') or ''  # ``or ''``: a None value cannot be sliced
+        prompt = f"""
+        Extract the most important keywords and entities from this article:
+        
+        Title: {title}
+        Content: {content[:800]}...
+        
+        Return only a JSON array of 5-8 most relevant keywords:
+        ["keyword1", "keyword2", "keyword3", ...]
+        """
+        response = self._make_groq_request(prompt, max_tokens=100)
+        if not response:
+            return []
+        # Parse the outermost [...] span so prose or ``` fences around the array are tolerated
+        start, end = response.find('['), response.rfind(']')
+        try:
+            keywords = json.loads(response[start:end + 1] if 0 <= start < end else response)
+        except json.JSONDecodeError:
+            keywords = response.replace('[', '').replace(']', '').replace('"', '').split(',')[:8]
+        if not isinstance(keywords, list):
+            return []
+        return [str(word).strip() for word in keywords if str(word).strip()]
+    
+    def analyze_sentiment(self, article: Dict[str, Any]) -> Dict[str, Any]:
+        """Analyze sentiment and tone of article.
+
+        Returns the model's JSON object plus ``available``; a reply that is not a JSON
+        object yields neutral defaults with the raw reply stored as ``reasoning``.
+        """
+        if not self.available:
+            return {"sentiment": "neutral", "confidence": 0.0, "available": False}
+
+        title = article.get('title') or ''
+        content = article.get('content') or ''  # ``or ''``: a None value cannot be sliced
+        prompt = f"""
+        Analyze the sentiment and tone of this news article:
+        
+        Title: {title}
+        Content: {content[:600]}...
+        
+        Return JSON with:
+        {{
+            "sentiment": "positive/negative/neutral",
+            "confidence": 0.85,
+            "tone": "informative/urgent/optimistic/concerned/etc",
+            "reasoning": "Brief explanation"
+        }}
+        """
+        response = self._make_groq_request(prompt, max_tokens=150)
+        if not response:
+            return {"sentiment": "neutral", "confidence": 0.0, "available": False}
+
+        # Parse the outermost {...} span so prose or ``` fences around the JSON are tolerated
+        start, end = response.find('{'), response.rfind('}')
+        try:
+            sentiment = json.loads(response[start:end + 1] if 0 <= start < end else response)
+        except json.JSONDecodeError:
+            sentiment = None
+        if not isinstance(sentiment, dict):  # e.g. a bare string or list: use neutral defaults
+            sentiment = {"sentiment": "neutral", "confidence": 0.5,
+                         "tone": "informative", "reasoning": response}
+        sentiment["available"] = True
+        return sentiment
+    
+    def generate_insights(self, articles: List[Dict[str, Any]]) -> Dict[str, Any]:
+        """Generate insights from multiple articles.
+
+        Only the first 5 articles are sent to the model; ``article_count`` reports how
+        many were actually analyzed. A non-JSON-object reply is returned raw as ``insights``.
+        """
+        if not self.available or not articles:
+            return {"insights": "AI insights not available", "available": False}
+
+        analyzed = articles[:5]  # Limit to 5 articles
+        article_summaries = [
+            f"{i}. {article.get('title', '')} (Source: {article.get('source', '')})"
+            for i, article in enumerate(analyzed, start=1)
+        ]
+        prompt = f"""
+        Analyze these recent news articles and provide insights:
+        
+        Articles:
+        {chr(10).join(article_summaries)}
+        
+        Provide:
+        1. Main trends or themes
+        2. Key developments
+        3. Potential implications
+        
+        Format as JSON:
+        {{
+            "trends": ["trend1", "trend2"],
+            "key_developments": ["development1", "development2"],
+            "implications": "Brief analysis of what this means"
+        }}
+        """
+        response = self._make_groq_request(prompt, max_tokens=400)
+        if not response:
+            return {"insights": "Analysis failed", "available": False}
+        # Parse the outermost {...} span so prose or ``` fences around the JSON are tolerated
+        start, end = response.find('{'), response.rfind('}')
+        try:
+            insights = json.loads(response[start:end + 1] if 0 <= start < end else response)
+        except json.JSONDecodeError:
+            insights = None
+        if not isinstance(insights, dict):  # e.g. a bare string or list: keep the raw reply
+            insights = {"insights": response}
+        else:
+            insights["article_count"] = len(analyzed)  # count what the prompt really contained
+        insights["available"] = True
+        insights["analyzed_at"] = datetime.now().isoformat()
+        return insights
+    
+    def get_status(self) -> Dict[str, Any]:
+        """Report availability, the active model name and the feature list (empty when unavailable)."""
+        if not self.available:
+            return {"available": self.available, "model": None, "features": []}
+        # Capabilities are advertised only while the analyzer is marked available
+        feature_names = [
+            "Article Summarization",
+            "Keyword Extraction", 
+            "Sentiment Analysis",
+            "Trend Insights",
+        ]
+        return {"available": self.available, "model": self.model, "features": feature_names}
diff --git a/backend/config.py b/backend/config.py
index c9d9d11..1bc07fb 100644
--- a/backend/config.py
+++ b/backend/config.py
@@ -36,8 +36,8 @@ class Settings(BaseSettings):
     processed_news_dir: str = os.getenv("PROCESSED_NEWS_DIR", "../data/processed_news")
     vector_index_path: str = os.getenv("VECTOR_INDEX_PATH", "../data/news_vectors.faiss")
     
-    # Embedding Model
-    embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2"
+    # Embedding Model (Local)
+    embedding_model: str = "./models/all-MiniLM-L6-v2"
     
     # News Processing
     max_articles_per_feed: int = 50
diff --git a/backend/embeddings.py b/backend/embeddings.py
index e451495..e2495ef 100644
--- a/backend/embeddings.py
+++ b/backend/embeddings.py
@@ -23,37 +23,49 @@ class EmbeddingGenerator:
         self.cohere_client = None
         self.sentence_model = None
         self.use_cohere = COHERE_AVAILABLE and bool(settings.cohere_api_key)
+        self.use_sentence_transformers = SENTENCE_TRANSFORMERS_AVAILABLE
         self.model_loaded = False
         self.dimension = settings.vector_dimension
+        self.embedding_method = "hash"  # Default fallback
 
-        # Initialize embedding model
-        if self.use_cohere:
+        # Priority: 1. Local Sentence Transformers, 2. Cohere, 3. Hash fallback
+        # Use lazy loading for faster startup
+        if self.use_sentence_transformers:
+            print("🚀 Sentence Transformers available - will load on first use")
+            self.embedding_method = "sentence_transformers"
+            self.model_loaded = True  # Mark as ready for lazy loading
+
+        if not self.use_sentence_transformers and self.use_cohere:
             try:
                 self.cohere_client = cohere.Client(settings.cohere_api_key)
+                self.embedding_method = "cohere"
                 print("✅ Using Cohere for embeddings")
                 self.model_loaded = True
             except Exception as e:
                 print(f"❌ Cohere initialization failed: {e}")
                 self.use_cohere = False
 
-        if not self.use_cohere:
-            # Always start with simple embeddings for immediate functionality
-            print("⚡ Using fast hash-based embeddings for immediate startup")
-            self.model_loaded = True  # Simple embeddings are always ready
-            # Note: Sentence Transformers available for future enhancement
+        if not self.use_sentence_transformers and not self.use_cohere:
+            print("⚡ Using enhanced hash-based embeddings as fallback")
+            self.embedding_method = "hash"
+            self.model_loaded = True
 
     def _load_sentence_model(self):
-        """Lazy load sentence transformer model"""
-        if not self.model_loaded and SENTENCE_TRANSFORMERS_AVAILABLE:
+        """Lazy load the sentence transformer on first use; return True when a model is ready."""
+        if self.sentence_model is None and self.use_sentence_transformers:
             try:
-                print("📥 Loading Sentence Transformer model (this may take a moment)...")
+                print("📥 Loading local Sentence Transformers model (first use)...")
                 self.sentence_model = SentenceTransformer(settings.embedding_model)
-                self.model_loaded = True
-                print("✅ Sentence Transformer model loaded successfully")
+                print("✅ Local Sentence Transformers loaded successfully!")
+                print(f"📊 Model dimension: {self.sentence_model.get_sentence_embedding_dimension()}")
             except Exception as e:
-                print(f"❌ Failed to load Sentence Transformer: {e}")
-                self.sentence_model = None
-                self.model_loaded = False
+                print(f"❌ Failed to load local Sentence Transformers: {e}")
+                self.use_sentence_transformers = False
+                self.use_cohere = self.use_cohere and self.cohere_client is not None  # no client, no Cohere
+                self.embedding_method = "cohere" if self.use_cohere else "hash"
+                print(f"⚡ Falling back to {'Cohere' if self.use_cohere else 'hash-based'} embeddings")
+                return False
+        return self.sentence_model is not None
 
     def _simple_text_to_vector(self, text: str) -> np.ndarray:
         """Convert text to a simple vector using basic hashing (fallback method)"""
@@ -125,26 +137,47 @@ class EmbeddingGenerator:
             return np.array(embeddings)
     
     def generate_embeddings(self, articles: List[Dict[str, Any]]) -> np.ndarray:
-        """Generate embeddings for articles"""
+        """Embed each article's text with the first usable backend; empty array when no articles."""
         if not articles:
             return np.array([])
-        
+
         # Create texts for embedding
         texts = [self.create_article_text(article) for article in articles]
-        
-        print(f"Generating embeddings for {len(texts)} articles...")
-        
-        # Generate embeddings
-        if self.use_cohere:
+
+        print(f"🔄 Generating embeddings for {len(texts)} articles using {self.embedding_method}...")
+
+        # Backend priority: Sentence Transformers, then Cohere, then hash vectors
+        if self.use_sentence_transformers:
+            # The model is loaded lazily; the call returns False when loading failed
+            if self._load_sentence_model():
+                embeddings = self.generate_embeddings_sentence_transformer(texts)
+            else:
+                # Loading failed: use hash vectors for this batch
+                embeddings = np.array([self._simple_text_to_vector(text) for text in texts])
+        elif self.use_cohere:
             embeddings = self.generate_embeddings_cohere(texts)
         else:
-            embeddings = self.generate_embeddings_sentence_transformer(texts)
-        
-        print(f"Generated embeddings shape: {embeddings.shape}")
+            # Neither Sentence Transformers nor Cohere is enabled: hash vectors
+            embeddings = np.array([self._simple_text_to_vector(text) for text in texts])
+
+        print(f"✅ Generated embeddings shape: {embeddings.shape}")
         return embeddings
     
     def generate_query_embedding(self, query: str) -> np.ndarray:
-        """Generate embedding for a search query"""
+        """Generate embedding for a search query using best available method"""
+        print(f"🔍 Generating query embedding using {self.embedding_method}...")
+
+        # Priority: Sentence Transformers > Cohere > Hash fallback
+        if self.use_sentence_transformers:
+            # Lazy load model on first use
+            if self._load_sentence_model():
+                try:
+                    embedding = self.sentence_model.encode([query], convert_to_numpy=True)[0]
+                    print(f"✅ Query embedding generated with shape: {embedding.shape}")
+                    return embedding
+                except Exception as e:
+                    print(f"❌ Sentence Transformers query error: {e}")
+
         if self.use_cohere:
             try:
                 response = self.cohere_client.embed(
@@ -152,17 +185,15 @@ class EmbeddingGenerator:
                     model='embed-english-v3.0',
                     input_type='search_query'
                 )
-                return np.array(response.embeddings[0])
+                embedding = np.array(response.embeddings[0])
+                print(f"✅ Query embedding generated with shape: {embedding.shape}")
+                return embedding
             except Exception as e:
-                print(f"Cohere query embedding error: {e}")
-                # Fallback to simple embeddings
-                return self._simple_text_to_vector(query)
-        else:
-            if self.sentence_model is not None:
-                return self.sentence_model.encode([query], convert_to_numpy=True)[0]
-            else:
-                # Use simple hash-based embeddings
-                return self._simple_text_to_vector(query)
+                print(f"❌ Cohere query embedding error: {e}")
+
+        # Fallback to hash-based embeddings
+        print("⚡ Using hash-based fallback for query embedding")
+        return self._simple_text_to_vector(query)
     
     def compute_similarity(self, embedding1: np.ndarray, embedding2: np.ndarray) -> float:
         """Compute cosine similarity between two embeddings"""
diff --git a/backend/main.py b/backend/main.py
index 6c75d48..61e30fa 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -1,13 +1,16 @@
 """FastAPI backend for DS Task AI News"""
-from fastapi import FastAPI, HTTPException, Query
+from fastapi import FastAPI, HTTPException, Query, Request
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 from typing import List, Dict, Any, Optional
 import uvicorn
+import time
+from collections import defaultdict
 
 from config import settings
 from news_fetcher import NewsFetcher
 from recommender import NewsRecommender
+from ai_analyzer import AIAnalyzer
 
 # Groq integration
 try:
@@ -42,6 +45,30 @@ app.add_middleware(
 # Initialize components
 news_fetcher = NewsFetcher()
 recommender = NewsRecommender()
+ai_analyzer = AIAnalyzer()
+
+# Simple rate limiter
+rate_limit_storage = defaultdict(list)
+RATE_LIMIT_REQUESTS = 100  # requests per minute
+RATE_LIMIT_WINDOW = 60  # seconds
+
+def check_rate_limit(client_ip: str) -> bool:
+    """Record a request for ``client_ip`` and report whether it is within the limit.
+
+    Timestamps older than RATE_LIMIT_WINDOW seconds are discarded first; the request
+    is then accepted (and recorded) only if fewer than RATE_LIMIT_REQUESTS remain.
+    Rejected requests are not recorded.
+    """
+    now = time.time()
+    # Clean old requests
+    recent = [stamp for stamp in rate_limit_storage[client_ip] if now - stamp < RATE_LIMIT_WINDOW]
+    rate_limit_storage[client_ip] = recent
+
+    # Accept and record the request only while the window still has room
+    allowed = len(recent) < RATE_LIMIT_REQUESTS
+    if allowed:
+        recent.append(now)
+    return allowed
 
 # Pydantic models
 class NewsQuery(BaseModel):
@@ -55,7 +82,17 @@ class InterestsQuery(BaseModel):
 class SearchQuery(BaseModel):
     query: str
     source: Optional[str] = None
+    category: Optional[str] = None  # matched case-insensitively against a result's 'categories'
+    date_from: Optional[str] = None  # ISO-8601 or YYYY-MM-DD lower bound on 'published_date'
+    date_to: Optional[str] = None  # ISO-8601 or YYYY-MM-DD upper bound on 'published_date'
     top_k: int = 10
+    include_content: bool = False  # False: content over 200 chars is cut to 200 chars + "..."
+
+class AnalyzeRequest(BaseModel):
+    article_id: str  # compared for equality with each stored article's 'id' field
+
+class InsightsRequest(BaseModel):
+    article_count: int = 5  # number of trending articles requested from the recommender
 
 # API Endpoints
 
@@ -179,44 +216,174 @@ async def get_trending_news(top_k: int = Query(10, description="Number of trendi
 @app.get("/articles")
 async def get_all_articles(
     source: Optional[str] = Query(None, description="Filter by news source"),
-    limit: int = Query(50, description="Maximum number of articles to return")
+    limit: int = Query(50, ge=1, description="Maximum number of articles to return"),
+    offset: int = Query(0, ge=0, description="Number of articles to skip for pagination"),
+    category: Optional[str] = Query(None, description="Filter by article category"),
+    date_from: Optional[str] = Query(None, description="Filter articles from this date (YYYY-MM-DD)"),
+    date_to: Optional[str] = Query(None, description="Filter articles to this date (YYYY-MM-DD)")
 ):
-    """Get all articles with optional filtering"""
+    """Get all articles with pagination and advanced filtering.
+
+    Filters run in memory over every stored article; results are sorted newest-first, then sliced.
+    """
     try:
+        # Get all articles first
+        all_articles = recommender.vector_store.get_all_articles()
+
+        # Apply filters
+        filtered_articles = all_articles
+
+        # Filter by source
         if source:
-            articles = recommender.get_articles_by_source(source, limit)
-        else:
-            all_articles = recommender.vector_store.get_all_articles()
-            articles = sorted(all_articles, key=lambda x: x.get('published_date', ''), reverse=True)[:limit]
-        
+            filtered_articles = [a for a in filtered_articles
+                               if (a.get('source') or '').lower() == source.lower()]
+
+        # Filter by category (if articles have categories)
+        if category:
+            filtered_articles = [a for a in filtered_articles
+                               if category.lower() in [cat.lower() for cat in (a.get('categories') or [])]]
+
+        # Filter by date range
+        if date_from or date_to:
+            from datetime import datetime, timezone
+
+            def parse_date(date_str):
+                """Parse ISO-8601 or YYYY-MM-DD text to a naive UTC datetime; None if invalid."""
+                try:
+                    parsed = datetime.fromisoformat(date_str.replace('Z', '+00:00'))
+                except (AttributeError, TypeError, ValueError):
+                    try:
+                        parsed = datetime.strptime(date_str, '%Y-%m-%d')
+                    except (TypeError, ValueError):
+                        return None
+                # Comparing aware with naive datetimes raises TypeError, so normalise to naive UTC
+                if parsed.tzinfo is not None:
+                    parsed = parsed.astimezone(timezone.utc).replace(tzinfo=None)
+                return parsed
+
+            from_date = parse_date(date_from) if date_from else None
+            to_date = parse_date(date_to) if date_to else None
+            # Parse each article's date once; a bound that failed to parse is simply not applied
+            dated = [(parse_date(a.get('published_date', '')), a) for a in filtered_articles]
+            if from_date is not None:
+                dated = [(d, a) for d, a in dated if d is not None and d >= from_date]
+            if to_date is not None:
+                dated = [(d, a) for d, a in dated if d is not None and d <= to_date]
+            filtered_articles = [a for _, a in dated]
+
+        # Sort by published date (newest first)
+        filtered_articles = sorted(filtered_articles, key=lambda x: x.get('published_date') or '', reverse=True)
+
+        # Calculate the pagination window and its metadata
+        total_count = len(filtered_articles)
+        end_idx = offset + limit
+        paginated_articles = filtered_articles[offset:end_idx]
+        has_next = end_idx < total_count
+        has_prev = offset > 0
+        total_pages = (total_count + limit - 1) // limit  # Ceiling division; limit >= 1 is enforced by Query
+        current_page = (offset // limit) + 1
+
         return {
             "success": True,
-            "articles": articles,
-            "count": len(articles),
-            "source_filter": source
+            "articles": paginated_articles,
+            "pagination": {
+                "total_count": total_count,
+                "count": len(paginated_articles),
+                "limit": limit,
+                "offset": offset,
+                "current_page": current_page,
+                "total_pages": total_pages,
+                "has_next": has_next,
+                "has_prev": has_prev,
+                "next_offset": end_idx if has_next else None,
+                "prev_offset": max(0, offset - limit) if has_prev else None
+            },
+            "filters": {
+                "source": source,
+                "category": category,
+                "date_from": date_from,
+                "date_to": date_to
+            }
         }
-        
+
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Error getting articles: {str(e)}")
 
 @app.post("/search")
-async def search_articles(search_data: SearchQuery):
-    """Advanced search with filters"""
+async def search_articles(search_data: SearchQuery, request: Request):
+    """Advanced search with multiple filters and semantic similarity"""
+    # Rate limiting runs before the try block so its 429 is not rewritten into a 500 below
+    client_ip = request.client.host if request.client else "unknown"
+    if not check_rate_limit(client_ip):
+        raise HTTPException(status_code=429, detail="Rate limit exceeded. Please try again later.")
     try:
-        filters = {}
+        # Get semantic search results first
+        semantic_results = recommender.search_articles(search_data.query, {}, search_data.top_k * 2)
+
+        # Apply additional filters
+        filtered_results = semantic_results
+
+        # Filter by source
         if search_data.source:
-            filters['source'] = search_data.source
-        
-        results = recommender.search_articles(search_data.query, filters, search_data.top_k)
-        
+            filtered_results = [r for r in filtered_results
+                              if (r.get('source') or '').lower() == search_data.source.lower()]
+
+        # Filter by category
+        if search_data.category:
+            filtered_results = [r for r in filtered_results
+                              if search_data.category.lower() in [cat.lower() for cat in (r.get('categories') or [])]]
+
+        # Filter by date range
+        if search_data.date_from or search_data.date_to:
+            from datetime import datetime, timezone
+
+            def parse_date(date_str):
+                """Parse ISO-8601 or YYYY-MM-DD text to a naive UTC datetime; None if invalid."""
+                try:
+                    parsed = datetime.fromisoformat(date_str.replace('Z', '+00:00'))
+                except (AttributeError, TypeError, ValueError):
+                    try:
+                        parsed = datetime.strptime(date_str, '%Y-%m-%d')
+                    except (TypeError, ValueError):
+                        return None
+                # Comparing aware with naive datetimes raises TypeError, so normalise to naive UTC
+                if parsed.tzinfo is not None:
+                    parsed = parsed.astimezone(timezone.utc).replace(tzinfo=None)
+                return parsed
+
+            from_date = parse_date(search_data.date_from) if search_data.date_from else None
+            to_date = parse_date(search_data.date_to) if search_data.date_to else None
+            # Parse each result's date once; a bound that failed to parse is simply not applied
+            dated = [(parse_date(r.get('published_date', '')), r) for r in filtered_results]
+            if from_date is not None:
+                dated = [(d, r) for d, r in dated if d is not None and d >= from_date]
+            if to_date is not None:
+                dated = [(d, r) for d, r in dated if d is not None and d <= to_date]
+            filtered_results = [r for _, r in dated]
+
+        # Limit results to requested amount
+        final_results = filtered_results[:search_data.top_k]
+        # Shorten long content on copies: the recommender's dicts may be shared, so never mutate them
+        if not search_data.include_content:
+            final_results = [{**r, 'content': r['content'][:200] + "..."}
+                             if isinstance(r.get('content'), str) and len(r['content']) > 200 else r
+                             for r in final_results]
+
         return {
             "success": True,
             "query": search_data.query,
-            "filters": filters,
-            "results": results,
-            "count": len(results)
+            "filters": {
+                "source": search_data.source,
+                "category": search_data.category,
+                "date_from": search_data.date_from,
+                "date_to": search_data.date_to
+            },
+            "results": final_results,
+            "count": len(final_results),
+            "total_semantic_matches": len(semantic_results),
+            "filtered_matches": len(filtered_results)
         }
-        
+
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Error searching articles: {str(e)}")
 
@@ -239,7 +406,69 @@ async def get_stats():
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Error getting stats: {str(e)}")
 
-# Groq endpoints removed for core functionality focus
+# AI Analysis Endpoints
+
+@app.post("/analyze-article")
+async def analyze_article(request: AnalyzeRequest):
+    """Analyze a specific article with AI.
+
+    Responds 404 when no stored article has the requested id, 500 on any other failure.
+    """
+    try:
+        # Get article from vector store
+        articles = recommender.vector_store.get_all_articles()
+        article = next((a for a in articles if a.get('id') == request.article_id), None)
+        if not article:
+            raise HTTPException(status_code=404, detail="Article not found")
+
+        # Perform AI analysis (summary, then keywords, then sentiment)
+        return {
+            "success": True,
+            "article_id": request.article_id,
+            "analysis": {
+                "summary": ai_analyzer.summarize_article(article),
+                "keywords": ai_analyzer.extract_keywords(article),
+                "sentiment": ai_analyzer.analyze_sentiment(article)
+            }
+        }
+
+    except HTTPException:
+        raise  # keep the 404 instead of rewrapping it as a 500
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error analyzing article: {str(e)}")
+
+@app.post("/generate-insights")
+async def generate_insights(request: InsightsRequest):
+    """Ask the AI analyzer for insights over the recommender's trending articles.
+
+    Any failure is reported as HTTP 500.
+    """
+    try:
+        # The recommender's trending list supplies the "recent" articles
+        trending = recommender.get_trending_articles(request.article_count)
+        response_body = {
+            "success": True,
+            "insights": ai_analyzer.generate_insights(trending),
+            "article_count": len(trending)
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error generating insights: {str(e)}")
+
+    return response_body
+
+@app.get("/ai-status")
+async def get_ai_status():
+    """Report whether the AI analyzer is usable and which features it offers."""
+    try:
+        # Wrap the analyzer's own status dict in the standard success envelope
+        payload = {
+            "success": True,
+            "ai_status": ai_analyzer.get_status()
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error getting AI status: {str(e)}")
+
+    return payload
 
 # Run the application
 if __name__ == "__main__":
diff --git a/backend/vector_store.py b/backend/vector_store.py
index bf82e01..55a1ad3 100644
--- a/backend/vector_store.py
+++ b/backend/vector_store.py
@@ -2,6 +2,7 @@
 import os
 import json
 import pickle
+import time
 import numpy as np
 import faiss
 from typing import List, Dict, Any, Optional, Tuple
@@ -13,11 +14,15 @@ class VectorStore:
         self.index_path = settings.vector_index_path
         self.metadata_path = self.index_path.replace('.faiss', '_metadata.pkl')
         self.dimension = settings.vector_dimension
-        
+
         # Initialize FAISS index
         self.index = None
         self.articles_metadata = []
-        
+
+        # Simple in-memory cache for frequent queries
+        self._cache = {}
+        self._cache_ttl = 300  # 5 minutes
+
         # Load existing index if available
         self.load_index()
     
@@ -165,6 +170,30 @@ class VectorStore:
             'last_updated': max([a.get('added_date', '') for a in self.articles_metadata]) if self.articles_metadata else None
         }
 
+    def _get_cache_key(self, operation: str, *args) -> str:
+        """Return the MD5 hex digest of the operation name joined by ':' with its stringified args."""
+        from hashlib import md5
+        joined_args = ':'.join(str(arg) for arg in args)
+        return md5(f"{operation}:{joined_args}".encode()).hexdigest()
+
+    def _get_from_cache(self, key: str) -> Optional[Any]:
+        """Return the cached value for ``key`` while it is fresh; evict it and return None once stale."""
+        if key not in self._cache:
+            return None
+        value, stored_at = self._cache[key]
+        if time.time() - stored_at < self._cache_ttl:
+            return value
+        del self._cache[key]  # expired entry
+        return None
+
+    def _set_cache(self, key: str, value: Any) -> None:
+        """Store ``value`` under ``key`` paired with the current ``time.time()`` for later TTL checks."""
+        self._cache[key] = (value, time.time())
+
+    def _clear_cache(self) -> None:
+        """Remove every cache entry in place (the dict object itself is kept)."""
+        self._cache.clear()
+
 # Test function
 if __name__ == "__main__":
     # Test vector store
diff --git a/data/news_vectors_metadata.pkl b/data/news_vectors_metadata.pkl
new file mode 100644
index 0000000..792af20
Binary files /dev/null and b/data/news_vectors_metadata.pkl differ
diff --git a/docs/API_Documentation.md b/docs/API_Documentation.md
index 49fbaa7..cd8c156 100644
--- a/docs/API_Documentation.md
+++ b/docs/API_Documentation.md
@@ -8,6 +8,11 @@ http://localhost:8000
 ## Authentication
 Currently, no authentication is required. In production, consider implementing API keys or OAuth.
 
+## Rate Limiting
+- **Limit**: 100 requests per minute per IP address (currently enforced on `POST /search` only)
+- **Response**: HTTP 429 when limit exceeded
+- **Headers**: No rate limit headers currently implemented
+
 ## Response Format
 All API responses follow this structure:
 ```json
@@ -28,6 +33,11 @@ Error responses include:
 }
 ```
 
+## Caching
+- **Articles endpoint**: 3-minute cache for improved performance
+- **Search results**: In-memory caching with 5-minute TTL
+- **Vector operations**: Cached for frequent similarity searches
+
 ---
 
 ## Endpoints
@@ -428,3 +438,197 @@ fetch('http://localhost:8000/recommend-by-query', {
 .then(response => response.json())
 .then(data => console.log(data.recommendations));
 ```
+
+---
+
+## Deployment Guide
+
+### Prerequisites
+- Python 3.10+
+- 4GB+ RAM (for Sentence Transformers model)
+- 2GB+ disk space
+
+### Local Development Setup
+
+1. **Clone and Setup**
+```bash
+git clone <repository-url>
+cd ds_task_ai_news
+```
+
+2. **Install Dependencies**
+```bash
+pip install -r backend/requirements.txt
+```
+
+3. **Environment Configuration**
+Create a `.env` file in the project root directory:
+```env
+# Optional API Keys
+GROQ_API_KEY=your_groq_api_key_here
+COHERE_API_KEY=your_cohere_api_key_here
+
+# Server Settings
+HOST=0.0.0.0
+PORT=8000
+DEBUG=true
+
+# RSS Feeds (comma-separated)
+RSS_FEEDS=https://feeds.bbci.co.uk/news/technology/rss.xml,https://techcrunch.com/feed/,https://www.wired.com/feed/rss
+
+# Vector Database
+VECTOR_DIMENSION=384
+VECTOR_DB_TYPE=faiss
+```
+
+4. **Run the Application**
+```bash
+cd backend
+python main.py
+```
+
+### Production Deployment
+
+#### Docker Deployment
+```dockerfile
+FROM python:3.10-slim
+
+WORKDIR /app
+COPY backend/requirements.txt .
+RUN pip install -r requirements.txt
+
+COPY . .
+WORKDIR /app/backend
+
+EXPOSE 8000
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
+```
+
+#### Docker Compose
+```yaml
+version: '3.8'
+services:
+  ai-news-api:
+    build: .
+    ports:
+      - "8000:8000"
+    environment:
+      - GROQ_API_KEY=${GROQ_API_KEY}
+      - COHERE_API_KEY=${COHERE_API_KEY}
+    volumes:
+      - ./data:/app/data
+      - ./models:/app/models
+    restart: unless-stopped
+```
+
+#### Nginx Configuration
+```nginx
+server {
+    listen 80;
+    server_name your-domain.com;
+
+    location / {
+        proxy_pass http://localhost:8000;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+    }
+}
+```
+
+### Performance Optimization
+
+#### Memory Management
+- **Sentence Transformers**: Uses ~1GB RAM when loaded
+- **FAISS Index**: Memory usage scales with article count
+- **Caching**: In-memory cache uses ~50MB for typical workloads
+
+#### Scaling Recommendations
+- **Horizontal**: Use load balancer with multiple API instances
+- **Vertical**: Increase RAM for larger article databases
+- **Database**: Consider PostgreSQL for metadata storage at scale
+
+### Monitoring and Maintenance
+
+#### Health Checks
+```bash
+# Basic health check
+curl http://localhost:8000/health
+
+# System statistics
+curl http://localhost:8000/stats
+
+# AI analyzer status
+curl http://localhost:8000/ai-status
+```
+
+#### Log Monitoring
+```bash
+# Application logs
+tail -f /var/log/ai-news/app.log
+
+# Error tracking
+grep "ERROR" /var/log/ai-news/app.log
+```
+
+#### Backup Strategy
+```bash
+# Backup vector database
+cp data/news_vectors.faiss backup/
+cp data/news_vectors_metadata.pkl backup/
+
+# Backup processed articles
+tar -czf backup/articles_$(date +%Y%m%d).tar.gz data/processed_news/
+```
+
+### Troubleshooting
+
+#### Common Issues
+
+1. **Sentence Transformers Model Loading**
+```bash
+# Verify model exists
+ls -la models/all-MiniLM-L6-v2/
+
+# Test model loading
+python -c "from sentence_transformers import SentenceTransformer; model = SentenceTransformer('./models/all-MiniLM-L6-v2'); print('Model loaded successfully')"
+```
+
+2. **FAISS Index Issues**
+```bash
+# Rebuild index
+rm data/news_vectors.faiss data/news_vectors_metadata.pkl
+# Restart application to rebuild
+```
+
+3. **Memory Issues**
+```bash
+# Check memory usage
+free -h
+# Monitor process memory
+ps aux | grep python
+```
+
+#### Performance Tuning
+- Adjust `RATE_LIMIT_REQUESTS` in `main.py` to suit your traffic
+- Modify the cache TTL in `vector_store.py`
+- Tune `max_articles_per_feed` in `config.py`
+
+### Security Considerations
+
+#### Production Security
+- Use HTTPS in production
+- Implement proper API authentication
+- Set up firewall rules
+- Regular security updates
+- Monitor for unusual traffic patterns
+
+#### Environment Variables
+Never commit sensitive data to version control; keep secrets in environment-specific `.env` files that are excluded from the repository:
+```bash
+# Use environment-specific .env files
+.env.production
+.env.staging
+.env.development
+```
diff --git a/docs/README.md b/docs/README.md
index 420687b..01d9334 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -41,8 +41,9 @@ DS_Task_AI_News/
 │   │-- main.py  # FastAPI backend
 │   │-- news_fetcher.py  # Fetches news using RSS feeds
 │   │-- vector_store.py  # Handles vector database operations
-│   │-- embeddings.py  # Generates embeddings using Cohere
+│   │-- embeddings.py  # Generates embeddings using Sentence Transformers
 │   │-- recommender.py  # Fetches related news articles
+│   │-- ai_analyzer.py  # AI analysis using Groq LLM
 │   │-- config.py  # Configuration settings
 │   │-- requirements.txt  # Dependencies
 │