feat: Initial SCP project setup with AI-powered document compliance tools

2025-04-21 22:49:29 +01:00
commit b0ec64b883
28 changed files with 2405 additions and 0 deletions
@@ -0,0 +1,247 @@
+from fastapi import FastAPI, UploadFile, File, HTTPException, Form, Request
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.templating import Jinja2Templates
+from fastapi.staticfiles import StaticFiles
+from fastapi.responses import HTMLResponse, RedirectResponse
+from pydantic import BaseModel
+from typing import List, Optional
+import uuid
+import os
+import logging
+import traceback
+import json
+from datetime import datetime
+import markdown
+
+from services.document_processor import DocumentProcessor
+from services.vector_store import VectorStore
+from services.database import Database
+
+from dotenv import load_dotenv
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.FileHandler("app.log"),
+        logging.StreamHandler()
+    ]
+)
+
+load_dotenv()
+
+app = FastAPI(title="Mini SpecsComply Pro")
+
+# Mount static files
+app.mount("/static", StaticFiles(directory="src/static"), name="static")
+
+# Templates
+templates = Jinja2Templates(directory="src/templates")
+
+# Add markdown filter to Jinja2
+def markdown_filter(text):
+    return markdown.markdown(text, extensions=['extra', 'nl2br'])
+
+templates.env.filters["markdown"] = markdown_filter
+
+# CORS middleware
+# NOTE(review): wildcard origins are combined with allow_credentials=True
+# here; confirm this is intended before exposing the app beyond local
+# development.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Initialize services
+vector_store = VectorStore()
+document_processor = DocumentProcessor(vector_store)
+database = Database()
+
+class AnalysisResponse(BaseModel):
+    """Pydantic schema describing the analysis of one document.
+
+    NOTE(review): no route visible in this module references this model;
+    confirm whether it is still needed.
+    """
+    document_id: str  # identifier of the analysed document
+    summary: str
+    issues: List[dict]
+    recommendations: List[str]
+
+@app.get("/", response_class=HTMLResponse)
+async def home(request: Request):
+    return templates.TemplateResponse("index.html", {"request": request})
+
+@app.get("/documents", response_class=HTMLResponse)
+async def documents(request: Request):
+    try:
+        # Get all documents from database
+        documents = database.get_all_metadata()
+        
+        return templates.TemplateResponse(
+            "documents.html",
+            {
+                "request": request,
+                "documents": documents
+            }
+        )
+    except Exception as e:
+        error_msg = f"Error fetching documents: {str(e)}"
+        logging.error(error_msg)
+        logging.error(traceback.format_exc())
+        raise HTTPException(status_code=500, detail=error_msg)
+
+@app.post("/upload-document")
+async def upload_document(
+    file: UploadFile = File(...),
+    document_type: str = Form(...),
+):
+    try:
+        # Generate unique document ID
+        doc_id = str(uuid.uuid4())
+        logging.info(f"Processing upload for document ID: {doc_id}")
+        
+        # Save the uploaded file
+        file_path = f"data/uploads/{doc_id}_{file.filename}"
+        os.makedirs("data/uploads", exist_ok=True)
+        
+        with open(file_path, "wb") as buffer:
+            content = await file.read()
+            buffer.write(content)
+        
+        logging.info(f"File saved to {file_path}")
+        
+        # Process the document
+        await document_processor.process_document(doc_id, file_path, document_type)
+        
+        # Save document metadata
+        metadata = {
+            "document_id": doc_id,
+            "filename": file.filename,
+            "document_type": document_type,
+        }
+        
+        # Save metadata to database
+        database.save_metadata(doc_id, metadata)
+        
+        logging.info(f"Document {doc_id} processed successfully")
+        return {"document_id": doc_id, "message": "Document uploaded and processed successfully"}
+    except Exception as e:
+        error_msg = f"Error processing document: {str(e)}"
+        logging.error(error_msg)
+        logging.error(traceback.format_exc())
+        raise HTTPException(status_code=500, detail=error_msg)
+
+@app.get("/document/{doc_id}/analysis", response_class=HTMLResponse)
+async def get_analysis(request: Request, doc_id: str):
+    try:
+        analysis = await document_processor.get_analysis(doc_id)
+        metadata = database.get_metadata(doc_id)
+        
+        return templates.TemplateResponse(
+            "analysis.html", 
+            {
+                "request": request,
+                "analysis": analysis,
+                "metadata": metadata
+            }
+        )
+    except Exception as e:
+        error_msg = f"Error retrieving analysis: {str(e)}"
+        logging.error(error_msg)
+        logging.error(traceback.format_exc())
+        raise HTTPException(status_code=404, detail=error_msg)
+
+@app.post("/document/{doc_id}/resubmit")
+async def resubmit_document(
+    request: Request,
+    doc_id: str,
+    file: UploadFile = File(...),
+    document_type: Optional[str] = Form(None),
+    description: Optional[str] = Form(None)
+):
+    try:
+        logging.info(f"Received resubmit request for document {doc_id}")
+        logging.info(f"File: {file.filename}, Document Type: {document_type}, Description: {description}")
+        
+        # Save the resubmitted file
+        file_path = f"data/uploads/{doc_id}_resubmit_{file.filename}"
+        os.makedirs("data/uploads", exist_ok=True)
+        
+        with open(file_path, "wb") as buffer:
+            content = await file.read()
+            buffer.write(content)
+        
+        logging.info(f"Saved resubmitted file to {file_path}")
+        
+        # Get existing metadata
+        try:
+            metadata = database.get_metadata(doc_id)
+            logging.info(f"Retrieved existing metadata for document {doc_id}: {metadata}")
+            
+            # Update metadata if provided
+            if document_type:
+                metadata["document_type"] = document_type
+            if description:
+                metadata["description"] = description
+                
+            # Save updated metadata
+            database.save_metadata(doc_id, metadata)
+            logging.info(f"Updated metadata for resubmitted document {doc_id}")
+        except Exception as e:
+            logging.error(f"Error updating metadata for resubmitted document {doc_id}: {str(e)}")
+            # If we can't get the metadata, use the provided document type or a default
+            if not document_type:
+                document_type = "unknown"
+        
+        # Process the resubmitted document with the correct document type
+        doc_type = document_type if document_type else metadata.get("document_type", "unknown")
+        logging.info(f"Processing resubmitted document {doc_id} with document type {doc_type}")
+        await document_processor.process_document(doc_id, file_path, doc_type, is_resubmission=True)
+        
+        logging.info(f"Document {doc_id} resubmitted successfully")
+        
+        # Redirect back to the analysis page
+        return RedirectResponse(url=f"/document/{doc_id}/analysis", status_code=303)
+    except Exception as e:
+        error_msg = f"Error resubmitting document: {str(e)}"
+        logging.error(error_msg)
+        logging.error(traceback.format_exc())
+        raise HTTPException(status_code=500, detail=error_msg)
+
+@app.delete("/document/{doc_id}")
+async def delete_document(doc_id: str):
+    try:
+        # Get document metadata to find the filename
+        metadata = database.get_metadata(doc_id)
+        filename = metadata.get('filename', '')
+        
+        # Delete the uploaded file
+        upload_path = f"data/uploads/{doc_id}_{filename}"
+        if os.path.exists(upload_path):
+            os.remove(upload_path)
+            logging.info(f"Deleted uploaded file: {upload_path}")
+        
+        # Delete any resubmitted files
+        resubmit_pattern = f"data/uploads/{doc_id}_resubmit_*"
+        for resubmit_file in os.listdir("data/uploads"):
+            if resubmit_file.startswith(f"{doc_id}_resubmit_"):
+                os.remove(os.path.join("data/uploads", resubmit_file))
+                logging.info(f"Deleted resubmitted file: {resubmit_file}")
+        
+        # Delete from database
+        database.delete_document(doc_id)
+        
+        # Remove from vector store
+        vector_store.delete_document(doc_id)
+        logging.info(f"Removed document {doc_id} from vector store")
+        
+        return {"message": "Document deleted successfully"}
+    except HTTPException:
+        raise
+    except Exception as e:
+        error_msg = f"Error deleting document: {str(e)}"
+        logging.error(error_msg)
+        logging.error(traceback.format_exc())
+        raise HTTPException(status_code=500, detail=error_msg)
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="127.0.0.1", port=8000) 
@@ -0,0 +1,42 @@
+from pinecone import Pinecone
+from services.config import config
+import logging
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.FileHandler("reset_pinecone.log"),
+        logging.StreamHandler()
+    ]
+)
+
+def reset_pinecone_index():
+    try:
+        # Initialize Pinecone client
+        pinecone = Pinecone(api_key=config.PINECONE_API_KEY)
+        
+        # Check if index exists
+        if config.PINECONE_INDEX_NAME in pinecone.list_indexes().names():
+            logging.info(f"Deleting existing index '{config.PINECONE_INDEX_NAME}'")
+            pinecone.delete_index(config.PINECONE_INDEX_NAME)
+        
+        # Create a new index with the correct dimension
+        logging.info(f"Creating new index '{config.PINECONE_INDEX_NAME}' with dimension {config.VECTOR_DIMENSION}")
+        pinecone.create_index(
+            name=config.PINECONE_INDEX_NAME,
+            dimension=config.VECTOR_DIMENSION,
+            metric="cosine",
+            spec=ServerlessSpec(cloud="aws", region="us-east-1")
+        )
+        
+        logging.info("Pinecone index reset successfully")
+        return True
+    except Exception as e:
+        logging.error(f"Error resetting Pinecone index: {str(e)}")
+        return False
+
+if __name__ == "__main__":
+    from pinecone import ServerlessSpec
+    reset_pinecone_index() 
@@ -0,0 +1,49 @@
+import os
+import json
+import logging
+from services.database import Database
+
+def migrate_data():
+    """Migrate existing data from filesystem to SQLite database."""
+    try:
+        database = Database()
+        
+        # Migrate metadata
+        metadata_dir = "data/metadata"
+        if os.path.exists(metadata_dir):
+            for filename in os.listdir(metadata_dir):
+                if filename.endswith('.json'):
+                    doc_id = filename[:-5]  # Remove .json extension
+                    with open(os.path.join(metadata_dir, filename), 'r') as f:
+                        metadata = json.load(f)
+                        database.save_metadata(doc_id, metadata)
+                        logging.info(f"Migrated metadata for document {doc_id}")
+        
+        # Migrate analysis
+        analysis_dir = "data/analysis"
+        if os.path.exists(analysis_dir):
+            for filename in os.listdir(analysis_dir):
+                if filename.endswith('.json'):
+                    doc_id = filename[:-5]  # Remove .json extension
+                    with open(os.path.join(analysis_dir, filename), 'r') as f:
+                        analysis = json.load(f)
+                        database.save_analysis(doc_id, analysis)
+                        logging.info(f"Migrated analysis for document {doc_id}")
+        
+        logging.info("Migration completed successfully")
+    except Exception as e:
+        logging.error(f"Error during migration: {str(e)}")
+        raise
+
+if __name__ == "__main__":
+    # Configure logging
+    logging.basicConfig(
+        level=logging.INFO,
+        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+        handlers=[
+            logging.FileHandler("migration.log"),
+            logging.StreamHandler()
+        ]
+    )
+    
+    migrate_data() 
@@ -0,0 +1,24 @@
+from dataclasses import dataclass
+from dotenv import load_dotenv
+import os
+
+load_dotenv()
+
+@dataclass
+class Settings:
+    """Application settings: API credentials plus model and index names.
+
+    Credential fields default to the matching environment variable, read once
+    when this class body executes; a missing variable yields an empty string.
+    """
+    # API Keys
+    COHERE_API_KEY: str = os.getenv("COHERE_API_KEY", "")
+    DEEPSEEK_API_KEY: str = os.getenv("DEEPSEEK_API_KEY", "")
+    PINECONE_API_KEY: str = os.getenv("PINECONE_API_KEY", "")
+    PINECONE_ENVIRONMENT: str = os.getenv("PINECONE_ENVIRONMENT", "")
+    
+    # Vector DB Settings
+    PINECONE_INDEX_NAME: str = "document-compliance"
+    
+    # Model Settings
+    COHERE_EMBEDDING_MODEL: str = "embed-english-v3.0"
+    COHERE_RERANKER_MODEL: str = "rerank-english-v2.0"
+    # NOTE(review): the document processor sends a hard-coded "deepseek-chat"
+    # model name rather than this value; confirm which one is intended.
+    DEEPSEEK_MODEL: str = "deepseek-r1"
+    VECTOR_DIMENSION: int = 1024  # Updated to match Cohere's embedding dimension
+
+# Shared settings instance imported by the service modules.
+config = Settings()
@@ -0,0 +1,162 @@
+import sqlite3
+import json
+import logging
+from typing import Dict, Any, Optional
+import os
+
+class Database:
+    def __init__(self, db_path: str = "data/app.db"):
+        self.db_path = db_path
+        os.makedirs(os.path.dirname(db_path), exist_ok=True)
+        self._init_db()
+
+    def _init_db(self):
+        """Initialize the database with required tables."""
+        try:
+            with sqlite3.connect(self.db_path) as conn:
+                cursor = conn.cursor()
+                
+                # Create analysis table
+                cursor.execute('''
+                    CREATE TABLE IF NOT EXISTS analysis (
+                        document_id TEXT PRIMARY KEY,
+                        summary TEXT,
+                        issues TEXT,
+                        recommendations TEXT,
+                        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+                    )
+                ''')
+                
+                # Create metadata table
+                cursor.execute('''
+                    CREATE TABLE IF NOT EXISTS metadata (
+                        document_id TEXT PRIMARY KEY,
+                        filename TEXT,
+                        document_type TEXT,
+                        description TEXT,
+                        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+                    )
+                ''')
+                
+                conn.commit()
+        except Exception as e:
+            logging.error(f"Error initializing database: {str(e)}")
+            raise
+
+    def save_analysis(self, document_id: str, analysis: Dict[str, Any]):
+        """Save analysis results to the database."""
+        try:
+            with sqlite3.connect(self.db_path) as conn:
+                cursor = conn.cursor()
+                cursor.execute('''
+                    INSERT OR REPLACE INTO analysis (document_id, summary, issues, recommendations)
+                    VALUES (?, ?, ?, ?)
+                ''', (
+                    document_id,
+                    analysis['summary'],
+                    json.dumps(analysis['issues']),
+                    json.dumps(analysis['recommendations'])
+                ))
+                conn.commit()
+        except Exception as e:
+            logging.error(f"Error saving analysis for document {document_id}: {str(e)}")
+            raise
+
+    def get_analysis(self, document_id: str) -> Dict[str, Any]:
+        """Retrieve analysis results from the database."""
+        try:
+            with sqlite3.connect(self.db_path) as conn:
+                cursor = conn.cursor()
+                cursor.execute('SELECT summary, issues, recommendations FROM analysis WHERE document_id = ?', (document_id,))
+                result = cursor.fetchone()
+                
+                if not result:
+                    raise FileNotFoundError(f"Analysis not found for document {document_id}")
+                
+                return {
+                    'document_id': document_id,
+                    'summary': result[0],
+                    'issues': json.loads(result[1]),
+                    'recommendations': json.loads(result[2])
+                }
+        except Exception as e:
+            logging.error(f"Error retrieving analysis for document {document_id}: {str(e)}")
+            raise
+
+    def save_metadata(self, document_id: str, metadata: Dict[str, Any]):
+        """Save document metadata to the database."""
+        try:
+            with sqlite3.connect(self.db_path) as conn:
+                cursor = conn.cursor()
+                cursor.execute('''
+                    INSERT OR REPLACE INTO metadata (document_id, filename, document_type, description)
+                    VALUES (?, ?, ?, ?)
+                ''', (
+                    document_id,
+                    metadata['filename'],
+                    metadata['document_type'],
+                    metadata.get('description')
+                ))
+                conn.commit()
+        except Exception as e:
+            logging.error(f"Error saving metadata for document {document_id}: {str(e)}")
+            raise
+
+    def get_metadata(self, document_id: str) -> Dict[str, Any]:
+        """Retrieve document metadata from the database."""
+        try:
+            with sqlite3.connect(self.db_path) as conn:
+                cursor = conn.cursor()
+                cursor.execute('SELECT filename, document_type, description FROM metadata WHERE document_id = ?', (document_id,))
+                result = cursor.fetchone()
+                
+                if not result:
+                    raise FileNotFoundError(f"Metadata not found for document {document_id}")
+                
+                return {
+                    'document_id': document_id,
+                    'filename': result[0],
+                    'document_type': result[1],
+                    'description': result[2]
+                }
+        except Exception as e:
+            logging.error(f"Error retrieving metadata for document {document_id}: {str(e)}")
+            raise
+
+    def get_all_metadata(self) -> list:
+        """Retrieve metadata for all documents."""
+        try:
+            with sqlite3.connect(self.db_path) as conn:
+                cursor = conn.cursor()
+                cursor.execute('''
+                    SELECT m.document_id, m.filename, m.document_type, m.description, m.created_at,
+                           CASE WHEN a.document_id IS NOT NULL THEN 1 ELSE 0 END as has_analysis
+                    FROM metadata m
+                    LEFT JOIN analysis a ON m.document_id = a.document_id
+                    ORDER BY m.created_at DESC
+                ''')
+                results = cursor.fetchall()
+                
+                return [{
+                    'document_id': row[0],
+                    'filename': row[1],
+                    'document_type': row[2],
+                    'description': row[3],
+                    'upload_date': row[4],
+                    'status': 'completed' if row[5] == 1 else 'processing'
+                } for row in results]
+        except Exception as e:
+            logging.error(f"Error retrieving all metadata: {str(e)}")
+            raise
+
+    def delete_document(self, document_id: str):
+        """Delete a document and its associated data from the database."""
+        try:
+            with sqlite3.connect(self.db_path) as conn:
+                cursor = conn.cursor()
+                cursor.execute('DELETE FROM analysis WHERE document_id = ?', (document_id,))
+                cursor.execute('DELETE FROM metadata WHERE document_id = ?', (document_id,))
+                conn.commit()
+        except Exception as e:
+            logging.error(f"Error deleting document {document_id}: {str(e)}")
+            raise 
@@ -0,0 +1,248 @@
+import cohere
+import requests
+from typing import List, Dict, Any
+import json
+import os
+import logging
+from services.config import config
+from services.database import Database
+
+class DocumentProcessor:
+    def __init__(self, vector_store):
+        self.vector_store = vector_store
+        self.cohere_client = cohere.Client(config.COHERE_API_KEY)
+        self.deepseek_url = "https://api.deepseek.com/v1/chat/completions"
+        self.deepseek_headers = {
+            "Authorization": f"Bearer {config.DEEPSEEK_API_KEY}",
+            "Content-Type": "application/json"
+        }
+        self.database = Database()
+        
+    async def process_document(self, doc_id: str, file_path: str, document_type: str, is_resubmission: bool = False):
+        try:
+            # Read document content with error handling for encoding
+            try:
+                with open(file_path, 'r', encoding='utf-8') as f:
+                    content = f.read()
+            except UnicodeDecodeError:
+                # Try with a different encoding if UTF-8 fails
+                with open(file_path, 'r', encoding='latin-1') as f:
+                    content = f.read()
+            
+            logging.info(f"Processing document {doc_id} with content length: {len(content)}")
+            
+            # Generate embeddings
+            embeddings = self.cohere_client.embed(
+                texts=[content],
+                model=config.COHERE_EMBEDDING_MODEL,
+                input_type="search_document"  # Required parameter for the model
+            ).embeddings[0]
+            
+            # Store in vector database
+            self.vector_store.store_embedding(doc_id, embeddings, content)
+            
+            # Process with DeepSeek for initial parsing
+            deepseek_parse_payload = {
+                "model": "deepseek-chat",
+                "messages": [
+                    {
+                        "role": "system",
+                        "content": "You are a document analysis assistant. Extract key sections and requirements from the following document."
+                    },
+                    {
+                        "role": "user",
+                        "content": content
+                    }
+                ],
+                "max_tokens": 4000
+            }
+            
+            # Make the API call with error handling
+            try:
+                deepseek_parse_response = requests.post(
+                    self.deepseek_url, 
+                    json=deepseek_parse_payload, 
+                    headers=self.deepseek_headers,
+                    timeout=60  # Add timeout
+                )
+                
+                # Check if the response is successful
+                if deepseek_parse_response.status_code != 200:
+                    logging.error(f"DeepSeek API error: {deepseek_parse_response.status_code} - {deepseek_parse_response.text}")
+                    # Use a fallback summary if the API call fails
+                    summary = "Document analysis could not be completed due to API limitations."
+                else:
+                    # Try to parse the JSON response
+                    try:
+                        deepseek_parse_result = deepseek_parse_response.json()
+                        summary = deepseek_parse_result['choices'][0]['message']['content']
+                    except (json.JSONDecodeError, KeyError) as e:
+                        logging.error(f"Error parsing DeepSeek response: {str(e)}")
+                        logging.error(f"Response text: {deepseek_parse_response.text}")
+                        summary = "Document analysis could not be completed due to parsing errors."
+            except requests.exceptions.RequestException as e:
+                logging.error(f"Error calling DeepSeek API: {str(e)}")
+                summary = "Document analysis could not be completed due to API connection issues."
+            
+            # Process with DeepSeek for deep reasoning using URL
+            deepseek_payload = {
+                "model": "deepseek-chat",
+                "messages": [
+                    {
+                        "role": "system",
+                        "content": "You are an expert in document compliance analysis. Analyze the following document for compliance issues and provide detailed feedback."
+                    },
+                    {
+                        "role": "user",
+                        "content": f"""Analyze this type of document {document_type} for compliance issues and provide detailed feedback:\n\n{content} 
+                        and these are the main sections of the document:\n\n{summary}"""
+                    }
+                ],
+                "max_tokens": 4000
+            }
+            
+            # Make the API call with error handling
+            try:
+                deepseek_response = requests.post(
+                    self.deepseek_url, 
+                    json=deepseek_payload, 
+                    headers=self.deepseek_headers,
+                    timeout=60  # Add timeout
+                )
+                
+                # Check if the response is successful
+                if deepseek_response.status_code != 200:
+                    logging.error(f"DeepSeek API error: {deepseek_response.status_code} - {deepseek_response.text}")
+                    # Use a fallback for issues if the API call fails
+                    issues = ["Document analysis could not be completed due to API limitations."]
+                else:
+                    # Try to parse the JSON response
+                    try:
+                        deepseek_result = deepseek_response.json()
+                        issues = self._extract_issues(deepseek_result['choices'][0]['message']['content'])
+                    except (json.JSONDecodeError, KeyError) as e:
+                        logging.error(f"Error parsing DeepSeek response: {str(e)}")
+                        logging.error(f"Response text: {deepseek_response.text}")
+                        issues = ["Document analysis could not be completed due to parsing errors."]
+            except requests.exceptions.RequestException as e:
+                logging.error(f"Error calling DeepSeek API: {str(e)}")
+                issues = ["Document analysis could not be completed due to API connection issues."]
+            
+            # Use Cohere reranker to prioritize issues
+            try:
+                reranked_issues = self.cohere_client.rerank(
+                    query="Compliance issues in technical document",
+                    documents=issues,
+                    model=config.COHERE_RERANKER_MODEL
+                )
+            except Exception as e:
+                logging.error(f"Error using Cohere reranker: {str(e)}")
+                # Create a simple reranked issues list if Cohere fails
+                reranked_issues = [type('obj', (object,), {'document': issue, 'index': i}) for i, issue in enumerate(issues)]
+            
+            # Store analysis results
+            analysis = {
+                "document_id": doc_id,
+                "summary": summary,
+                "issues": self._format_issues(reranked_issues),
+                "recommendations": self._generate_recommendations(reranked_issues)
+            }
+            
+            # Save analysis to database
+            self.database.save_analysis(doc_id, analysis)
+            
+            # If this is a resubmission, update the metadata in the database
+            if is_resubmission:
+                try:
+                    # Get existing metadata
+                    existing_metadata = self.database.get_metadata(doc_id)
+                    
+                    # Update with new document type if provided
+                    if document_type:
+                        existing_metadata["document_type"] = document_type
+                    
+                    # Save updated metadata
+                    self.database.save_metadata(doc_id, existing_metadata)
+                    logging.info(f"Updated metadata for resubmitted document {doc_id}")
+                except Exception as e:
+                    logging.error(f"Error updating metadata for resubmitted document {doc_id}: {str(e)}")
+                
+            logging.info(f"Document {doc_id} processed successfully")
+            return True
+        except Exception as e:
+            logging.error(f"Error processing document {doc_id}: {str(e)}")
+            raise
+            
+    async def get_analysis(self, doc_id: str) -> Dict[str, Any]:
+        return self.database.get_analysis(doc_id)
+            
+    def _extract_issues(self, deepseek_response: str) -> List[str]:
+        # Simple extraction of issues from DeepSeek's response
+        # In a real implementation, this would be more sophisticated
+        print(deepseek_response)
+        return [issue.strip() for issue in re.split(r'\d+\.', deepseek_response) if issue.strip()]
+        
+    def _format_issues(self, reranked_issues) -> List[Dict[str, Any]]:
+        return [
+            {
+                "issue": issue[0] if isinstance(issue, tuple) else issue.document,
+                "severity": "high" if i < 3 else "medium" if i < 6 else "low",
+                "rank": i + 1
+            }
+            for i, issue in enumerate(reranked_issues)
+        ]
+        
+    def _generate_recommendations(self, reranked_issues) -> List[str]:
+        # Generate specific recommendations for each issue
+        recommendations = []
+        print(f"Generating recommendations for {reranked_issues} issues")
+        # Extract the results from the RerankResponse object
+        results = reranked_issues.results if hasattr(reranked_issues, 'results') else reranked_issues
+        
+        for issue in results[:5]:  # Focus on top 5 issues
+            recommendation_payload = {
+                "model": "deepseek-chat",
+                "messages": [
+                    {
+                        "role": "system",
+                        "content": "You are an expert in document compliance. Provide specific, actionable recommendations to fix compliance issues."
+                    },
+                    {
+                        "role": "user",
+                        "content": f"Provide a specific, actionable recommendation to fix this compliance issue: {issue}"
+                    }
+                ],
+                "max_tokens": 1000
+            }
+            
+            # Make the API call with error handling
+            try:
+                recommendation_response = requests.post(
+                    self.deepseek_url, 
+                    json=recommendation_payload, 
+                    headers=self.deepseek_headers,
+                    timeout=60  # Add timeout
+                )
+                
+                # Check if the response is successful
+                if recommendation_response.status_code != 200:
+                    logging.error(f"DeepSeek API error: {recommendation_response.status_code} - {recommendation_response.text}")
+                    recommendations.append("Recommendation could not be generated due to API limitations.")
+                else:
+                    # Try to parse the JSON response
+                    try:
+                        recommendation_result = recommendation_response.json()
+                        recommendations.append(recommendation_result['choices'][0]['message']['content'])
+                    except (json.JSONDecodeError, KeyError) as e:
+                        logging.error(f"Error parsing DeepSeek response: {str(e)}")
+                        logging.error(f"Response text: {recommendation_response.text}")
+                        recommendations.append("Recommendation could not be generated due to parsing errors.")
+            except requests.exceptions.RequestException as e:
+                logging.error(f"Error calling DeepSeek API: {str(e)}")
+                recommendations.append("Recommendation could not be generated due to API connection issues.")
+                
+        return recommendations
+    
+    def _store_document(self, doc_id: str, file_path: str):
+        # save document to vector store
+        self.vector_store.add_document(doc_id, file_path)
@@ -0,0 +1,57 @@
+import cohere
+from typing import List, Union
+from services.config import config
+
+class EmbeddingService:
+    def __init__(self):
+        self.cohere_client = cohere.Client(config.COHERE_API_KEY)
+        self.model = config.COHERE_EMBEDDING_MODEL
+
+    def create_embedding(self, text: str) -> List[float]:
+        """
+        Create an embedding for a single text using Cohere.
+        
+        Args:
+            text (str): The text to create an embedding for
+            
+        Returns:
+            List[float]: The embedding vector
+        """
+        response = self.cohere_client.embed(
+            texts=[text],
+            model=self.model,
+            input_type="search_document"
+        )
+        return response.embeddings[0]
+
+    def create_embeddings(self, texts: List[str]) -> List[List[float]]:
+        """
+        Create embeddings for multiple texts using Cohere.
+        
+        Args:
+            texts (List[str]): List of texts to create embeddings for
+            
+        Returns:
+            List[List[float]]: List of embedding vectors
+        """
+        response = self.cohere_client.embed(
+            texts=texts,
+            model=self.model,
+            input_type="search_document",
+            dimension=config.VECTOR_DIMENSION
+        )
+        return response.embeddings
+
+    def create_embedding_from_file(self, file_path: str) -> List[float]:
+        """
+        Create an embedding from a file's contents.
+        
+        Args:
+            file_path (str): Path to the file to create an embedding for
+            
+        Returns:
+            List[float]: The embedding vector
+        """
+        with open(file_path, 'r', encoding='utf-8') as f:
+            content = f.read()
+        return self.create_embedding(content) 
@@ -0,0 +1,139 @@
+from pinecone import Pinecone, ServerlessSpec
+from typing import List, Any, Optional
+from services.config import config
+from services.embedding_service import EmbeddingService
+import logging
+import os
+
+class VectorStore:
+    def __init__(self, pinecone_client: Optional[Pinecone] = None, embedding_service: Optional[EmbeddingService] = None):
+        self.pinecone = pinecone_client or Pinecone(api_key=config.PINECONE_API_KEY)
+        self.index_name = config.PINECONE_INDEX_NAME
+        self.embedding_service = embedding_service or EmbeddingService()
+        self._ensure_index()
+        
+    def _ensure_index(self):
+        """Ensure the Pinecone index exists, create if it doesn't."""
+        try:
+            # Check if index exists, create if it doesn't
+            if self.index_name not in self.pinecone.list_indexes().names():
+                # Create a new index with the correct dimension
+                self.pinecone.create_index(
+                    name=self.index_name,
+                    dimension=config.VECTOR_DIMENSION,  # Using the dimension from config
+                    metric="cosine",
+                    spec=ServerlessSpec(cloud="aws", region="us-east-1")
+                )
+                logging.info(f"Created new index '{self.index_name}' with dimension {config.VECTOR_DIMENSION}")
+            
+            self.index = self.pinecone.Index(self.index_name)
+            
+            # Check if the index dimension matches the config dimension
+            self._check_index_dimension()
+        except Exception as e:
+            logging.error(f"Error ensuring index exists: {str(e)}")
+            raise
+    
+    def _check_index_dimension(self):
+        """Check if the index dimension matches the config dimension and fix if needed."""
+        try:
+            # Get the index description
+            index_description = self.pinecone.describe_index(self.index_name)
+            index_dimension = index_description.dimension
+            
+            if index_dimension != config.VECTOR_DIMENSION:
+                logging.warning(f"Index dimension {index_dimension} does not match config dimension {config.VECTOR_DIMENSION}")
+                logging.info("Recreating index with correct dimension...")
+                
+                # Delete the existing index
+                self.pinecone.delete_index(self.index_name)
+                
+                # Create a new index with the correct dimension
+                self.pinecone.create_index(
+                    name=self.index_name,
+                    dimension=config.VECTOR_DIMENSION,
+                    metric="cosine",
+                    spec=ServerlessSpec(cloud="aws", region="us-east-1")
+                )
+                
+                # Reinitialize the index
+                self.index = self.pinecone.Index(self.index_name)
+                logging.info(f"Index recreated with dimension {config.VECTOR_DIMENSION}")
+        except Exception as e:
+            logging.error(f"Error checking index dimension: {str(e)}")
+            raise
+        
+    def store_embedding(self, doc_id: str, embedding: List[float], content: str):
+        """Store document embedding in Pinecone."""
+        try:
+            # Verify embedding dimension matches the index dimension
+            if len(embedding) != config.VECTOR_DIMENSION:
+                raise ValueError(f"Embedding dimension {len(embedding)} does not match index dimension {config.VECTOR_DIMENSION}")
+                
+            self.index.upsert(
+                vectors=[{
+                    "id": doc_id,
+                    "values": embedding,
+                    "metadata": {
+                        "content": content
+                    }
+                }]
+            )
+            logging.info(f"Stored embedding for document {doc_id}")
+        except Exception as e:
+            logging.error(f"Error storing embedding for document {doc_id}: {str(e)}")
+            raise
+        
+    def search_similar(self, query_embedding: List[float], top_k: int = 5) -> List[Any]:
+        """Search for similar documents."""
+        try:
+            # Verify query embedding dimension matches the index dimension
+            if len(query_embedding) != config.VECTOR_DIMENSION:
+                raise ValueError(f"Query embedding dimension {len(query_embedding)} does not match index dimension {config.VECTOR_DIMENSION}")
+                
+            results = self.index.query(
+                vector=query_embedding,
+                top_k=top_k,
+                include_metadata=True
+            )
+            return results.matches
+        except Exception as e:
+            logging.error(f"Error searching for similar documents: {str(e)}")
+            raise
+        
+    def delete_document(self, doc_id: str):
+        """Delete a document from the index."""
+        try:
+            self.index.delete(ids=[doc_id])
+            logging.info(f"Deleted document {doc_id} from index")
+        except Exception as e:
+            logging.error(f"Error deleting document {doc_id}: {str(e)}")
+            raise
+
+    def add_document(self, doc_id: str, file_path: str):
+        """Add a document to the index."""
+        try:
+            # Check if file exists
+            if not os.path.exists(file_path):
+                raise FileNotFoundError(f"File not found: {file_path}")
+                
+            # read document content with error handling for encoding
+            try:
+                with open(file_path, "r", encoding="utf-8") as file:
+                    content = file.read()
+            except UnicodeDecodeError:
+                # Try with a different encoding if UTF-8 fails
+                with open(file_path, "r", encoding="latin-1") as file:
+                    content = file.read()
+                    
+            # create embedding
+            embedding = self.embedding_service.create_embedding(content)
+            
+            # store embedding
+            logging.info(f"Storing embedding for document {doc_id}")
+            self.store_embedding(doc_id, embedding, content)
+            
+            return True
+        except Exception as e:
+            logging.error(f"Error adding document {doc_id}: {str(e)}")
+            raise
@@ -0,0 +1,104 @@
+/* Markdown Styles
+ *
+ * Typography for rendered Markdown. Every rule is scoped to .markdown-body,
+ * so nothing outside such a container is affected.
+ */
+/* Base font, size and line height; long words are allowed to wrap */
+.markdown-body {
+    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Helvetica, Arial, sans-serif;
+    font-size: 16px;
+    line-height: 1.5;
+    word-wrap: break-word;
+}
+
+/* Headings: shared spacing and weight */
+.markdown-body h1,
+.markdown-body h2,
+.markdown-body h3,
+.markdown-body h4,
+.markdown-body h5,
+.markdown-body h6 {
+    margin-top: 24px;
+    margin-bottom: 16px;
+    font-weight: 600;
+    line-height: 1.25;
+}
+
+/* Heading sizes, relative to the 16px base */
+.markdown-body h1 { font-size: 2em; }
+.markdown-body h2 { font-size: 1.5em; }
+.markdown-body h3 { font-size: 1.25em; }
+.markdown-body h4 { font-size: 1em; }
+.markdown-body h5 { font-size: 0.875em; }
+.markdown-body h6 { font-size: 0.85em; }
+
+/* Paragraphs: spacing below only */
+.markdown-body p {
+    margin-top: 0;
+    margin-bottom: 16px;
+}
+
+/* Lists: indented, spacing below only */
+.markdown-body ul,
+.markdown-body ol {
+    padding-left: 2em;
+    margin-top: 0;
+    margin-bottom: 16px;
+}
+
+/* Inline code: monospace on a faint tinted background */
+.markdown-body code {
+    padding: 0.2em 0.4em;
+    margin: 0;
+    font-size: 85%;
+    background-color: rgba(27, 31, 35, 0.05);
+    border-radius: 3px;
+    font-family: "SFMono-Regular", Consolas, "Liberation Mono", Menlo, monospace;
+}
+
+/* Code blocks: padded box that scrolls instead of wrapping */
+.markdown-body pre {
+    padding: 16px;
+    overflow: auto;
+    font-size: 85%;
+    line-height: 1.45;
+    background-color: #f6f8fa;
+    border-radius: 3px;
+    margin-top: 0;
+    margin-bottom: 16px;
+}
+
+/* Code inside a block: undo the inline-code padding/background set above */
+.markdown-body pre code {
+    padding: 0;
+    margin: 0;
+    font-size: 100%;
+    word-break: normal;
+    white-space: pre;
+    background: transparent;
+    border: 0;
+}
+
+/* Block quotes: muted text with a bar on the left */
+.markdown-body blockquote {
+    padding: 0 1em;
+    color: #6a737d;
+    border-left: 0.25em solid #dfe2e5;
+    margin: 0 0 16px 0;
+}
+
+/* Tables: display:block + overflow:auto lets a wide table scroll on its own */
+.markdown-body table {
+    display: block;
+    width: 100%;
+    overflow: auto;
+    margin-top: 0;
+    margin-bottom: 16px;
+    border-spacing: 0;
+    border-collapse: collapse;
+}
+
+.markdown-body table th {
+    font-weight: 600;
+}
+
+/* Cell padding and borders */
+.markdown-body table th,
+.markdown-body table td {
+    padding: 6px 13px;
+    border: 1px solid #dfe2e5;
+}
+
+/* Rows: white by default ... */
+.markdown-body table tr {
+    background-color: #fff;
+    border-top: 1px solid #c6cbd1;
+}
+
+/* ... with every second row shaded */
+.markdown-body table tr:nth-child(2n) {
+    background-color: #f6f8fa;
+} 
@@ -0,0 +1,62 @@
+/* Custom styles for Mini SpecsComply Pro */
+
+/* Page is a full-height vertical flex container ... */
+body {
+    min-height: 100vh;
+    display: flex;
+    flex-direction: column;
+}
+
+/* ... so an auto top margin makes the footer absorb the leftover height */
+.footer {
+    margin-top: auto;
+}
+
+/* Cards: borderless with a soft shadow and rounded corners */
+.card {
+    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+    border: none;
+    border-radius: 8px;
+}
+
+/* Round only the top corners of the header, matching the card radius */
+.card-header {
+    border-radius: 8px 8px 0 0 !important;
+}
+
+/* Focus ring for text inputs and selects */
+.form-control:focus, .form-select:focus {
+    border-color: #0d6efd;
+    box-shadow: 0 0 0 0.25rem rgba(13, 110, 253, 0.25);
+}
+
+/* Slightly wider, medium-weight primary buttons */
+.btn-primary {
+    padding: 0.5rem 1.5rem;
+    font-weight: 500;
+}
+
+/* List groups: no side borders, and no outer border on the first/last item */
+.list-group-item {
+    border-left: none;
+    border-right: none;
+}
+
+.list-group-item:first-child {
+    border-top: none;
+}
+
+.list-group-item:last-child {
+    border-bottom: none;
+}
+
+/* File upload styling */
+input[type="file"] {
+    padding: 0.375rem 0.75rem;
+}
+
+/* Spinner styling: gap between the spinner and the text that follows it */
+.spinner-border {
+    margin-right: 0.5rem;
+}
+
+/* Responsive adjustments: fixed 15px side padding at widths up to 768px */
+@media (max-width: 768px) {
+    .container {
+        padding-left: 15px;
+        padding-right: 15px;
+    }
+} 
@@ -0,0 +1,79 @@
+// Main JavaScript file for Mini SpecsComply Pro
+
+// Function to show toast notifications
+function showToast(message, type = 'info') {
+    // Create toast element
+    const toast = document.createElement('div');
+    toast.className = `toast align-items-center text-white bg-${type} border-0`;
+    toast.setAttribute('role', 'alert');
+    toast.setAttribute('aria-live', 'assertive');
+    toast.setAttribute('aria-atomic', 'true');
+    
+    // Create toast content
+    toast.innerHTML = `
+        <div class="d-flex">
+            <div class="toast-body">
+                ${message}
+            </div>
+            <button type="button" class="btn-close btn-close-white me-2 m-auto" data-bs-dismiss="toast" aria-label="Close"></button>
+        </div>
+    `;
+    
+    // Add toast to container
+    const toastContainer = document.getElementById('toastContainer') || createToastContainer();
+    toastContainer.appendChild(toast);
+    
+    // Initialize and show toast
+    const bsToast = new bootstrap.Toast(toast);
+    bsToast.show();
+    
+    // Remove toast after it's hidden
+    toast.addEventListener('hidden.bs.toast', () => {
+        toast.remove();
+    });
+}
+
+// Function to create toast container if it doesn't exist
+function createToastContainer() {
+    const container = document.createElement('div');
+    container.id = 'toastContainer';
+    container.className = 'toast-container position-fixed bottom-0 end-0 p-3';
+    document.body.appendChild(container);
+    return container;
+}
+
+// Function to validate file type
+function validateFileType(input) {
+    const allowedTypes = ['application/pdf', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', 'text/plain', 'text/markdown'];
+    const file = input.files[0];
+    
+    if (file && !allowedTypes.includes(file.type)) {
+        showToast('Please upload a PDF, DOCX, TXT, or MD file.', 'danger');
+        input.value = '';
+        return false;
+    }
+    
+    return true;
+}
+
+// Add event listeners when DOM is loaded
+document.addEventListener('DOMContentLoaded', function() {
+    // File input validation
+    const fileInput = document.getElementById('documentFile');
+    if (fileInput) {
+        fileInput.addEventListener('change', function() {
+            validateFileType(this);
+        });
+    }
+    
+    // Document type change handler
+    const docTypeSelect = document.getElementById('documentType');
+    if (docTypeSelect) {
+        docTypeSelect.addEventListener('change', function() {
+            const descriptionField = document.getElementById('documentDescription');
+            if (descriptionField) {
+                descriptionField.placeholder = `Brief description of your ${this.options[this.selectedIndex].text.toLowerCase()}...`;
+            }
+        });
+    }
+}); 
@@ -0,0 +1,135 @@
+{% extends "base.html" %}
+
+{% block title %}Analysis Results - Mini SpecsComply Pro{% endblock %}
+
+{% block extra_css %}
+<link rel="stylesheet" href="{{ url_for('static', path='css/markdown.css') }}">
+{% endblock %}
+
+{% block content %}
+<div class="row">
+    <div class="col-md-8 mx-auto">
+        <div class="card mb-4">
+            <div class="card-header bg-primary text-white d-flex justify-content-between align-items-center">
+                <h4 class="mb-0">Analysis Results</h4>
+                <span class="badge bg-light text-primary">{{ metadata.document_type|replace('_', ' ')|title }}</span>
+            </div>
+            <div class="card-body">
+                <h5 class="card-title">Document Information</h5>
+                <p class="card-text"><strong>Filename:</strong> {{ metadata.filename }}</p>
+                {% if metadata.description %}
+                <p class="card-text"><strong>Description:</strong> {{ metadata.description }}</p>
+                {% endif %}
+                
+                <hr>
+                
+                <h5 class="card-title">Summary</h5>
+                <div class="card mb-3">
+                    <div class="card-body bg-light markdown-body">
+                        {{ analysis.summary|markdown|safe }}
+                    </div>
+                </div>
+                
+                <h5 class="card-title">Compliance Issues</h5>
+                <div class="accordion" id="issuesAccordion">
+                    {% for issue in analysis.issues %}
+                    <div class="accordion-item">
+                        <h2 class="accordion-header" id="heading{{ loop.index }}">
+                            <button class="accordion-button {% if not loop.first %}collapsed{% endif %}" type="button" data-bs-toggle="collapse" data-bs-target="#collapse{{ loop.index }}">
+                                <span class="badge bg-{{ 'danger' if issue.severity == 'high' else 'warning' if issue.severity == 'medium' else 'info' }} me-2">
+                                    {{ issue.severity|title }}
+                                </span>
+                                {{ issue.issue }}
+                            </button>
+                        </h2>
+                        <div id="collapse{{ loop.index }}" class="accordion-collapse collapse {% if loop.first %}show{% endif %}" data-bs-parent="#issuesAccordion">
+                            <div class="accordion-body">
+                                <p><strong>Rank:</strong> {{ issue.rank }}</p>
+                                <p><strong>Recommendation:</strong></p>
+                                <div class="alert alert-info markdown-body">
+                                    {{ analysis.recommendations[loop.index0]|markdown|safe }}
+                                </div>
+                            </div>
+                        </div>
+                    </div>
+                    {% endfor %}
+                </div>
+            </div>
+            <div class="card-footer">
+                <div class="d-flex justify-content-between">
+                    <a href="/" class="btn btn-outline-primary">Back to Home</a>
+                    <button type="button" class="btn btn-primary" data-bs-toggle="modal" data-bs-target="#resubmitModal">
+                        Resubmit Document
+                    </button>
+                </div>
+            </div>
+        </div>
+    </div>
+</div>
+
+<!-- Resubmit Modal -->
+<div class="modal fade" id="resubmitModal" tabindex="-1" aria-labelledby="resubmitModalLabel" aria-hidden="true">
+    <div class="modal-dialog">
+        <div class="modal-content">
+            <div class="modal-header">
+                <h5 class="modal-title" id="resubmitModalLabel">Resubmit Document</h5>
+                <button type="button" class="btn-close" data-bs-dismiss="modal" aria-label="Close"></button>
+            </div>
+            <div class="modal-body">
+                <form id="resubmitForm" action="/document/{{ metadata.document_id }}/resubmit" method="post" enctype="multipart/form-data">
+                    <input type="hidden" name="document_id" value="{{ metadata.document_id }}">
+                    <div class="mb-3">
+                        <label for="resubmitFile" class="form-label">Updated Document</label>
+                        <input class="form-control" type="file" id="resubmitFile" name="file" required>
+                    </div>
+                    <div class="mb-3">
+                        <label for="resubmitDocumentType" class="form-label">Document Type (Optional)</label>
+                        <select class="form-select" id="resubmitDocumentType" name="document_type">
+                            <option value="" selected>Keep current type ({{ metadata.document_type|replace('_', ' ')|title }})</option>
+                            <option value="technical_specification">Technical Specification</option>
+                            <option value="requirement_document">Requirement Document</option>
+                            <option value="design_document">Design Document</option>
+                            <option value="test_document">Test Document</option>
+                            <option value="user_manual">User Manual</option>
+                            <option value="other">Other</option>
+                        </select>
+                    </div>
+                    <div class="mb-3">
+                        <label for="resubmitDescription" class="form-label">Changes Made (Optional)</label>
+                        <textarea class="form-control" id="resubmitDescription" name="description" rows="3" placeholder="Describe the changes you made to address the issues..."></textarea>
+                    </div>
+                    <div class="modal-footer">
+                        <button type="button" class="btn btn-secondary" data-bs-dismiss="modal">Cancel</button>
+                        <button type="submit" class="btn btn-primary" id="resubmitButton">
+                            <span class="spinner-border spinner-border-sm d-none" role="status" aria-hidden="true"></span>
+                            Submit
+                        </button>
+                    </div>
+                </form>
+            </div>
+        </div>
+    </div>
+</div>
+{% endblock %}
+
+{% block extra_js %}
+<script>
+    // Add file type validation to the resubmit form
+    document.getElementById('resubmitFile').addEventListener('change', function() {
+        validateFileType(this);
+    });
+    
+    document.getElementById('resubmitForm').addEventListener('submit', function(e) {
+        // Don't prevent default - let the form submit normally
+        console.log('Form submitted');
+        console.log('Form action:', this.action);
+        
+        const submitButton = document.getElementById('resubmitButton');
+        const spinner = submitButton.querySelector('.spinner-border');
+        
+        // Show loading state
+        submitButton.disabled = true;
+        spinner.classList.remove('d-none');
+    });
+</script>
+{% endblock %} 
@@ -0,0 +1,52 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>{% block title %}Mini SpecsComply Pro{% endblock %}</title>
+    <!-- Bootstrap CSS -->
+    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
+    <!-- Custom CSS -->
+    <link rel="stylesheet" href="{{ url_for('static', path='css/style.css') }}">
+    {% block extra_css %}{% endblock %}
+</head>
+<body>
+    <nav class="navbar navbar-expand-lg navbar-dark bg-primary">
+        <div class="container">
+            <a class="navbar-brand" href="/">Mini SpecsComply Pro</a>
+            <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarNav">
+                <span class="navbar-toggler-icon"></span>
+            </button>
+            <div class="collapse navbar-collapse" id="navbarNav">
+                <ul class="navbar-nav">
+                    <li class="nav-item">
+                        <a class="nav-link" href="/">Home</a>
+                    </li>
+                    <li class="nav-item">
+                        <a class="nav-link" href="/documents">My Documents</a>
+                    </li>
+                </ul>
+            </div>
+        </div>
+    </nav>
+
+    <!-- Main content; mb-5 keeps the gap above the footer on pages taller than the viewport -->
+    <div class="container mt-4 mb-5">
+        {% block content %}{% endblock %}
+    </div>
+
+    <!-- Toast Container -->
+    <div class="toast-container position-fixed bottom-0 end-0 p-3" id="toastContainer"></div>
+
+    <!-- mt-auto rather than mt-5: Bootstrap's mt-5 sets margin-top with !important,
+         which would override the auto margin that pins the footer to the bottom
+         of the flex-column body on short pages -->
+    <footer class="footer mt-auto py-3 bg-light">
+        <div class="container text-center">
+            <span class="text-muted">© 2025 Mini SpecsComply Pro</span>
+        </div>
+    </footer>
+
+    <!-- Bootstrap JS -->
+    <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
+    <!-- Custom JS -->
+    <script src="{{ url_for('static', path='js/main.js') }}"></script>
+    {% block extra_js %}{% endblock %}
+</body>
+</html> 
@@ -0,0 +1,131 @@
+{% extends "base.html" %}
+
+{% block title %}My Documents - Mini SpecsComply Pro{% endblock %}
+
+{% block content %}
+<div class="row">
+    <div class="col-md-10 mx-auto">
+        <div class="card">
+            <div class="card-header bg-primary text-white d-flex justify-content-between align-items-center">
+                <h4 class="mb-0">My Documents</h4>
+                <a href="/" class="btn btn-light btn-sm">Upload New Document</a>
+            </div>
+            <div class="card-body">
+                {% if documents %}
+                <div class="table-responsive">
+                    <table class="table table-hover">
+                        <thead>
+                            <tr>
+                                <th>Document</th>
+                                <th>Type</th>
+                                <th>Upload Date</th>
+                                <th>Status</th>
+                                <th>Actions</th>
+                            </tr>
+                        </thead>
+                        <tbody>
+                            {% for doc in documents %}
+                            <tr>
+                                <td>
+                                    <div>
+                                        <strong>{{ doc.filename }}</strong>
+                                        {% if doc.description %}
+                                        <div class="text-muted small">{{ doc.description }}</div>
+                                        {% endif %}
+                                    </div>
+                                </td>
+                                <td>
+                                    <span class="badge bg-info">{{ doc.document_type|replace('_', ' ')|title }}</span>
+                                </td>
+                                <td>{{ doc.upload_date }}</td>
+                                <td>
+                                    {% if doc.status == 'completed' %}
+                                    <span class="badge bg-success">Completed</span>
+                                    {% elif doc.status == 'processing' %}
+                                    <span class="badge bg-warning">Processing</span>
+                                    {% else %}
+                                    <span class="badge bg-danger">Failed</span>
+                                    {% endif %}
+                                </td>
+                                <td>
+                                    <div class="btn-group">
+                                        <a href="/document/{{ doc.document_id }}/analysis" class="btn btn-sm btn-outline-primary">View Analysis</a>
+                                        <button type="button" class="btn btn-sm btn-outline-danger" data-bs-toggle="modal" data-bs-target="#deleteModal{{ doc.document_id }}">
+                                            Delete
+                                        </button>
+                                    </div>
+                                    
+                                    <!-- Delete Modal -->
+                                    <div class="modal fade" id="deleteModal{{ doc.document_id }}" tabindex="-1" aria-hidden="true">
+                                        <div class="modal-dialog">
+                                            <div class="modal-content">
+                                                <div class="modal-header">
+                                                    <h5 class="modal-title">Confirm Delete</h5>
+                                                    <button type="button" class="btn-close" data-bs-dismiss="modal" aria-label="Close"></button>
+                                                </div>
+                                                <div class="modal-body">
+                                                    <p>Are you sure you want to delete "{{ doc.filename }}"?</p>
+                                                    <p class="text-danger">This action cannot be undone.</p>
+                                                </div>
+                                                <div class="modal-footer">
+                                                    <button type="button" class="btn btn-secondary" data-bs-dismiss="modal">Cancel</button>
+                                                    <button type="button" class="btn btn-danger" onclick="deleteDocument('{{ doc.document_id }}')">Delete</button>
+                                                </div>
+                                            </div>
+                                        </div>
+                                    </div>
+                                </td>
+                            </tr>
+                            {% endfor %}
+                        </tbody>
+                    </table>
+                </div>
+                {% else %}
+                <div class="text-center py-5">
+                    <h5 class="text-muted">No documents uploaded yet</h5>
+                    <p>Upload your first document to get started with compliance analysis.</p>
+                    <a href="/" class="btn btn-primary mt-3">Upload Document</a>
+                </div>
+                {% endif %}
+            </div>
+        </div>
+    </div>
+</div>
+{% endblock %}
+
+{% block extra_js %}
+<script>
+    function deleteDocument(docId) {
+        // Make an API call to delete the document
+        fetch(`/document/${docId}`, {
+            method: 'DELETE',
+            headers: {
+                'Content-Type': 'application/json'
+            }
+        })
+        .then(response => {
+            if (!response.ok) {
+                throw new Error('Failed to delete document');
+            }
+            return response.json();
+        })
+        .then(data => {
+            // Show success message
+            showToast('Document deleted successfully.', 'success');
+            
+            // Close the modal
+            const modal = bootstrap.Modal.getInstance(document.getElementById(`deleteModal${docId}`));
+            modal.hide();
+            
+            // Reload the page after a short delay
+            setTimeout(() => {
+                window.location.reload();
+            }, 1000);
+        })
+        .catch(error => {
+            console.error('Error:', error);
+            showToast('Failed to delete document. Please try again.', 'danger');
+        });
+    }
+</script>
+{% endblock %} 
@@ -0,0 +1,99 @@
+{% extends "base.html" %}
+
+{% block title %}Home - Mini SpecsComply Pro{% endblock %}
+
+{% block content %}
+<div class="row justify-content-center">
+    <div class="col-md-8">
+        <div class="card">
+            <div class="card-header bg-primary text-white">
+                <h4 class="mb-0">Upload Document for Compliance Analysis</h4>
+            </div>
+            <div class="card-body">
+                <form id="uploadForm" action="/upload-document" method="post" enctype="multipart/form-data">
+                    {# Sends the fields "document_type" and "file" as multipart form data to /upload-document. #}
+                    <div class="mb-3">
+                        <label for="documentType" class="form-label">Document Type</label>
+                        <select class="form-select" id="documentType" name="document_type" required>
+                            <option value="" selected disabled>Select document type</option>
+                            <option value="technical_specification">Technical Specification</option>
+                            <option value="requirement_document">Requirement Document</option>
+                            <option value="design_document">Design Document</option>
+                            <option value="test_document">Test Document</option>
+                            <option value="user_manual">User Manual</option>
+                            <option value="other">Other</option>
+                        </select>
+                    </div>
+                    <div class="mb-3">
+                        <label for="documentFile" class="form-label">Document File</label>
+                        {# accept limits the file picker to the formats listed below; it is a browser hint, not validation. #}
+                        <input class="form-control" type="file" id="documentFile" name="file" accept=".pdf,.docx,.txt,.md" required>
+                        <div class="form-text">Supported formats: PDF, DOCX, TXT, MD</div>
+                    </div>
+                    <div class="d-grid">
+                        <button type="submit" class="btn btn-primary" id="uploadButton">
+                            <span class="spinner-border spinner-border-sm d-none" role="status" aria-hidden="true"></span>
+                            Upload and analyze
+                        </button>
+                    </div>
+                </form>
+            </div>
+        </div>
+        
+        <div class="card mt-4">
+            <div class="card-header bg-info text-white">
+                <h5 class="mb-0">How It Works</h5>
+            </div>
+            <div class="card-body">
+                <ol class="list-group list-group-numbered">
+                    <li class="list-group-item">Upload your document and select its type</li>
+                    <li class="list-group-item">Our AI analyzes the document for compliance issues</li>
+                    <li class="list-group-item">Receive a detailed report with issues and recommendations</li>
+                    <li class="list-group-item">Make necessary changes and resubmit if needed</li>
+                </ol>
+            </div>
+        </div>
+    </div>
+</div>
+{% endblock %}
+
+{% block extra_js %}
+<script>
+    document.getElementById('uploadForm').addEventListener('submit', function(e) {
+        e.preventDefault();
+        
+        const uploadButton = document.getElementById('uploadButton');
+        const spinner = uploadButton.querySelector('.spinner-border');
+        
+        // Show loading state
+        uploadButton.disabled = true;
+        spinner.classList.remove('d-none');
+        
+        // Create FormData object
+        const formData = new FormData(this);
+        
+        // Send the form data
+        fetch('/upload-document', {
+            method: 'POST',
+            body: formData
+        })
+        .then(response => {
+            if (response.ok) {
+                // Redirect to /documents route on success
+                window.location.href = '/documents';
+            } else {
+                throw new Error('Failed to upload document');
+            }
+        })
+        .catch(error => {
+            console.error('Error:', error);
+            alert('Error uploading document. Please try again.');
+        })
+        .finally(() => {
+            // Reset button state
+            uploadButton.disabled = false;
+            spinner.classList.add('d-none');
+        });
+    });
+</script>
+{% endblock %}