Files
ds_task_scp/backend/main.py
T

155 lines
4.4 KiB
Python
Raw Normal View History

2025-07-11 22:29:45 +01:00
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import JSONResponse
from typing import Optional
import os
import uuid
from datetime import datetime
from .config import Config
from .embeddings import EmbeddingGenerator
from .vector_stores import VectorStore
import groq
import anthropic
# FastAPI application object; the route decorators below register handlers on it.
app = FastAPI(title="Mini SpecsComply Pro")
# Module-level helpers, created once at import time and shared by every request.
embeddings = EmbeddingGenerator()
vector_store = VectorStore()
# Initialize the LLM API clients with the keys read from Config.
groq_client = groq.Client(api_key=Config.GROQ_API_KEY)
claude_client = anthropic.Anthropic(api_key=Config.CLAUDE_API_KEY)
def save_document(file: UploadFile) -> tuple[str, str]:
    """Persist an uploaded file under a freshly generated document id.

    Args:
        file: The upload received by the endpoint.

    Returns:
        A ``(doc_id, file_path)`` pair: the new UUID4 string and the path
        the upload was written to inside ``Config.UPLOAD_FOLDER``.

    Raises:
        HTTPException: 400 if the file extension is not listed in
            ``Config.ALLOWED_EXTENSIONS`` (this includes uploads without
            a filename, which have no extension).
    """
    import shutil

    # The filename of a multipart upload is optional; treat a missing one as
    # "no extension" instead of crashing in os.path.splitext(None).
    ext = os.path.splitext(file.filename or "")[1].lower()
    if ext not in Config.ALLOWED_EXTENSIONS:
        raise HTTPException(400, "Unsupported file type")

    # Only touch the filesystem once the upload has been accepted.
    os.makedirs(Config.UPLOAD_FOLDER, exist_ok=True)
    doc_id = str(uuid.uuid4())
    # The stored name is built from the generated id, never from the
    # client-supplied filename, so the client cannot influence the path.
    file_path = os.path.join(Config.UPLOAD_FOLDER, f"{doc_id}{ext}")
    with open(file_path, "wb") as f:
        # Stream in chunks rather than loading the whole upload into memory.
        shutil.copyfileobj(file.file, f)
    return doc_id, file_path
def extract_text(file_path: str) -> str:
    """Return the textual content of the file at *file_path*.

    The file is read as bytes and decoded as UTF-8 (a leading BOM is
    stripped). Only content that is valid UTF-8 text is supported.

    Args:
        file_path: Path of the stored document.

    Returns:
        The decoded text of the file.

    Raises:
        ValueError: If the content is not valid UTF-8 text. Raising keeps a
            silent ``None`` from flowing into the embedding and LLM calls.
        OSError: If the file cannot be opened or read.
    """
    with open(file_path, "rb") as f:
        raw = f.read()
    try:
        return raw.decode("utf-8-sig")
    except UnicodeDecodeError as err:
        # TODO: plug in dedicated parsers for binary formats (e.g. PDF, DOCX)
        # if Config.ALLOWED_EXTENSIONS permits them.
        raise ValueError(
            f"Cannot extract text from {file_path!r}: content is not UTF-8 text"
        ) from err
def analyze_compliance(text: str) -> dict:
    """Run the two-model compliance analysis over a document's text.

    Groq is asked to extract the key sections, Claude is asked to analyze
    the document for compliance issues, and the text of each block in
    Claude's reply is passed to ``embeddings.rerank_issues``.

    Args:
        text: The document text to analyze.

    Returns:
        A dict with ``"summary"`` (content of the first Groq choice),
        ``"issues"`` (the result of ``rerank_issues``) and ``"timestamp"``
        (``datetime.now()`` in ISO format).
    """
    # Step 1: section extraction with Groq.
    extraction_prompt = f"Extract key sections from this document:\n{text}"
    section_summary = groq_client.chat.completions.create(
        messages=[{"role": "user", "content": extraction_prompt}],
        model=Config.GROQ_MODEL,
    )

    # Step 2: compliance reasoning with Claude.
    review_prompt = f"Analyze this document for compliance issues:\n{text}"
    compliance_review = claude_client.messages.create(
        model=Config.CLAUDE_MODEL,
        max_tokens=4000,
        messages=[{"role": "user", "content": review_prompt}],
    )

    # Step 3: rerank the text of every block in Claude's reply by importance.
    issue_texts = [block.text for block in compliance_review.content]
    prioritized = embeddings.rerank_issues(
        issues=issue_texts,
        query="Most critical compliance issues",
    )

    return {
        "summary": section_summary.choices[0].message.content,
        "issues": prioritized,
        "timestamp": datetime.now().isoformat(),
    }
@app.post("/upload-document")
async def upload_document(file: UploadFile = File(...)):
    """Store an uploaded document, index its embedding and analyze it.

    The file is saved to disk, its text is extracted and embedded, the
    embedding is upserted into the vector store with ``"status": "pending"``
    metadata, and the compliance analysis is returned in the response.

    NOTE(review): the analysis is only returned to the caller; this handler
    does not write it (or an updated status) back to the vector store.

    Args:
        file: The uploaded document.

    Returns:
        A JSON response with ``document_id``, ``status`` and ``analysis``.

    Raises:
        HTTPException: Propagated unchanged when a helper raises one (e.g.
            400 for an unsupported file type); 500 with the error message
            for any other failure.
    """
    try:
        doc_id, file_path = save_document(file)
        text = extract_text(file_path)
        embedding = embeddings.generate_embeddings(text)
        # Store in vector DB
        vector_store.upsert_document(
            doc_id=doc_id,
            embedding=embedding,
            metadata={
                "filename": file.filename,
                "upload_time": datetime.now().isoformat(),
                "status": "pending",
            },
        )
        # Start analysis
        analysis = analyze_compliance(text)
        return JSONResponse({
            "document_id": doc_id,
            "status": "analysis_complete",
            "analysis": analysis,
        })
    except HTTPException:
        # Deliberate HTTP errors (such as the 400 from save_document) must
        # keep their status code instead of being rewrapped as a 500.
        raise
    except Exception as e:
        raise HTTPException(500, str(e)) from e
@app.get("/document/{doc_id}/analysis")
async def get_analysis(doc_id: str):
    """Return the stored metadata and analysis for one document.

    Args:
        doc_id: Identifier used to look the document up in the vector store.

    Returns:
        A JSON response with ``document_id``, the document's ``metadata``
        and its ``"analysis"`` metadata entry (``{}`` when absent).

    Raises:
        HTTPException: 404 if the vector store lookup returns a falsy value.
    """
    record = vector_store.get_document(doc_id)
    if record:
        payload = {
            "document_id": doc_id,
            "metadata": record.metadata,
            "analysis": record.metadata.get("analysis", {}),
        }
        return JSONResponse(payload)
    raise HTTPException(404, "Document not found")
@app.post("/document/{doc_id}/resubmit")
async def resubmit_document(doc_id: str, file: UploadFile = File(...)):
    """Upload a new version of an existing document and analyze it.

    The new file is stored under its own freshly generated id; its
    vector-store metadata links back to the original through
    ``"original_id"`` and carries ``"status": "resubmitted"``.

    Args:
        doc_id: Identifier of the original document being resubmitted.
        file: The uploaded replacement document.

    Returns:
        A JSON response with the new ``document_id``, ``status`` and
        ``analysis``.

    Raises:
        HTTPException: 404 if the original document is not found; any
            HTTPException raised by a helper (e.g. 400 for an unsupported
            file type) is propagated unchanged; 500 with the error message
            for any other failure.
    """
    try:
        # Verify original exists
        original = vector_store.get_document(doc_id)
        if not original:
            raise HTTPException(404, "Original document not found")
        # Process new version
        new_doc_id, file_path = save_document(file)
        text = extract_text(file_path)
        embedding = embeddings.generate_embeddings(text)
        # Store new version
        vector_store.upsert_document(
            doc_id=new_doc_id,
            embedding=embedding,
            metadata={
                "filename": file.filename,
                "upload_time": datetime.now().isoformat(),
                "status": "resubmitted",
                "original_id": doc_id,
            },
        )
        # Analyze new version
        analysis = analyze_compliance(text)
        return JSONResponse({
            "document_id": new_doc_id,
            "status": "analysis_complete",
            "analysis": analysis,
        })
    except HTTPException:
        # Without this clause the 404 above (and the 400 from save_document)
        # would be caught by the generic handler and reported as a 500.
        raise
    except Exception as e:
        raise HTTPException(500, str(e)) from e
# Development entry point: serve the app with uvicorn when this module is
# executed as a script.
if __name__ == "__main__":
    import uvicorn

    # "0.0.0.0" listens on all network interfaces.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=8000,
    )