155 lines
4.4 KiB
Python
155 lines
4.4 KiB
Python
|
|
import os
import shutil
import uuid
from datetime import datetime
from typing import Optional, Tuple

import anthropic
import groq
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import JSONResponse

from .config import Config
from .embeddings import EmbeddingGenerator
from .vector_stores import VectorStore
|
||
|
|
|
||
|
|
# FastAPI application object; the route decorators in this module attach to it.
app = FastAPI(title="Mini SpecsComply Pro")

# Shared service objects, constructed once when the module is imported.
embeddings = EmbeddingGenerator()
vector_store = VectorStore()

# LLM API clients; both API keys are read from Config.
groq_client = groq.Client(api_key=Config.GROQ_API_KEY)
claude_client = anthropic.Anthropic(api_key=Config.CLAUDE_API_KEY)
|
||
|
|
|
||
|
|
|
||
|
|
def save_document(file: UploadFile) -> Tuple[str, str]:
    """Persist an uploaded file under ``Config.UPLOAD_FOLDER``.

    The file is stored as ``<uuid4><ext>``, so the client-supplied name never
    becomes part of the on-disk path; only its lower-cased extension is reused.

    Args:
        file: The incoming upload.

    Returns:
        A ``(doc_id, file_path)`` pair: the generated document id and the
        path the contents were written to.

    Raises:
        HTTPException: 400 when the extension is missing or is not listed in
            ``Config.ALLOWED_EXTENSIONS``.
    """
    # NOTE(review): the upload's filename can apparently be None for some
    # clients; treating that as "no extension" yields a 400 instead of a
    # TypeError from os.path.splitext.
    ext = os.path.splitext(file.filename or "")[1].lower()

    # Validate before touching the filesystem so rejected uploads leave no
    # side effects behind.
    if ext not in Config.ALLOWED_EXTENSIONS:
        raise HTTPException(400, "Unsupported file type")

    os.makedirs(Config.UPLOAD_FOLDER, exist_ok=True)
    doc_id = str(uuid.uuid4())
    file_path = os.path.join(Config.UPLOAD_FOLDER, f"{doc_id}{ext}")

    # Stream in chunks rather than loading the whole upload into memory.
    with open(file_path, "wb") as f:
        shutil.copyfileobj(file.file, f)

    return doc_id, file_path
|
||
|
|
|
||
|
|
|
||
|
|
def extract_text(file_path: str) -> str:
    """Return the textual content of the file at *file_path*.

    This is a plain-text reader: the bytes are decoded as UTF-8 (a leading
    BOM is dropped, undecodable bytes become U+FFFD). Content containing NUL
    bytes is treated as binary and rejected rather than being turned into
    garbage text.

    NOTE(review): formats such as PDF or DOCX need a dedicated parser; add
    one here if ``Config.ALLOWED_EXTENSIONS`` includes them.

    Args:
        file_path: Path of the file to read.

    Returns:
        The decoded text (possibly empty).

    Raises:
        OSError: If the file cannot be opened or read.
        ValueError: If the file looks like binary data.
    """
    with open(file_path, "rb") as f:
        raw = f.read()

    # A NUL byte is a cheap, reliable signal that this is not plain text.
    if b"\x00" in raw:
        raise ValueError(f"Cannot extract text from binary file: {file_path}")

    return raw.decode("utf-8-sig", errors="replace")
|
||
|
|
|
||
|
|
|
||
|
|
def analyze_compliance(text: str) -> dict:
    """Run the two-model compliance analysis over *text*.

    Groq is asked to extract the key sections, Claude is asked for compliance
    issues, and the text of each Claude content block is passed through
    ``embeddings.rerank_issues``.

    Args:
        text: Document text to analyze.

    Returns:
        A dict with ``"summary"`` (content of the first Groq choice),
        ``"issues"`` (the result of ``rerank_issues``) and ``"timestamp"``
        (``datetime.now()`` in ISO format).
    """
    # Step 1: section extraction with Groq.
    extraction_prompt = f"Extract key sections from this document:\n{text}"
    groq_response = groq_client.chat.completions.create(
        messages=[{"role": "user", "content": extraction_prompt}],
        model=Config.GROQ_MODEL,
    )

    # Step 2: compliance reasoning with Claude.
    review_prompt = f"Analyze this document for compliance issues:\n{text}"
    claude_response = claude_client.messages.create(
        model=Config.CLAUDE_MODEL,
        max_tokens=4000,
        messages=[{"role": "user", "content": review_prompt}],
    )

    # Step 3: rerank the text of every returned content block.
    issue_texts = []
    for block in claude_response.content:
        issue_texts.append(block.text)
    ranked = embeddings.rerank_issues(
        issues=issue_texts,
        query="Most critical compliance issues",
    )

    summary = groq_response.choices[0].message.content
    stamp = datetime.now().isoformat()
    return {"summary": summary, "issues": ranked, "timestamp": stamp}
|
||
|
|
|
||
|
|
|
||
|
|
@app.post("/upload-document")
async def upload_document(file: UploadFile = File(...)):
    """Accept a document upload, index it, and return its compliance analysis.

    The file is saved with ``save_document``, its text is extracted and
    embedded, the embedding is upserted into the vector store with status
    ``"pending"``, and ``analyze_compliance`` is run on the text.

    NOTE(review): the analysis is returned to the caller but not written back
    to the vector store, while ``get_analysis`` reads
    ``metadata["analysis"]`` — confirm whether it should be persisted here.

    Args:
        file: The uploaded document.

    Returns:
        A JSON response with ``document_id``, ``status`` and ``analysis``.

    Raises:
        HTTPException: Whatever HTTP error a helper raised (e.g. the 400 for
            an unsupported file type), or 500 for any other failure.
    """
    try:
        doc_id, file_path = save_document(file)
        text = extract_text(file_path)
        embedding = embeddings.generate_embeddings(text)

        # Store in vector DB
        vector_store.upsert_document(
            doc_id=doc_id,
            embedding=embedding,
            metadata={
                "filename": file.filename,
                "upload_time": datetime.now().isoformat(),
                "status": "pending",
            },
        )

        # Start analysis
        analysis = analyze_compliance(text)

        return JSONResponse({
            "document_id": doc_id,
            "status": "analysis_complete",
            "analysis": analysis,
        })
    except HTTPException:
        # HTTPException is an Exception subclass; without this clause the
        # 400 raised for an unsupported file type would be rewritten as a 500.
        raise
    except Exception as e:
        raise HTTPException(500, str(e)) from e
|
||
|
|
|
||
|
|
|
||
|
|
@app.get("/document/{doc_id}/analysis")
async def get_analysis(doc_id: str):
    """Return the stored metadata and analysis for a document.

    Args:
        doc_id: Identifier to look up in the vector store.

    Returns:
        A JSON response with ``document_id``, the document's ``metadata`` and
        its ``analysis`` (an empty dict when the metadata has no
        ``"analysis"`` key).

    Raises:
        HTTPException: 404 when the vector store returns nothing for
            *doc_id*.
    """
    record = vector_store.get_document(doc_id)
    if record:
        payload = {
            "document_id": doc_id,
            "metadata": record.metadata,
            "analysis": record.metadata.get("analysis", {}),
        }
        return JSONResponse(payload)

    raise HTTPException(404, "Document not found")
|
||
|
|
|
||
|
|
|
||
|
|
@app.post("/document/{doc_id}/resubmit")
async def resubmit_document(doc_id: str, file: UploadFile = File(...)):
    """Upload a new version of an existing document and analyze it.

    The original must exist in the vector store. The new file is saved under
    a fresh id, embedded, and upserted with status ``"resubmitted"`` and an
    ``original_id`` pointing back at *doc_id*; ``analyze_compliance`` is then
    run on its text.

    Args:
        doc_id: Id of the document being superseded.
        file: The replacement upload.

    Returns:
        A JSON response with the new ``document_id``, ``status`` and
        ``analysis``.

    Raises:
        HTTPException: 404 when the original is not found, any HTTP error a
            helper raised (e.g. the 400 for an unsupported file type), or
            500 for any other failure.
    """
    try:
        # Verify original exists
        original = vector_store.get_document(doc_id)
        if not original:
            raise HTTPException(404, "Original document not found")

        # Process new version
        new_doc_id, file_path = save_document(file)
        text = extract_text(file_path)
        embedding = embeddings.generate_embeddings(text)

        # Store new version
        vector_store.upsert_document(
            doc_id=new_doc_id,
            embedding=embedding,
            metadata={
                "filename": file.filename,
                "upload_time": datetime.now().isoformat(),
                "status": "resubmitted",
                "original_id": doc_id,
            },
        )

        # Analyze new version
        analysis = analyze_compliance(text)

        return JSONResponse({
            "document_id": new_doc_id,
            "status": "analysis_complete",
            "analysis": analysis,
        })
    except HTTPException:
        # HTTPException is an Exception subclass; without this clause the
        # 404 above (and any 400 from the upload helper) would become a 500.
        raise
    except Exception as e:
        raise HTTPException(500, str(e)) from e
|
||
|
|
|
||
|
|
if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn. The import is kept
    # local so uvicorn is only needed when the module is run directly.
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 8000}
    uvicorn.run(app, **server_options)
|