feat: Implement background document processing and namespace support
- Added background task processing for document uploads to improve responsiveness.
- Updated DocumentProcessor.process_document from an async method to a synchronous one.
- Introduced namespace configuration in VectorStore for better organization of stored embeddings.
- Enhanced logging to reflect changes in document processing and embedding storage.
This commit is contained in:
+13
-5
@@ -1,4 +1,4 @@
|
|||||||
from fastapi import FastAPI, UploadFile, File, HTTPException, Form, Request
|
from fastapi import FastAPI, UploadFile, File, HTTPException, Form, Request, BackgroundTasks
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
from fastapi.templating import Jinja2Templates
|
from fastapi.templating import Jinja2Templates
|
||||||
from fastapi.staticfiles import StaticFiles
|
from fastapi.staticfiles import StaticFiles
|
||||||
@@ -90,6 +90,7 @@ async def documents(request: Request):
|
|||||||
|
|
||||||
@app.post("/upload-document")
|
@app.post("/upload-document")
|
||||||
async def upload_document(
|
async def upload_document(
|
||||||
|
background_tasks: BackgroundTasks,
|
||||||
file: UploadFile = File(...),
|
file: UploadFile = File(...),
|
||||||
document_type: str = Form(...),
|
document_type: str = Form(...),
|
||||||
):
|
):
|
||||||
@@ -108,8 +109,8 @@ async def upload_document(
|
|||||||
|
|
||||||
logging.info(f"File saved to {file_path}")
|
logging.info(f"File saved to {file_path}")
|
||||||
|
|
||||||
# Process the document
|
# Add document processing to background tasks
|
||||||
await document_processor.process_document(doc_id, file_path, document_type)
|
background_tasks.add_task(document_processor.process_document, doc_id, file_path, document_type)
|
||||||
|
|
||||||
# Save document metadata
|
# Save document metadata
|
||||||
metadata = {
|
metadata = {
|
||||||
@@ -121,14 +122,21 @@ async def upload_document(
|
|||||||
# Save metadata to database
|
# Save metadata to database
|
||||||
database.save_metadata(doc_id, metadata)
|
database.save_metadata(doc_id, metadata)
|
||||||
|
|
||||||
logging.info(f"Document {doc_id} processed successfully")
|
logging.info(f"Document {doc_id} upload initiated successfully")
|
||||||
return {"document_id": doc_id, "message": "Document uploaded and processed successfully"}
|
return {"document_id": doc_id, "message": "Document uploaded successfully, processing in background"}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error_msg = f"Error processing document: {str(e)}"
|
error_msg = f"Error processing document: {str(e)}"
|
||||||
logging.error(error_msg)
|
logging.error(error_msg)
|
||||||
logging.error(traceback.format_exc())
|
logging.error(traceback.format_exc())
|
||||||
raise HTTPException(status_code=500, detail=error_msg)
|
raise HTTPException(status_code=500, detail=error_msg)
|
||||||
|
|
||||||
|
def process_document_background(doc_id: str, file_path: str, document_type: str):
    """Process an uploaded document and log the outcome.

    Calls ``document_processor.process_document`` synchronously. Any exception
    raised by processing is caught and logged here instead of propagating to
    the caller.

    NOTE(review): the upload handler shown in this diff schedules
    ``document_processor.process_document`` directly rather than this wrapper;
    confirm which callable is meant to be registered as the background task.

    Args:
        doc_id: Identifier of the document being processed.
        file_path: Location of the saved upload on disk.
        document_type: Document type forwarded unchanged to the processor.
    """
    try:
        document_processor.process_document(doc_id, file_path, document_type)
    except Exception as e:
        # logging.exception records the full traceback alongside the message,
        # matching the upload handler, which logs traceback.format_exc() too.
        logging.exception(f"Error processing document {doc_id} in background: {str(e)}")
    else:
        logging.info(f"Document {doc_id} processed successfully in background")
|
||||||
|
|
||||||
@app.get("/document/{doc_id}/analysis", response_class=HTMLResponse)
|
@app.get("/document/{doc_id}/analysis", response_class=HTMLResponse)
|
||||||
async def get_analysis(request: Request, doc_id: str):
|
async def get_analysis(request: Request, doc_id: str):
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ class DocumentProcessor:
|
|||||||
}
|
}
|
||||||
self.database = Database() or database
|
self.database = Database() or database
|
||||||
|
|
||||||
async def process_document(self, doc_id: str, file_path: str, document_type: str, is_resubmission: bool = False):
|
def process_document(self, doc_id: str, file_path: str, document_type: str, is_resubmission: bool = False):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Read document content with error handling for encoding
|
# Read document content with error handling for encoding
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ class VectorStore:
|
|||||||
def __init__(self, pinecone_client: Optional[Pinecone] = None, embedding_service: Optional[EmbeddingService] = None):
|
def __init__(self, pinecone_client: Optional[Pinecone] = None, embedding_service: Optional[EmbeddingService] = None):
|
||||||
self.pinecone = pinecone_client or Pinecone(api_key=config.PINECONE_API_KEY)
|
self.pinecone = pinecone_client or Pinecone(api_key=config.PINECONE_API_KEY)
|
||||||
self.index_name = config.PINECONE_INDEX_NAME
|
self.index_name = config.PINECONE_INDEX_NAME
|
||||||
|
self.namespace = config.PINECONE_NAMESPACE # Added namespace configuration
|
||||||
self.embedding_service = embedding_service or EmbeddingService()
|
self.embedding_service = embedding_service or EmbeddingService()
|
||||||
self._ensure_index()
|
self._ensure_index()
|
||||||
|
|
||||||
@@ -77,9 +78,10 @@ class VectorStore:
|
|||||||
"metadata": {
|
"metadata": {
|
||||||
"content": content
|
"content": content
|
||||||
}
|
}
|
||||||
}]
|
}],
|
||||||
|
namespace=self.namespace # Use the namespace when storing embeddings
|
||||||
)
|
)
|
||||||
logging.info(f"Stored embedding for document {doc_id}")
|
logging.info(f"Stored embedding for document {doc_id} in namespace '{self.namespace}'")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Error storing embedding for document {doc_id}: {str(e)}")
|
logging.error(f"Error storing embedding for document {doc_id}: {str(e)}")
|
||||||
raise
|
raise
|
||||||
@@ -94,7 +96,8 @@ class VectorStore:
|
|||||||
results = self.index.query(
|
results = self.index.query(
|
||||||
vector=query_embedding,
|
vector=query_embedding,
|
||||||
top_k=top_k,
|
top_k=top_k,
|
||||||
include_metadata=True
|
include_metadata=True,
|
||||||
|
namespace=self.namespace # Use the namespace when querying
|
||||||
)
|
)
|
||||||
return results.matches
|
return results.matches
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -104,8 +107,8 @@ class VectorStore:
|
|||||||
def delete_document(self, doc_id: str):
|
def delete_document(self, doc_id: str):
|
||||||
"""Delete a document from the index."""
|
"""Delete a document from the index."""
|
||||||
try:
|
try:
|
||||||
self.index.delete(ids=[doc_id])
|
self.index.delete(ids=[doc_id], namespace=self.namespace) # Use the namespace when deleting
|
||||||
logging.info(f"Deleted document {doc_id} from index")
|
logging.info(f"Deleted document {doc_id} from namespace '{self.namespace}'")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Error deleting document {doc_id}: {str(e)}")
|
logging.error(f"Error deleting document {doc_id}: {str(e)}")
|
||||||
raise
|
raise
|
||||||
|
|||||||
@@ -48,13 +48,14 @@
|
|||||||
{% endif %}
|
{% endif %}
|
||||||
</td>
|
</td>
|
||||||
<td>
|
<td>
|
||||||
|
{% if doc.status == 'completed' %}
|
||||||
<div class="btn-group">
|
<div class="btn-group">
|
||||||
<a href="/document/{{ doc.document_id }}/analysis" class="btn btn-sm btn-outline-primary">View Analysis</a>
|
<a href="/document/{{ doc.document_id }}/analysis" class="btn btn-sm btn-outline-primary">View Analysis</a>
|
||||||
<button type="button" class="btn btn-sm btn-outline-danger" data-bs-toggle="modal" data-bs-target="#deleteModal{{ doc.document_id }}">
|
<button type="button" class="btn btn-sm btn-outline-danger" data-bs-toggle="modal" data-bs-target="#deleteModal{{ doc.document_id }}">
|
||||||
Delete
|
Delete
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
|
{% endif %}
|
||||||
<!-- Delete Modal -->
|
<!-- Delete Modal -->
|
||||||
<div class="modal fade" id="deleteModal{{ doc.document_id }}" tabindex="-1" aria-hidden="true">
|
<div class="modal fade" id="deleteModal{{ doc.document_id }}" tabindex="-1" aria-hidden="true">
|
||||||
<div class="modal-dialog">
|
<div class="modal-dialog">
|
||||||
|
|||||||
Reference in New Issue
Block a user