last commit

This commit is contained in:
timothyafolami
2024-08-16 21:39:28 +01:00
parent 83ecbded4b
commit ddd0dda276
18 changed files with 429 additions and 12 deletions
Binary file not shown.
Binary file not shown.
+83
View File
@@ -0,0 +1,83 @@
import sys, os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from fastapi import FastAPI, File, UploadFile, BackgroundTasks, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pathlib import Path
from utils import load_documents_from_directory, create_vector_store, save_embedded_data, process_directory
from pydantic import BaseModel
from search import search_and_summarize
from typing import List
app = FastAPI()

# Origins that are allowed to make cross-origin requests to this API.
origins = [
    "http://localhost",
    "http://localhost:8000",
    "http://localhost:3000",
    # Add other allowed origins here
]

# Register the CORS middleware: only the origins listed above are accepted,
# while every HTTP method and every request header is permitted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Uploaded files are written here; the directory is created at import time
# (including missing parents) so the upload endpoint can rely on it existing.
UPLOAD_DIR = Path("./uploads")
UPLOAD_DIR.mkdir(exist_ok=True, parents=True)
class SearchRequest(BaseModel):
    """Request body of the search endpoint: a single free-text query."""

    query: str
def load_documents(directory: str):
    """Run the full ingestion pipeline over ``directory``.

    The steps run strictly in this order: load the documents, build the
    vector store from them, persist the embeddings, then generate the
    thumbnails for the same directory.

    Args:
        directory: Path of the folder whose files should be ingested.

    Returns:
        A dict with a fixed ``"status"`` message. The values returned by the
        save and thumbnail helpers are not inspected, so the message is
        returned whenever none of the steps raises.
    """
    # Step 1: read every document found in the directory.
    docs, doc_ids, page_counts = load_documents_from_directory(directory)

    # Step 2: embed the documents into a vector store.
    vector_store = create_vector_store(docs, doc_ids, page_counts)

    # Step 3: persist the embedded data.
    save_embedded_data(vector_store)

    # Step 4: create the thumbnails.
    process_directory(directory)

    return {"status": "Documents loaded successfully"}
# NOTE: the request model ``SearchRequest`` is declared once, earlier in this
# module; the identical second declaration that used to sit here was a
# copy-paste duplicate and has been removed.
@app.post("/search/")
def search(request: SearchRequest):
    """Search the indexed documents and return the summarized matches.

    Args:
        request: Parsed request body; only its ``query`` string is used.

    Returns:
        A dict of the form ``{"results": ...}`` where the value is whatever
        ``search_and_summarize`` returns for the query.
    """
    # Delegate the actual search and summarization to the utility function.
    results = search_and_summarize(request.query)
    return {"results": results}
@app.post("/upload/")
async def upload_file(background_tasks: BackgroundTasks, file: UploadFile = File(...)):
    """Store an uploaded file in ``UPLOAD_DIR`` and schedule re-indexing.

    Args:
        background_tasks: FastAPI task queue; ``load_documents`` is added to
            it so ingestion runs after the response has been sent.
        file: The uploaded file from the multipart form.

    Returns:
        A dict with a confirmation ``"message"`` and a ``"file_location"``.

    Raises:
        HTTPException: 400 when the upload carries no usable file name.
    """
    # ``file.filename`` is chosen by the client. Keep only its last path
    # component (treating both "/" and "\\" as separators) so a value such as
    # "../../main.py" or an absolute path cannot write outside UPLOAD_DIR.
    raw_name = (file.filename or "").replace("\\", "/")
    safe_name = Path(raw_name).name
    if safe_name in ("", ".", ".."):
        raise HTTPException(status_code=400, detail="Invalid or missing file name.")

    file_location = UPLOAD_DIR / safe_name

    # Copy the upload to disk in bounded chunks instead of loading the whole
    # file into memory at once.
    chunk_size = 1024 * 1024
    with open(file_location, "wb") as buffer:
        while True:
            chunk = await file.read(chunk_size)
            if not chunk:
                break
            buffer.write(chunk)

    # Ingestion is run over the whole upload directory, not just the new file.
    background_tasks.add_task(load_documents, str(UPLOAD_DIR))

    # NOTE(review): "file_location" reports the upload directory rather than
    # the saved file's path. Kept as-is so existing clients see the same
    # response; confirm whether the file path was intended.
    return {
        "message": "Upload successful. Document loading will begin shortly.",
        "file_location": str(UPLOAD_DIR),
    }
if __name__ == "__main__":
    # Imported lazily: uvicorn is only needed when this module is executed
    # directly as a script.
    import uvicorn

    # Serve the app on every network interface, port 8000.
    uvicorn.run(app, port=8000, host="0.0.0.0")
+333
View File
@@ -2808,3 +2808,336 @@ To avoid injury, hold up the wheel when unscrewing the bolts.
2024-08-16 17:37:05,637 - INFO - PyTorch version 2.4.0+cu124 available.
2024-08-16 17:37:06,226 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en
2024-08-16 17:37:13,187 - INFO - Embedding model loaded
2024-08-16 17:38:12,435 - INFO - Searching for the query
2024-08-16 17:38:12,436 - INFO - Searching for Wirebrush
2024-08-16 17:38:12,436 - INFO - Loading embedded data
2024-08-16 17:38:14,222 - WARNING - C:\Users\timmy_3aupohg\anaconda3\envs\smog_env\Lib\site-packages\transformers\models\bert\modeling_bert.py:439: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\cb\pytorch_1000000000000\work\aten\src\ATen\native\transformers\cuda\sdp_utils.cpp:555.)
attn_output = torch.nn.functional.scaled_dot_product_attention(
2024-08-16 17:38:14,372 - INFO - Search completed
2024-08-16 17:38:14,373 - INFO - Search completed
2024-08-16 17:38:14,373 - INFO - Summarizing search results
2024-08-16 17:38:14,375 - INFO - Search results summarized
2024-08-16 20:07:32,804 - INFO - Loading the embedding model
2024-08-16 20:07:41,433 - INFO - PyTorch version 2.4.0+cu124 available.
2024-08-16 20:07:41,857 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en
2024-08-16 20:07:49,114 - INFO - Embedding model loaded
2024-08-16 20:10:52,736 - INFO - Loading the embedding model
2024-08-16 20:10:58,470 - INFO - PyTorch version 2.4.0+cu124 available.
2024-08-16 20:10:58,831 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en
2024-08-16 20:11:04,768 - INFO - Embedding model loaded
2024-08-16 20:11:44,178 - INFO - Loading the embedding model
2024-08-16 20:11:51,103 - INFO - PyTorch version 2.4.0+cu124 available.
2024-08-16 20:11:51,549 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en
2024-08-16 20:11:56,825 - INFO - Embedding model loaded
2024-08-16 20:15:04,657 - INFO - Searching for the query
2024-08-16 20:15:04,657 - INFO - Searching for wirebrush
2024-08-16 20:15:04,657 - INFO - Loading embedded data
2024-08-16 20:16:13,332 - INFO - Loading the embedding model
2024-08-16 20:16:19,225 - INFO - PyTorch version 2.4.0+cu124 available.
2024-08-16 20:16:19,602 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en
2024-08-16 20:16:24,953 - INFO - Embedding model loaded
2024-08-16 20:16:51,193 - INFO - Searching for the query
2024-08-16 20:16:51,193 - INFO - Searching for wirebrush
2024-08-16 20:16:51,193 - INFO - Loading embedded data
2024-08-16 20:16:53,308 - WARNING - C:\Users\timmy_3aupohg\anaconda3\envs\smog_env\Lib\site-packages\transformers\models\bert\modeling_bert.py:439: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\cb\pytorch_1000000000000\work\aten\src\ATen\native\transformers\cuda\sdp_utils.cpp:555.)
attn_output = torch.nn.functional.scaled_dot_product_attention(
2024-08-16 20:16:53,475 - INFO - Search completed
2024-08-16 20:16:53,475 - INFO - Search completed
2024-08-16 20:16:53,475 - INFO - Summarizing search results
2024-08-16 20:16:53,475 - INFO - Search results summarized
2024-08-16 20:17:37,427 - INFO - Searching for the query
2024-08-16 20:17:37,427 - INFO - Searching for
query IntrospectionQuery {
__schema {
queryType { name }
mutationType { name }
subscriptionType { name }
types {
...FullType
}
directives {
name
description
locations
args {
...InputValue
}
}
}
}
fragment FullType on __Type {
kind
name
description
fields(includeDeprecated: true) {
name
description
args {
...InputValue
}
type {
...TypeRef
}
isDeprecated
deprecationReason
}
inputFields {
...InputValue
}
interfaces {
...TypeRef
}
enumValues(includeDeprecated: true) {
name
description
isDeprecated
deprecationReason
}
possibleTypes {
...TypeRef
}
}
fragment InputValue on __InputValue {
name
description
type { ...TypeRef }
defaultValue
}
fragment TypeRef on __Type {
kind
name
ofType {
kind
name
ofType {
kind
name
ofType {
kind
name
ofType {
kind
name
ofType {
kind
name
ofType {
kind
name
ofType {
kind
name
}
}
}
}
}
}
}
}
2024-08-16 20:17:37,443 - INFO - Loading embedded data
2024-08-16 20:17:37,511 - INFO - Search completed
2024-08-16 20:17:37,511 - INFO - Search completed
2024-08-16 20:17:37,511 - INFO - Summarizing search results
2024-08-16 20:17:37,527 - INFO - Search results summarized
2024-08-16 20:18:42,832 - INFO - Searching for the query
2024-08-16 20:18:42,832 - INFO - Searching for wirebrush
2024-08-16 20:18:42,832 - INFO - Loading embedded data
2024-08-16 20:18:42,882 - INFO - Search completed
2024-08-16 20:18:42,899 - INFO - Search completed
2024-08-16 20:18:42,899 - INFO - Summarizing search results
2024-08-16 20:18:42,901 - INFO - Search results summarized
2024-08-16 20:22:46,791 - INFO - Loading the embedding model
2024-08-16 20:22:53,018 - INFO - PyTorch version 2.4.0+cu124 available.
2024-08-16 20:22:53,427 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en
2024-08-16 20:22:59,566 - INFO - Embedding model loaded
2024-08-16 20:24:21,213 - INFO - Loading the embedding model
2024-08-16 20:24:26,903 - INFO - PyTorch version 2.4.0+cu124 available.
2024-08-16 20:24:27,281 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en
2024-08-16 20:24:33,819 - INFO - Embedding model loaded
2024-08-16 20:24:42,819 - INFO - Loading pdf document from uploads\Employee_Attrition_Action_Plan.pdf
2024-08-16 20:24:43,532 - INFO - Summarizing document
2024-08-16 20:24:45,028 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
2024-08-16 20:24:45,559 - INFO - Creating vector store
2024-08-16 20:24:46,277 - WARNING - C:\Users\timmy_3aupohg\anaconda3\envs\smog_env\Lib\site-packages\transformers\models\bert\modeling_bert.py:439: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\cb\pytorch_1000000000000\work\aten\src\ATen\native\transformers\cuda\sdp_utils.cpp:555.)
attn_output = torch.nn.functional.scaled_dot_product_attention(
2024-08-16 20:24:46,583 - INFO - Vector store created
2024-08-16 20:24:46,583 - INFO - Saving the vector store
2024-08-16 20:26:45,690 - INFO - Loading the embedding model
2024-08-16 20:26:51,361 - INFO - PyTorch version 2.4.0+cu124 available.
2024-08-16 20:26:51,689 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en
2024-08-16 20:26:57,758 - INFO - Embedding model loaded
2024-08-16 20:27:35,992 - INFO - Loading pdf document from uploads\Employee_Attrition_Action_Plan.pdf
2024-08-16 20:27:36,682 - INFO - Summarizing document
2024-08-16 20:27:37,718 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
2024-08-16 20:27:38,243 - INFO - Creating vector store
2024-08-16 20:27:39,588 - WARNING - C:\Users\timmy_3aupohg\anaconda3\envs\smog_env\Lib\site-packages\transformers\models\bert\modeling_bert.py:439: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\cb\pytorch_1000000000000\work\aten\src\ATen\native\transformers\cuda\sdp_utils.cpp:555.)
attn_output = torch.nn.functional.scaled_dot_product_attention(
2024-08-16 20:27:39,850 - INFO - Vector store created
2024-08-16 20:27:39,851 - INFO - Saving the vector store
2024-08-16 20:28:37,928 - INFO - Loading the embedding model
2024-08-16 20:28:43,166 - INFO - PyTorch version 2.4.0+cu124 available.
2024-08-16 20:28:43,498 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en
2024-08-16 20:28:49,596 - INFO - Embedding model loaded
2024-08-16 20:30:50,050 - INFO - Loading the embedding model
2024-08-16 20:30:55,432 - INFO - PyTorch version 2.4.0+cu124 available.
2024-08-16 20:30:55,755 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en
2024-08-16 20:31:01,549 - INFO - Embedding model loaded
2024-08-16 20:32:24,048 - INFO - Loading the embedding model
2024-08-16 20:32:29,504 - INFO - PyTorch version 2.4.0+cu124 available.
2024-08-16 20:32:29,834 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en
2024-08-16 20:32:35,855 - INFO - Embedding model loaded
2024-08-16 20:32:49,136 - INFO - Loading pdf document from uploads\Employee_Attrition_Action_Plan.pdf
2024-08-16 20:32:49,964 - INFO - Summarizing document
2024-08-16 20:32:50,973 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
2024-08-16 20:32:51,487 - INFO - Creating vector store
2024-08-16 20:32:52,933 - WARNING - C:\Users\timmy_3aupohg\anaconda3\envs\smog_env\Lib\site-packages\transformers\models\bert\modeling_bert.py:439: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\cb\pytorch_1000000000000\work\aten\src\ATen\native\transformers\cuda\sdp_utils.cpp:555.)
attn_output = torch.nn.functional.scaled_dot_product_attention(
2024-08-16 20:32:53,278 - INFO - Vector store created
2024-08-16 20:32:53,278 - INFO - Saving the vector store
2024-08-16 20:32:53,278 - INFO - Saving embeddings
2024-08-16 20:32:53,278 - INFO - Vector store saved
2024-08-16 20:32:53,278 - INFO - Saving embeddings
2024-08-16 20:32:53,278 - INFO - Creating thumbnail for uploads\Employee_Attrition_Action_Plan.pdf
2024-08-16 20:36:58,132 - INFO - Searching for the query
2024-08-16 20:36:58,132 - INFO - Searching for Implement Career Development Programs
2024-08-16 20:36:58,133 - INFO - Loading embedded data
2024-08-16 20:36:59,422 - INFO - Search completed
2024-08-16 20:36:59,422 - INFO - Search completed
2024-08-16 20:36:59,422 - INFO - Summarizing search results
2024-08-16 20:36:59,422 - INFO - Search results summarized
2024-08-16 20:40:29,336 - INFO - Loading the embedding model
2024-08-16 20:40:37,718 - INFO - PyTorch version 2.4.0+cu124 available.
2024-08-16 20:40:38,253 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en
2024-08-16 20:40:44,320 - INFO - Embedding model loaded
2024-08-16 20:40:59,981 - INFO - Searching for the query
2024-08-16 20:40:59,984 - INFO - Searching for Implement Career Development Programs
2024-08-16 20:40:59,985 - INFO - Loading embedded data
2024-08-16 20:41:01,671 - WARNING - C:\Users\timmy_3aupohg\anaconda3\envs\smog_env\Lib\site-packages\transformers\models\bert\modeling_bert.py:439: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\cb\pytorch_1000000000000\work\aten\src\ATen\native\transformers\cuda\sdp_utils.cpp:555.)
attn_output = torch.nn.functional.scaled_dot_product_attention(
2024-08-16 20:41:01,754 - INFO - Search completed
2024-08-16 20:41:01,754 - INFO - Search completed
2024-08-16 20:41:01,754 - INFO - Summarizing search results
2024-08-16 20:41:01,754 - INFO - Search results summarized
2024-08-16 20:42:39,142 - INFO - Loading pdf document from uploads\Employee_Attrition_Action_Plan.pdf
2024-08-16 20:42:39,142 - INFO - Loading docx document from uploads\How to change rear windshield wipers on TOYOTA Corolla.docx
2024-08-16 20:42:39,504 - INFO - Creating documents from text
2024-08-16 20:42:39,998 - INFO - Summarizing document
2024-08-16 20:42:41,011 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
2024-08-16 20:42:41,534 - INFO - Summarizing document
2024-08-16 20:42:42,011 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
2024-08-16 20:42:42,536 - INFO - Creating vector store
2024-08-16 20:42:44,194 - INFO - Vector store created
2024-08-16 20:42:44,194 - INFO - Saving the vector store
2024-08-16 20:42:44,194 - INFO - Saving embeddings
2024-08-16 20:42:44,194 - INFO - Vector store saved
2024-08-16 20:42:44,194 - INFO - Saving embeddings
2024-08-16 20:42:44,194 - INFO - Creating thumbnail for uploads\Employee_Attrition_Action_Plan.pdf
2024-08-16 20:42:44,212 - INFO - Creating thumbnail for uploads\How to change rear windshield wipers on TOYOTA Corolla.docx
2024-08-16 20:42:55,518 - INFO - Searching for the query
2024-08-16 20:42:55,518 - INFO - Searching for wirebrush
2024-08-16 20:42:55,518 - INFO - Loading embedded data
2024-08-16 20:42:55,581 - INFO - Search completed
2024-08-16 20:42:55,581 - INFO - Search completed
2024-08-16 20:42:55,581 - INFO - Summarizing search results
2024-08-16 20:42:55,581 - INFO - Search results summarized
2024-08-16 20:43:13,345 - INFO - Searching for the query
2024-08-16 20:43:13,345 - INFO - Searching for Implement Career Development Programs
2024-08-16 20:43:13,345 - INFO - Loading embedded data
2024-08-16 20:43:13,362 - INFO - Search completed
2024-08-16 20:43:13,362 - INFO - Search completed
2024-08-16 20:43:13,362 - INFO - Summarizing search results
2024-08-16 20:43:13,362 - INFO - Search results summarized
2024-08-16 20:43:37,481 - INFO - Searching for the query
2024-08-16 20:43:37,481 - INFO - Searching for toyota corolla
2024-08-16 20:43:37,481 - INFO - Loading embedded data
2024-08-16 20:43:37,497 - INFO - Search completed
2024-08-16 20:43:37,497 - INFO - Search completed
2024-08-16 20:43:37,497 - INFO - Summarizing search results
2024-08-16 20:43:37,497 - INFO - Search results summarized
2024-08-16 20:45:57,119 - INFO - Loading pdf document from uploads\corolla-2020-toyota-owners-manual.pdf
2024-08-16 20:45:57,119 - INFO - Loading pdf document from uploads\Employee_Attrition_Action_Plan.pdf
2024-08-16 20:45:57,136 - INFO - Loading docx document from uploads\How to change rear windshield wipers on TOYOTA Corolla.docx
2024-08-16 20:45:57,495 - INFO - Creating documents from text
2024-08-16 20:46:44,699 - INFO - Summarizing document
2024-08-16 20:46:45,723 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
2024-08-16 20:46:46,242 - INFO - Summarizing document
2024-08-16 20:46:46,879 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
2024-08-16 20:46:47,382 - INFO - Summarizing document
2024-08-16 20:46:47,888 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
2024-08-16 20:46:48,402 - INFO - Creating vector store
2024-08-16 20:46:58,675 - INFO - Searching for the query
2024-08-16 20:46:58,675 - INFO - Searching for Manual for toyota corolla
2024-08-16 20:46:58,675 - INFO - Loading embedded data
2024-08-16 20:46:59,733 - INFO - Search completed
2024-08-16 20:46:59,749 - INFO - Search completed
2024-08-16 20:46:59,749 - INFO - Summarizing search results
2024-08-16 20:46:59,749 - INFO - Search results summarized
2024-08-16 20:47:07,643 - INFO - Vector store created
2024-08-16 20:47:07,643 - INFO - Saving the vector store
2024-08-16 20:47:07,643 - INFO - Saving embeddings
2024-08-16 20:47:07,643 - INFO - Vector store saved
2024-08-16 20:47:07,643 - INFO - Saving embeddings
2024-08-16 20:47:07,659 - INFO - Creating thumbnail for uploads\corolla-2020-toyota-owners-manual.pdf
2024-08-16 20:47:07,674 - INFO - Creating thumbnail for uploads\Employee_Attrition_Action_Plan.pdf
2024-08-16 20:47:07,674 - INFO - Creating thumbnail for uploads\How to change rear windshield wipers on TOYOTA Corolla.docx
2024-08-16 20:48:03,817 - INFO - Searching for the query
2024-08-16 20:48:03,818 - INFO - Searching for Manual for toyota corolla
2024-08-16 20:48:03,818 - INFO - Loading embedded data
2024-08-16 20:48:04,276 - INFO - Search completed
2024-08-16 20:48:04,276 - INFO - Search completed
2024-08-16 20:48:04,276 - INFO - Summarizing search results
2024-08-16 20:48:04,276 - INFO - Search results summarized
2024-08-16 20:48:14,034 - INFO - Searching for the query
2024-08-16 20:48:14,044 - INFO - Searching for Manual for toyota corolla
2024-08-16 20:48:14,044 - INFO - Loading embedded data
2024-08-16 20:48:14,067 - INFO - Search completed
2024-08-16 20:48:14,067 - INFO - Search completed
2024-08-16 20:48:14,074 - INFO - Summarizing search results
2024-08-16 20:48:14,074 - INFO - Search results summarized
2024-08-16 21:32:02,645 - INFO - Searching for the query
2024-08-16 21:32:02,645 - INFO - Searching for Manual Of toyota corolla
2024-08-16 21:32:02,645 - INFO - Loading embedded data
2024-08-16 21:32:02,795 - INFO - Search completed
2024-08-16 21:32:02,799 - INFO - Search completed
2024-08-16 21:32:02,800 - INFO - Summarizing search results
2024-08-16 21:32:02,804 - INFO - Search results summarized
2024-08-16 21:33:39,365 - INFO - Transcribing audio chunks from uploads\audio-2.mp3
2024-08-16 21:33:39,373 - INFO - Splitting audio file uploads\audio-2.mp3 by duration
2024-08-16 21:33:39,378 - INFO - Loading pdf document from uploads\corolla-2020-toyota-owners-manual.pdf
2024-08-16 21:33:39,404 - INFO - Loading pdf document from uploads\Employee_Attrition_Action_Plan.pdf
2024-08-16 21:33:39,411 - INFO - Loading docx document from uploads\How to change rear windshield wipers on TOYOTA Corolla.docx
2024-08-16 21:33:39,938 - INFO - Creating documents from text
2024-08-16 21:34:58,915 - INFO - Transcribing audio file audio-2_chunks/audio-2_chunk1.mp3
2024-08-16 21:35:15,275 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-16 21:35:18,193 - INFO - Transcribing audio file audio-2_chunks/audio-2_chunk2.mp3
2024-08-16 21:35:28,536 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-16 21:35:31,402 - INFO - Transcribing audio file audio-2_chunks/audio-2_chunk3.mp3
2024-08-16 21:35:38,932 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-16 21:35:43,996 - INFO - Transcribing audio file audio-2_chunks/audio-2_chunk4.mp3
2024-08-16 21:36:00,061 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-16 21:36:02,748 - INFO - Transcribing audio file audio-2_chunks/audio-2_chunk5.mp3
2024-08-16 21:36:09,913 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-16 21:36:12,705 - INFO - Transcribing audio file audio-2_chunks/audio-2_chunk6.mp3
2024-08-16 21:36:21,767 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-16 21:36:25,045 - INFO - Transcribing audio file audio-2_chunks/audio-2_chunk7.mp3
2024-08-16 21:36:32,036 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-16 21:36:37,503 - INFO - Summarizing document
2024-08-16 21:36:39,003 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
2024-08-16 21:36:39,512 - INFO - Summarizing document
2024-08-16 21:36:40,128 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+1 -1
View File
@@ -31,7 +31,7 @@ app.add_middleware(
class SearchRequest(BaseModel):
query: str
@app.get("/load_documents")
@app.post("/load_documents")
def load_documents(directory: str):
# loading the documents from the directory
+6 -6
View File
@@ -12,7 +12,7 @@ def get_data_description(data_path):
data_name = data_path.split('/')[-1].split('\\')[-1].split('.')[0]
# print(data_name)
# open the data.json file
with open('data/data.json') as f:
with open('uploads/data.json') as f:
data = json.load(f)
existing_data = data.keys()
if data_name in existing_data:
@@ -25,10 +25,10 @@ def get_data_thumbnail(data_path, timestamp = None):
# ensuring no // or / or extension is present
file_name = data_path.split('/')[-1].split('\\')[-1].split('.')[0]
# first check is to see if the file_name has a .png image in the thumbnail folder
if os.path.exists(f'data/thumbnails/{file_name}.png'):
return f'data/thumbnails/{file_name}.png'
if os.path.exists(f'uploads/thumbnails/{file_name}.png'):
return f'uploads/thumbnails/{file_name}.png'
# the second check is to see if we have a folder with this file_name
elif os.path.exists(f'data/{file_name}'):
elif os.path.exists(f'uploads/{file_name}'):
# so now we want to access the first timestamp
if timestamp:
first = timestamp[0]
@@ -40,8 +40,8 @@ def get_data_thumbnail(data_path, timestamp = None):
# bringing them together
image_file = f"{start}-{end}s.png"
# now checking if the file exists
if os.path.exists(f'data/{file_name}/{image_file}'):
return f'data/{file_name}/{image_file}'
if os.path.exists(f'uploads/{file_name}/{image_file}'):
return f'uploads/{file_name}/{image_file}'
def summarize_doc_search(data):
summary = {}
Binary file not shown.
Binary file not shown.
Binary file not shown.
+1
View File
@@ -0,0 +1 @@
{"Employee_Attrition_Action_Plan": {"doc_id": "18ab2b5127504921b6f316475dbf7c69", "num_pages": 3, "doc_summary": "This document is about an Action Plan to Prevent Employee Attrition, which outlines five strategies to reduce employee turnover and improve job satisfaction. The plan includes:\n\n* Conducting comprehensive exit interviews to identify patterns and common issues\n* Implementing career development programs to provide employees with a clear career path\n* Enhancing compensation and benefits to ensure they are competitive\n* Fostering a positive work environment through diversity, equity, and inclusion initiatives\n* Improving work-life balance by promoting flexibility and reducing burnout"}, "How to change rear windshield wipers on TOYOTA Corolla": {"doc_id": "ee5616e1611d43f3bc1896a2da892c64", "num_pages": 6, "doc_summary": "This document is about a replacement guide for the rear windshield wipers on a TOYOTA Corolla VI Liftback (E90) model. The guide provides instructions on how to change the rear windshield wipers on various trim levels of the vehicle, including 1.6, 1.8 D, 1.3 i, 1.6 GTI, 1.6 4WD, 1.3, 1.4, and 1.6 EFi"}, "corolla-2020-toyota-owners-manual": {"doc_id": "b3d96d6eb1ab452e9b2c8228b15cda13", "num_pages": 588, "doc_summary": "This document is about the user manual for a Toyota Corolla, providing information and instructions on various aspects of the vehicle, including safety and security, vehicle status, driving operations, interior features, maintenance, and troubleshooting. The manual covers topics such as child seat installation, theft deterrent systems, reading driving-related information, operating the Entune audio system, and caring for the vehicle's interior and exterior. 
It also includes information on reporting safety defects and provides instructions for Canadian owners on seat belt and SRS air"}, "audio-2": {"doc_id": "5216621677ff4c7ca7c9029907d41e81", "num_pages": 7, "doc_summary": "This document is about a tutorial on how to change the oil in a car or truck, specifically geared towards beginners who have never done an oil change before. The video covers the necessary tools and materials needed, including a socket set, gloves, paper towels, oil and filter, oil filter wrench, oil catch pan, funnel, and breaker bar. The tutorial also explains how to find the correct oil and filter for the vehicle, and how to jack up the vehicle to access the oil drain plug. The"}}
Binary file not shown.

After

Width:  |  Height:  |  Size: 6.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.1 KiB

+5 -5
View File
@@ -405,15 +405,15 @@ def doc_summarizer(document_page: list) -> str:
#-----------------------------------------------------OTHERS--------------------------------------------------------------
def save_embedded_data(embeddings, key="data"):
def save_embedded_data(embeddings, path = "index/faiss_index",):
logger.info(f"Saving embeddings")
embeddings.save_local(f"index/faiss_index_{key}")
embeddings.save_local(f"index/faiss_index")
print("Embeddings saved")
return 'saved'
def load_embedded_data(embeddings=embeddings, key="data"):
def load_embedded_data(embeddings=embeddings, path = "index/faiss_index"):
logger.info(f"Loading embedded data")
embed_db = FAISS.load_local(f"index/faiss_index_{key}", embeddings, allow_dangerous_deserialization=True)
embed_db = FAISS.load_local(f"index/faiss_index", embeddings, allow_dangerous_deserialization=True)
return embed_db
#-----------------------------------------------------Data Loading Process----------------------------------------------------
@@ -522,7 +522,7 @@ def create_vector_store(documents: list, docs_id: list, num_pages: list):
logger.info(f"Vector store created")
logger.info(f"Saving the vector store")
# saving the vector store automatically
save_embedded_data(vector_store, key="data")
save_embedded_data(vector_store)
logger.info(f"Vector store saved")
return vector_store