diff --git a/app.py b/app.py index 445d8bc..265f548 100644 --- a/app.py +++ b/app.py @@ -19,7 +19,6 @@ from typing import Optional, Union, Dict, Any import os import requests import os -from PyPDF2 import PdfReader from scripts.transcriber import transcribe_media,group_words_into_sentences # Import the transcribe_media function from scripts.generate_summary import general_summary,custom_summary # Load environment variables @@ -170,4 +169,6 @@ async def startup_event(): if __name__ == "__main__": import uvicorn - uvicorn.run("app:app", host="0.0.0.0", port=3000, reload=True) \ No newline at end of file + uvicorn.run("app:app", host="0.0.0.0", port=5056, reload=True) + + \ No newline at end of file diff --git a/src/prompt.py b/src/prompt.py index a5bbc7c..11ec260 100644 --- a/src/prompt.py +++ b/src/prompt.py @@ -25,7 +25,6 @@ At the end of each section, include a field named "minutes_total" which represen - The JSON output must be properly formatted and valid. - Do not include any markdown or code block formatting markers (such as ```json) in your output. - Ensure that for each sentence you generate, every word in that sentence is assigned the same timestamp—the start timestamp of that sentence. 
- **Example Output JSON:** { @@ -38,6 +37,11 @@ At the end of each section, include a field named "minutes_total" which represen { "chapter": "Project Overview", "time_stamp": {"start": 5.12, "end": 5.68}, + "content": [ + {"text":"- overview of the project's objectives.","original_transcript_start":3.4,"original_transcript_end":5.7}, + {"text":"- It outlines the key milestones achieved so far.", "original_transcript_start":6.7, "original_transcript_end":10.5}, + {"text":"- main challenges faced during the project.", "original_transcript_start":10.8, "original_transcript_end":11.2} + ], "words_time_stamp": [ {"word": "Project", "timestamp": 5.12}, {"word": "Overview", "timestamp": 5.12} @@ -46,13 +50,17 @@ At the end of each section, include a field named "minutes_total" which represen { "chapter": "Budget Review", "time_stamp": {"start": 10.50, "end": 11.20}, + "content": [ + {"text":"- review of the current budget allocations.","original_transcript_start":10.5,"original_transcript_end":11.0}, + {"text":"- discussion on potential cost-saving measures.", "original_transcript_start":11.1, "original_transcript_end":12.0}, + {"text":"- approval of the budget for the next quarter.", "original_transcript_start":12.1, "original_transcript_end":13.0} + ], "words_time_stamp": [ {"word": "Budget", "timestamp": 10.50}, {"word": "Review", "timestamp": 10.50} ] } ] - }, "Outcomes": { "minutes_total": 3, "content": [ @@ -94,7 +102,7 @@ At the end of each section, include a field named "minutes_total" which represen ] } } - +NOTE: The content under each chapter provides a detailed bulleted explanation of the chapter. Each bullet includes "original_transcript_start" and "original_transcript_end", which indicate the timestamps for that bulleted point, referencing where to find it in the original transcript. Remember, every word in each sentence must have a single timestamp equal to the start timestamp of that sentence. 
Your output must strictly adhere to the provided structure, and the "minutes_total" for each section must be correctly calculated based on the start time of the first sentence and the end time of the last sentence, expressed as a decimal if necessary. NOTE: start and end times are in seconds, so take that into consideration when calculating the total time in minutes. """