starter transcript api added

2025-02-15 01:06:29 +01:00
parent 87ad727d6c
commit 24cf605f28
9 changed files with 330 additions and 0 deletions
@@ -0,0 +1,112 @@
 import os
 from typing import Optional
 from fastapi import FastAPI, HTTPException, Security, Depends
 from fastapi.security import APIKeyHeader
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse
 from dotenv import load_dotenv
 import json
 from pydantic import BaseModel
 from langchain_openai import ChatOpenAI
 import requests
 import tempfile
 from typing import Dict, Any
 from fastapi.responses import Response
 from datetime import datetime
 from fastapi import HTTPException
 from pydantic import BaseModel
 from typing import Optional, Union, Dict, Any
 import os
 import requests
 import os
 from PyPDF2 import PdfReader
 from scripts.transcriber import transcribe_media,group_words_into_sentences  # Import the transcribe_media function
 # Populate os.environ from a .env file before any setting is read below.
 load_dotenv()
 # Secret that callers must present as a bearer token (None when unset).
 API_KEY = os.environ.get("API_KEY_ACCESS")
 # The FastAPI application together with its published metadata.
 app = FastAPI(version="1.0.0", title="Microdot AI API", description="API For fire fighter")
 # CORS: every origin, method and header is accepted, credentials included.
 # NOTE(review): wildcard origins combined with allow_credentials=True is very
 # permissive — confirm this is intended before exposing the service publicly.
 app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_credentials=True,
    allow_origins=["*"],
 )
 # Security scheme that reads the raw "Authorization" header. get_api_key
 # checks for a missing header itself, hence auto_error=False.
 api_key_header = APIKeyHeader(auto_error=False, name="Authorization")
 async def get_api_key(api_key_header: str = Security(api_key_header)) -> str:
    """Validate the bearer token carried in the ``Authorization`` header.

    Args:
        api_key_header: Raw header value supplied by the ``APIKeyHeader``
            security scheme; falsy when the header is absent.

    Returns:
        The token that was presented, once it has been accepted.

    Raises:
        HTTPException: 401 when the header is missing, does not start with
            ``Bearer ``, or the token does not equal ``API_KEY``.
    """
    import secrets  # local import: only this dependency needs it

    scheme = 'Bearer '
    if not api_key_header or not api_key_header.startswith(scheme):
        raise HTTPException(
            status_code=401,
            detail={"error": "Unauthorized", "message": "API key is missing or invalid."}
        )
    # Take everything after the scheme so extra spaces cannot shift the token.
    token = api_key_header[len(scheme):].strip()
    # An unset or empty API_KEY must never authenticate anybody, so an empty
    # expected value (or an empty token) is rejected before comparing.
    expected = API_KEY or ""
    # compare_digest runs in constant time; bytes are used because the str
    # form only accepts ASCII input.
    token_ok = bool(token) and bool(expected) and secrets.compare_digest(
        token.encode("utf-8"), expected.encode("utf-8")
    )
    if not token_ok:
        raise HTTPException(
            status_code=401,
            detail={"error": "Unauthorized", "message": "API key does not match."}
        )
    return token
 class TranscribeRequest(BaseModel):
    """Request body accepted by the ``/microdot-ai/transcribe`` endpoint."""
    # URL of the media to transcribe; the endpoint only calls the transcriber
    # when this is set.
    media_url: Optional[str] = None
    # Forwarded to transcribe_media as its media_type argument.
    # NOTE(review): no default is declared, so whether this field is required
    # depends on the installed pydantic version — confirm the intended contract.
    media_type: Optional[str]
 class ChatResp(BaseModel):
    """Response model carrying an optional error message.

    Not referenced by any other code visible in this file.
    """
    # Error text, or None when nothing went wrong.
    error: Optional[str] = None
 class TranscriptResponse(BaseModel):
    """Response model returned by the transcribe endpoint."""
    # The endpoint stores the result of group_words_into_sentences here.
    # NOTE(review): the endpoint also passes an `error` keyword that is not
    # declared on this model — confirm whether an error field is wanted.
    transcript: dict
@app.post("/microdot-ai/transcribe")
async def chat_endpoint(
    request: TranscribeRequest,
    api_key: str = Depends(get_api_key)
):
    """Transcribe the media at ``request.media_url`` and group it into sentences.

    Args:
        request: Body holding the media URL and the media type. A missing or
            null ``media_type`` is treated as ``"audio"``, the default of
            ``transcribe_media``.
        api_key: Token accepted by ``get_api_key``; only used to enforce
            authentication.

    Returns:
        TranscriptResponse: wraps the output of ``group_words_into_sentences``.

    Raises:
        HTTPException: 400 when ``media_url`` is missing; 500 when the
            transcription fails or its response cannot be parsed.
    """
    # Reject a missing URL up front; previously this fell through to the
    # parsing step and surfaced as an unrelated 500.
    if not request.media_url:
        raise HTTPException(status_code=400, detail="media_url is required.")
    try:
        # Use the transcribe_media function to transcribe the media
        transcription_response = transcribe_media(
            request.media_url,
            media_type=request.media_type or "audio",
        )
        if transcription_response is None:
            raise HTTPException(status_code=500, detail="Transcription failed.")
        print(f"Transcription response: {transcription_response}")  # Debugging print
        # Parse response: word-level entries of the first channel's first alternative
        words = transcription_response["results"]["channels"][0]["alternatives"][0]["words"]
        transcript = group_words_into_sentences(words=words)
        # TranscriptResponse declares only `transcript`, so nothing else is passed.
        return TranscriptResponse(transcript=transcript)
    except HTTPException:
        # Deliberate HTTP errors keep their own status code and detail.
        raise
    except Exception as e:
        print(f"Error processing chat request: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"Error processing chat request: {str(e)}"
        )
@app.on_event("startup")
async def startup_event():
    """Startup hook; currently a placeholder that performs no work."""
    return None
 if __name__ == "__main__":
    # Run the development server when this module is executed directly.
    import uvicorn
    server_options = {"host": "0.0.0.0", "port": 3000, "reload": True}
    uvicorn.run("app:app", **server_options)
@@ -0,0 +1,28 @@
 openai
 pandas
 python-dotenv
 fastapi
 uvicorn
 langchain-community
 langchain-openai
 pydantic
 pypdf
 pypandoc
 Spire.Doc
 plum-dispatch==1.7.4
 scikit-learn
 werkzeug
 python-multipart
 langgraph
 tiktoken
 langchainhub
 chromadb
 langchain
 langchain-text-splitters
 beautifulsoup4
 deepgram_sdk
 moviepy
 yt-dlp
 ffmpeg-python
 reportlab
 anthropic
@@ -0,0 +1,190 @@
 import os
 import logging
 import re
 import uuid
 import yt_dlp
 from deepgram.utils import verboselogs
 from dotenv import load_dotenv
 load_dotenv()
 from deepgram import DeepgramClient, PrerecordedOptions, FileSource
 # Define your URLs (example URLs)
 #audio_url = "https://s3.us-east-2.amazonaws.com/com.mkdlabs.images/baas/jordan/019933724441Business%20English%20Conversation%20Lesson%2045_%20Meeting%20a%20New%20Colleague.mp3"
 #video_url = "https://s3.us-east-2.amazonaws.com/com.mkdlabs.images/baas/jordan/038426704141Business%20English%20Conversation%20Lesson%2045_%20%20Meeting%20a%20New%20Colleague.mp4"
 # Folder for file uploads/downloads: "../uploads" joined onto the working
 # directory the process has at import time.
 UPLOAD_FOLDER = os.path.join(os.getcwd(), "../uploads")
 # Create the folder at import time if it does not exist yet.
 os.makedirs(UPLOAD_FOLDER, exist_ok=True)
 def sanitize_filename(name: str) -> str:
    """
    Build a file-system friendly name from ``name``.

    Every character that is not a word character, whitespace or a hyphen is
    dropped, surrounding whitespace is trimmed, and each remaining space
    becomes an underscore.
    """
    kept = re.sub(r'[^\w\s-]', '', name)
    trimmed = kept.strip()
    return trimmed.replace(' ', '_')
 def extract_audio(url: str, output_template=os.path.join(UPLOAD_FOLDER, "%(title)s.%(ext)s")) -> str:
    """
    Download the media at ``url`` with yt-dlp and extract its audio as MP3.

    The download is written according to ``output_template`` and then renamed
    to ``<sanitized title>_<unique id>.mp3`` inside ``UPLOAD_FOLDER``.

    Args:
        url: Address of the media to download.
        output_template: yt-dlp output template for the downloaded file.

    Returns:
        str: The absolute path to the renamed audio file.

    Raises:
        FileNotFoundError: If the expected ``.mp3`` file is missing after the
            download and post-processing step.
    """
    ydl_opts = {
        "format": "bestaudio/best",
        "outtmpl": output_template,
        "postprocessors": [{
            "key": "FFmpegExtractAudio",
            "preferredcodec": "mp3",
            "preferredquality": "192",
        }],
        "quiet": True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=True)
        # prepare_filename returns the name *before* postprocessing, so the
        # extension is swapped for the one the audio extractor produces.
        original_filepath = os.path.splitext(ydl.prepare_filename(info))[0] + ".mp3"
    if not os.path.exists(original_filepath):
        # Debug aid: show what actually landed in the upload folder.
        files = os.listdir(UPLOAD_FOLDER)
        print("Warning: Could not find expected file.")
        print("Files in upload folder:", files)
        raise FileNotFoundError(f"Expected audio file not found: {original_filepath}")
    # Fall back to a fixed stem when the title is absent, None, or sanitizes
    # to nothing; sanitize_filename cannot handle None.
    title = info.get('title') or 'audio'
    safe_title = sanitize_filename(title) or 'audio'
    # The random suffix keeps the final name unique across downloads.
    unique_id = uuid.uuid4().hex
    new_audio_filename = f"{safe_title}_{unique_id}.mp3"
    new_audio_filepath = os.path.join(UPLOAD_FOLDER, new_audio_filename)
    os.rename(original_filepath, new_audio_filepath)
    print(f"Renamed file to: {new_audio_filepath}")
    return os.path.abspath(new_audio_filepath)
 def transcribe_media(file_loc: str, media_type: str = "audio"):
    """
    Transcribe media using Deepgram (model "nova-3", smart formatting and
    diarization enabled).

    If media_type is "audio", the URL is handed to Deepgram's URL transcription.
    If media_type is "video", the audio is first extracted to a local file with
    extract_audio, uploaded for transcription, and the local file is deleted
    afterwards whether or not the transcription succeeded.

    Args:
        file_loc (str): URL to the remote audio or video file.
        media_type (str): "audio" or "video" (case-insensitive).

    Returns:
        The transcription response from Deepgram, or None when anything goes
        wrong (including an unsupported media_type); the error is printed.
    """
    # The key is deliberately never printed: stdout usually ends up in logs.
    api_key = os.getenv("DEEPGRAM_API_KEY2")
    # Path of the temporary audio file once one exists. None (rather than a
    # placeholder string) so cleanup can never remove an unrelated file.
    local_audio_path = None
    try:
        deepgram: DeepgramClient = DeepgramClient(api_key=api_key)
        options: PrerecordedOptions = PrerecordedOptions(
            model="nova-3",
            smart_format=True,
            diarize=True,
        )
        # A non-string media_type (e.g. None) falls into the ValueError branch
        # below instead of failing on .lower().
        kind = media_type.lower() if isinstance(media_type, str) else None
        if kind == "audio":
            # For remote audio files, use the URL transcription method.
            response = deepgram.listen.rest.v("1").transcribe_url({"url": file_loc}, options)
        elif kind == "video":
            # For remote video files, first extract the audio locally.
            local_audio_path = extract_audio(file_loc)
            print(f"Extracted audio to: {local_audio_path}")
            # Transcribe using the local file method.
            with open(local_audio_path, "rb") as file:
                buffer_data = file.read()
            payload: FileSource = {"buffer": buffer_data}
            response = deepgram.listen.rest.v("1").transcribe_file(payload, options)
        else:
            raise ValueError("media_type must be either 'audio' or 'video'.")
        print(f"Transcription response: {response}\n\n")
        return response
    except Exception as e:
        # Best-effort contract: callers test for None instead of catching.
        print(f"Exception during transcription: {e}")
        return None
    finally:
        # Single cleanup point: runs on success and on failure alike.
        if local_audio_path is not None and os.path.exists(local_audio_path):
            os.remove(local_audio_path)
            print(f"Deleted local audio file: {local_audio_path}")
 def group_words_into_sentences(words, max_words=15):
    """
    Group word-level transcription entries into sentence-like chunks.

    A new chunk starts whenever the speaker changes or the current chunk
    already holds ``max_words`` words.

    Args:
        words: Sequence of entries that can be indexed with the keys
            "punctuated_word", "speaker", "start" and "end".
        max_words: Maximum number of words per chunk. ``None`` or a value
            below 1 disables the limit, so only speaker changes split.

    Returns:
        dict: ``{"sentences": [...]}`` where every item has the keys
        "sentence" (words joined by spaces), "speaker", "start", "end" and
        "words" (list of ``{"word", "start", "end"}`` dicts).
    """
    sentences = []
    current_sentence = []
    current_speaker = None
    limit_enabled = max_words is not None and max_words > 0
    for word_info in words:
        speaker = word_info["speaker"]
        limit_reached = limit_enabled and len(current_sentence) >= max_words
        # Close the running chunk on a speaker change or when it is full.
        if current_sentence and (speaker != current_speaker or limit_reached):
            sentences.append(_build_sentence(current_sentence, current_speaker))
            current_sentence = []
        current_speaker = speaker
        # Entries are indexed rather than read with .get() so that objects
        # offering only item access keep working.
        current_sentence.append({
            "word": word_info["punctuated_word"],
            "start": word_info["start"],
            "end": word_info["end"],
        })
    # Append the last chunk if any words remain.
    if current_sentence:
        sentences.append(_build_sentence(current_sentence, current_speaker))
    return {"sentences": sentences}


 def _build_sentence(chunk, speaker):
    """Assemble the output record for one non-empty chunk of word dicts."""
    return {
        "sentence": " ".join([w["word"] for w in chunk]),
        "speaker": speaker,
        "start": chunk[0]["start"],
        "end": chunk[-1]["end"],
        "words": chunk,
    }
 if __name__ == "__main__":
    # Manual smoke test: transcribe the sample video and print the result.
    # The audio sample URL is kept alongside for trying the audio path by hand.
    video_url = "https://s3.us-east-2.amazonaws.com/com.mkdlabs.images/baas/jordan/038426704141Business%20English%20Conversation%20Lesson%2045_%20%20Meeting%20a%20New%20Colleague.mp4"
    audio_url = "https://s3.us-east-2.amazonaws.com/com.mkdlabs.images/baas/jordan/019933724441Business%20English%20Conversation%20Lesson%2045_%20Meeting%20a%20New%20Colleague.mp3"
    response = transcribe_media(file_loc=video_url, media_type="video")
    print(response)