Add tiered summarization based on pricing plans

- Implement advanced AI summarization with action items for Pro plan - Create basic bullet-point summarization for Freemium plan - Add plan tier validation and feature differentiation - Support speaker identification in transcripts - Define plan limits (600 mins Pro/200 mins Freemium)
2025-04-24 10:15:13 +01:00
commit c345538243
16 changed files with 12484 additions and 0 deletions
@@ -0,0 +1,170 @@
 # ---> Python
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
 *$py.class
 # C extensions
 *.so
 # Distribution / packaging
 .Python
 build/
 develop-eggs/
 dist/
 downloads/
 eggs/
 .eggs/
 lib/
 lib64/
 parts/
 sdist/
 var/
 wheels/
 share/python-wheels/
 *.egg-info/
 .installed.cfg
 *.egg
 MANIFEST
 # PyInstaller
 #  Usually these files are written by a python script from a template
 #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 *.manifest
 *.spec
 # Installer logs
 pip-log.txt
 pip-delete-this-directory.txt
 # Unit test / coverage reports
 htmlcov/
 .tox/
 .nox/
 .coverage
 .coverage.*
 .cache
 nosetests.xml
 coverage.xml
 *.cover
 *.py,cover
 .hypothesis/
 .pytest_cache/
 cover/
 # Translations
 *.mo
 *.pot
 # Django stuff:
 *.log
 local_settings.py
 db.sqlite3
 db.sqlite3-journal
 # Flask stuff:
 instance/
 .webassets-cache
 # Scrapy stuff:
 .scrapy
 # Sphinx documentation
 docs/_build/
 # PyBuilder
 .pybuilder/
 target/
 # Jupyter Notebook
 .ipynb_checkpoints
 # IPython
 profile_default/
 ipython_config.py
 # pyenv
 #   For a library or package, you might want to ignore these files since the code is
 #   intended to run in multiple environments; otherwise, check them in:
 # .python-version
 # pipenv
 #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 #   install all needed dependencies.
 #Pipfile.lock
 # UV
 #   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
 #   This is especially recommended for binary packages to ensure reproducibility, and is more
 #   commonly ignored for libraries.
 #uv.lock
 # poetry
 #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 #   This is especially recommended for binary packages to ensure reproducibility, and is more
 #   commonly ignored for libraries.
 #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
 #poetry.lock
 # pdm
 #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
 #pdm.lock
 #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
 #   in version control.
 #   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
 .pdm.toml
 .pdm-python
 .pdm-build/
 # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
 __pypackages__/
 # Celery stuff
 celerybeat-schedule
 celerybeat.pid
 # SageMath parsed files
 *.sage.py
 # Environments
 .env
 .venv
 env/
 venv/
 ENV/
 env.bak/
 venv.bak/
 # Spyder project settings
 .spyderproject
 .spyproject
 # Rope project settings
 .ropeproject
 # mkdocs documentation
 /site
 # mypy
 .mypy_cache/
 .dmypy.json
 dmypy.json
 # Pyre type checker
 .pyre/
 # pytype static type analyzer
 .pytype/
 # Cython debug symbols
 cython_debug/
 # PyCharm
 #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
 #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
@@ -0,0 +1,368 @@
 # Microdot AI API Documentation
 ## Overview
 Microdot AI provides a powerful API for transcribing audio/video content and generating AI-powered summaries. The API supports different pricing tiers with varying features and capabilities.
 ## Base URL
 ```
 https://api.microdot.ai
 ```
 ## Authentication
 All API requests require authentication using a Bearer token.
 ```
 Authorization: Bearer YOUR_API_KEY
 ```
 ## Endpoints
 ### 1. Transcribe Media
 Transcribes audio or video content from a URL.
 **Endpoint:** `/microdot-ai/transcribe`
 **Method:** POST
 **Request Body:**
 ```json
 {
  "media_url": "https://example.com/audio-file.mp3",
  "media_type": "audio"  // "audio" or "video"
 }
 ```
 **Response:**
 ```json
 {
  "transcript": {
    "sentences": [
      {
        "sentence": "Hello and welcome to the meeting.",
        "speaker": "speaker_0",
        "start": 0.0,
        "end": 2.5,
        "words": [
          {
            "word": "Hello",
            "start": 0.0,
            "end": 0.4
          },
          // Additional words...
        ]
      },
      // Additional sentences...
    ]
  }
 }
 ```
 ### 2. Generate General Summary
 Generates a summary of a transcript based on the user's plan tier. If `plan_tier` is omitted or is not a recognized tier, the Pro summary format is used.
 **Endpoint:** `/microdot-ai/general-summary`
 **Method:** POST
 **Request Body:**
 ```json
 {
  "transcript": "JSON_STRING_OF_TRANSCRIPT",
  "plan_tier": "pro"  // "freemium" or "pro"
 }
 ```
 #### Freemium Plan Response:
 ```json
 {
  "transcript": {
    "Key_Points": [
      {
        "text": "Team discussed Q3 marketing strategy.",
        "timestamp": 120.5
      },
      {
        "text": "Budget approval needed by Friday.",
        "timestamp": 360.2
      },
      {
        "text": "New product launch delayed until September.",
        "timestamp": 480.7
      }
    ],
    "Summary": {
      "text": "Marketing team meeting to review Q3 plans and budget requirements. Team agreed on strategy but product launch delayed.",
      "duration_minutes": 15.5
    }
  }
 }
 ```
 #### Pro Plan Response:
 ```json
 {
  "transcript": {
    "Purpose": {
      "text": "Discuss project progress and define upcoming milestones."
    },
    "Chapters": {
      "minutes_total": 3,
      "content": [
        {
          "chapter": "Project Overview",
          "time_stamp": {"start": 5.12, "end": 5.68},
          "content": [
            {"text": "- overview of the project's objectives.", "original_transcript_start": 3.4, "original_transcript_end": 5.7},
            // Additional content...
          ],
          "words_time_stamp": [
            {"word": "Project", "timestamp": 5.12},
            {"word": "Overview", "timestamp": 5.12}
          ]
        },
        // Additional chapters...
      ]
    },
    "Outcomes": {
      "minutes_total": 3,
      "content": [
        // Outcome content...
      ]
    },
    "Action_Items_Per_User": [
      {
        "speaker": "Speaker_A",
        "minutes_total": 3,
        "action_items": [
          // Action items...
        ]
      }
    ]
  }
 }
 ```
 ### 3. Generate Template Summary
 Generates a custom summary based on a user-defined template.
 **Endpoint:** `/microdot-ai/template-summary`
 **Method:** POST
 **Request Body:**
 ```json
 {
  "transcript": "JSON_STRING_OF_TRANSCRIPT",
  "template": "JSON_STRING_OF_TEMPLATE"
 }
 ```
 **Example Template:**
 ```json
 {
  "Key_Points": "Summarize the most critical discussion points from the meeting.",
  "Summary": "Provide a brief overall summary of what was discussed.",
  "Next_Steps": "List the next steps decided during the meeting, including any action items."
 }
 ```
 **Response:**
 ```json
 {
  "transcript": {
    "Key_Points": {
      "minutes_total": 3.5,
      "content": [
        {
          "text": "Introductions between Diane Taylor and Cody Smith.",
          "time_stamp": {"start": 5.12, "end": 5.68},
          "words_time_stamp": [
            {"word": "Introductions", "timestamp": 5.12},
            // Additional words...
          ]
        }
      ]
    },
    "Summary": {
      // Summary content...
    },
    "Next_Steps": {
      // Next steps content...
    }
  }
 }
 ```
 ## Plan Features
 ### Freemium Plan
 - 200 minutes of transcription per month
 - Basic AI summarization (short bullet points)
 - 7-day transcript history
 - Limited integrations (Google Meet & Zoom only)
 ### Pro Plan
 - 600 minutes of transcription per month
 - Advanced AI summarization with action items
 - Speaker identification
 - 30-day transcript history
 - Multi-platform integrations (Slack, Notion, Asana, Microsoft Teams)
 ## Error Responses
 All endpoints return standard HTTP status codes:
 - `200 OK`: Request successful
 - `400 Bad Request`: Invalid request parameters
 - `401 Unauthorized`: Invalid or missing API key
 - `402 Payment Required`: Plan's transcription minutes exceeded (see Rate Limits)
 - `500 Internal Server Error`: Server-side error
 Error response format:
 ```json
 {
  "detail": {
    "error": "Error type",
    "message": "Detailed error message"
  }
 }
 ```
 ## Rate Limits
 - Freemium Plan: 100 requests per day
 - Pro Plan: 1000 requests per day
 Exceeding your plan's transcription minutes will result in a `402 Payment Required` response until the next billing cycle.
 ## Code Examples
 ### Python
 ```python
 import requests
 import json
 # API endpoint
 base_url = "https://api.microdot.ai"
 # Your API key
 api_key = "your_api_key_here"
 # Headers
 headers = {
    "Authorization": f"Bearer {api_key}",
    "Content-Type": "application/json"
 }
 # 1. Transcribe an audio file
 transcribe_payload = {
    "media_url": "https://example.com/meeting-recording.mp3",
    "media_type": "audio"
 }
 transcribe_response = requests.post(
    f"{base_url}/microdot-ai/transcribe", 
    headers=headers,
    json=transcribe_payload
 )
 # Check if transcription was successful
 if transcribe_response.status_code == 200:
    transcript_data = transcribe_response.json()
    transcript_json = json.dumps(transcript_data["transcript"])
    # 2. Generate a summary (Pro plan)
    summary_payload = {
        "transcript": transcript_json,
        "plan_tier": "pro"
    }
    summary_response = requests.post(
        f"{base_url}/microdot-ai/general-summary", 
        headers=headers,
        json=summary_payload
    )
    if summary_response.status_code == 200:
        summary_data = summary_response.json()
        print("Summary generated successfully!")
        print(json.dumps(summary_data["transcript"], indent=2))
    else:
        print(f"Summary generation failed: {summary_response.text}")
 else:
    print(f"Transcription failed: {transcribe_response.text}")
 ```
 ### JavaScript
 ```javascript
 const axios = require('axios');
 // API endpoint
 const baseUrl = 'https://api.microdot.ai';
 // Your API key
 const apiKey = 'your_api_key_here';
 // Headers
 const headers = {
  'Authorization': `Bearer ${apiKey}`,
  'Content-Type': 'application/json'
 };
 // 1. Transcribe an audio file
 const transcribeAudio = async () => {
  const transcribePayload = {
    media_url: 'https://example.com/meeting-recording.mp3',
    media_type: 'audio'
  };
  try {
    const transcribeResponse = await axios.post(
      `${baseUrl}/microdot-ai/transcribe`,
      transcribePayload,
      { headers }
    );
    const transcriptData = transcribeResponse.data;
    const transcriptJson = JSON.stringify(transcriptData.transcript);
    // 2. Generate a summary (Pro plan)
    const summaryPayload = {
      transcript: transcriptJson,
      plan_tier: 'pro'
    };
    const summaryResponse = await axios.post(
      `${baseUrl}/microdot-ai/general-summary`,
      summaryPayload,
      { headers }
    );
    console.log('Summary generated successfully!');
    console.log(JSON.stringify(summaryResponse.data.transcript, null, 2));
  } catch (error) {
    console.error('Error:', error.response ? error.response.data : error.message);
  }
 };
 transcribeAudio();
 ```
@@ -0,0 +1,2 @@
 # ds_microdot
@@ -0,0 +1,176 @@
 import os
 from typing import Optional
 from fastapi import FastAPI, HTTPException, Security, Depends
 from fastapi.security import APIKeyHeader
 from fastapi.middleware.cors import CORSMiddleware
 from dotenv import load_dotenv
 import json
 from pydantic import BaseModel
 from fastapi import HTTPException
 import os
 from scripts.transcriber import transcribe_media, group_words_into_sentences
 from scripts.generate_summary import general_summary, custom_summary
 from src.models import PlanTier, PlanLimits
 # Load environment variables from a local .env file (if present) before
 # reading any configuration below.
 load_dotenv()
 # Shared secret that callers must present as a Bearer token.
 API_KEY = os.getenv("API_KEY_ACCESS")
 # Initialize FastAPI app
 app = FastAPI(
    title="Microdot AI API",
    description="API for transcribing audio/video and generating plan-tiered AI summaries",
    version="1.0.0"
 )
 # Add CORS middleware
 # NOTE(review): wildcard origins together with allow_credentials=True is very
 # permissive; restrict allow_origins if browsers ever send credentials here.
 app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
 )
 # Setup API key authentication. auto_error=False lets get_api_key produce its
 # own 401 payload instead of FastAPI's default error for a missing header.
 api_key_header = APIKeyHeader(name="Authorization", auto_error=False)
 async def get_api_key(api_key_header: str = Security(api_key_header)) -> str:
    """Validate the Bearer token carried in the ``Authorization`` header.

    Args:
        api_key_header: Raw header value, or a falsy value when the header
            is absent.

    Returns:
        The bearer token, once it has been checked against ``API_KEY``.

    Raises:
        HTTPException: 401 when the header is missing, does not start with
            ``Bearer ``, or carries a token that does not match ``API_KEY``.
    """
    # Local import keeps this function self-contained.
    import secrets

    if not api_key_header or not api_key_header.startswith('Bearer '):
        raise HTTPException(
            status_code=401,
            detail={"error": "Unauthorized", "message": "API key is missing or invalid."}
        )
    # Take everything after the scheme so repeated spaces cannot produce an
    # empty token.
    token = api_key_header.split(' ', 1)[1].strip()
    expected = API_KEY or ""
    # Fail closed when no key is configured or the token is empty, and compare
    # in constant time so response timing does not leak the key.
    matches = bool(token) and bool(expected) and secrets.compare_digest(
        token.encode("utf-8"), expected.encode("utf-8")
    )
    if not matches:
        raise HTTPException(
            status_code=401,
            detail={"error": "Unauthorized", "message": "API key does not match."}
        )
    return token
 class TranscribeRequest(BaseModel):
    """Request body for the transcribe endpoint."""
    # URL of the remote media file to transcribe.
    media_url: Optional[str] = None
    # "audio" or "video". An explicit default keeps the field optional and
    # avoids passing None to code that calls media_type.lower().
    media_type: Optional[str] = "audio"
 class ChatResp(BaseModel):
    """Response model carrying only an optional error message."""
    # NOTE(review): not referenced by any endpoint visible in this file —
    # confirm it is still needed.
    error: Optional[str] = None
 class TranscriptResponse(BaseModel):
    """Response envelope returned by the transcribe and summary endpoints."""
    # Transcript or summary payload as a dictionary.
    transcript: dict
 class GeneralSummaryRequest(BaseModel):
    """Request body for the general-summary endpoint."""
    # JSON-encoded transcript; the endpoint decodes it with json.loads.
    transcript: Optional[str] = None
    # Plan tier name; "pro" is used when the field is omitted.
    plan_tier: Optional[str] = "pro"
 class TemplateSummaryRequest(BaseModel):
    """Request body for the template-summary endpoint."""
    # JSON-encoded transcript; the endpoint decodes it with json.loads.
    transcript: Optional[str] = None
    # JSON-encoded template; the endpoint decodes it with json.loads.
    template: Optional[str] = None
@app.post("/microdot-ai/transcribe")
 async def chat_endpoint(
    request: TranscribeRequest,
    api_key: str = Depends(get_api_key)
 ):
    try:
        # Use the transcribe_media function to transcribe the media
        if request.media_url:
            transcription_response = transcribe_media(request.media_url, media_type=request.media_type)
            if transcription_response is None:
                raise HTTPException(status_code=500, detail="Transcription failed.")
            print(f"Transcription response: {transcription_response}")  # Debugging print
        # Parse response
        words = transcription_response["results"]["channels"][0]["alternatives"][0]["words"]
        transcript = group_words_into_sentences(words=words)
        return TranscriptResponse(
            transcript=transcript,  # Corrected to return the transcript
            error=None
        )
    except Exception as e:
        print(f"Error processing chat request: {str(e)}")  # Print statement added
        raise HTTPException(
            status_code=500,
            detail=f"Error processing chat request: {str(e)}"
        )
@app.post("/microdot-ai/general-summary")
 async def general_summary_endpoint(
    request: GeneralSummaryRequest,
    api_key: str = Depends(get_api_key)
 ):
    try:
        if not request.transcript:
            raise HTTPException(status_code=400, detail="Transcript is required.")
        # Get the plan tier from the request or default to pro
        plan_tier = request.plan_tier.lower() if request.plan_tier else "pro"
        # Validate plan tier using our PlanTier enum
        valid_tiers = [t.value for t in PlanTier]
        if plan_tier not in valid_tiers:
            plan_tier = PlanTier.PRO.value  # Default to pro if invalid tier
        # Get the appropriate summary type for this plan tier
        summary_type = PlanLimits.get_limit(plan_tier, "summary_type")
        # Generate the summary based on the plan tier
        response = general_summary(json.loads(request.transcript), plan_tier=plan_tier)
        return TranscriptResponse(
            transcript=response
        )
    except Exception as e:
        print(f"Error processing general summary request: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"Error processing general summary request: {str(e)}"
        )
@app.post("/microdot-ai/template-summary")
 async def template_summary_endpoint(  # Corrected function name to avoid conflict
    request: TemplateSummaryRequest,
    api_key: str = Depends(get_api_key)
 ):
    try:
        if not request.transcript:
            raise HTTPException(status_code=400, detail="Transcript is required.")
        if not request.template:
            raise HTTPException(status_code=400, detail="Template is required.")
        transcript = json.loads(request.transcript)
        template = json.loads(request.template)  # Removed the check for missing template as it's now required
        response = custom_summary(template, transcript)
        return TranscriptResponse(
            transcript=response
        )
    except Exception as e:
        print(f"Error processing template summary request: {str(e)}")  # Updated print statement for clarity
        raise HTTPException(
            status_code=500,
            detail=f"Error processing template summary request: {str(e)}"
        )
@app.on_event("startup")
 async def startup_event():
    """Startup hook; currently a placeholder that performs no work."""
    pass
 if __name__ == "__main__":
    # Start the server with uvicorn when this module is executed directly.
    import uvicorn
    # NOTE(review): binds to all interfaces with auto-reload enabled —
    # confirm this entry point is used for local development only.
    uvicorn.run("app:app", host="0.0.0.0", port=5056, reload=True)
@@ -0,0 +1,5 @@
@@ -0,0 +1,28 @@
 openai
 pandas
 python-dotenv
 fastapi
 uvicorn
 langchain-community
 langchain-openai
 pydantic
 pypdf
 pypandoc
 Spire.Doc
 plum-dispatch==1.7.4
 scikit-learn
 werkzeug
 python-multipart
 langgraph
 tiktoken
 langchainhub
 chromadb
 langchain
 langchain-text-splitters
 beautifulsoup4
 deepgram_sdk
 moviepy
 yt-dlp
 ffmpeg-python
 reportlab
 anthropic
@@ -0,0 +1,61 @@
 import anthropic
 import os
 from dotenv import load_dotenv
 import json
 from src.prompt import advanced_summary_prompt, basic_summary_prompt, custom_template_prompt
 load_dotenv()
 # Model used for every summary request in this module.
 _SUMMARY_MODEL = "claude-3-5-sonnet-20241022"


 def _anthropic_client():
    """Build an Anthropic client from the API key in the environment."""
    # "ANTHTROPIC_API_KEY" is the misspelled name this module originally
    # read; it stays as a fallback so existing .env files keep working.
    api_key = os.getenv("ANTHROPIC_API_KEY") or os.getenv("ANTHTROPIC_API_KEY")
    return anthropic.Anthropic(api_key=api_key)


 def _parse_json_reply(message):
    """Decode the text of the first content block of a reply as JSON."""
    text = message.content[0].text.strip()
    if text.startswith("```"):
        # Drop a Markdown code fence (``` or ```json) wrapped around the
        # JSON; such replies previously failed in json.loads.
        text = text.split("\n", 1)[1] if "\n" in text else ""
        text = text.rsplit("```", 1)[0]
    return json.loads(text)


 def general_summary(transcription, plan_tier="pro"):
    """
    Generate a summary of the transcription based on the user's plan tier.

    Args:
        transcription: The transcription to summarize
        plan_tier: The user's plan tier ("freemium" or "pro"); any other
            value, including None, is treated as "pro"

    Returns:
        The model's reply decoded from JSON

    Raises:
        json.JSONDecodeError: If the model's reply is not valid JSON
    """
    client = _anthropic_client()
    # Select the appropriate prompt based on the user's plan tier
    if (plan_tier or "pro").lower() == "freemium":
        prompt = basic_summary_prompt
        max_tokens = 2000  # Reduced token count for basic summaries
    else:  # Default to pro
        prompt = advanced_summary_prompt
        max_tokens = 4000
    message = client.messages.create(
        model=_SUMMARY_MODEL,
        max_tokens=max_tokens,
        messages=[
            {"role": "user", "content": f"{prompt}"},
            {"role": "user", "content": f"Transcription: {transcription}"}
        ]
    )
    return _parse_json_reply(message)


 def custom_summary(template, transcription):
    """
    Generate a summary of the transcription shaped by a custom template.

    Args:
        template: The template describing the sections to produce
        transcription: The transcription to summarize

    Returns:
        The model's reply decoded from JSON

    Raises:
        json.JSONDecodeError: If the model's reply is not valid JSON
    """
    client = _anthropic_client()
    message = client.messages.create(
        model=_SUMMARY_MODEL,
        max_tokens=8000,
        messages=[
            {"role": "user", "content": f"{custom_template_prompt}"},
            {"role": "user", "content": f"TEMPLATE : {template}"},
            {"role": "user", "content": f"Transcription: {transcription}"}
        ]
    )
    return _parse_json_reply(message)
@@ -0,0 +1,190 @@
 import os
 import logging
 import re
 import uuid
 import yt_dlp
 from deepgram.utils import verboselogs
 from dotenv import load_dotenv
 load_dotenv()
 from deepgram import DeepgramClient, PrerecordedOptions, FileSource
 # Define your URLs (example URLs)
 #audio_url = "https://s3.us-east-2.amazonaws.com/com.mkdlabs.images/baas/jordan/019933724441Business%20English%20Conversation%20Lesson%2045_%20Meeting%20a%20New%20Colleague.mp3"
 #video_url = "https://s3.us-east-2.amazonaws.com/com.mkdlabs.images/baas/jordan/038426704141Business%20English%20Conversation%20Lesson%2045_%20%20Meeting%20a%20New%20Colleague.mp4"
 # Folder for file uploads/downloads. The path is built from the process's
 # current working directory at import time, and the folder is created if it
 # does not already exist.
 UPLOAD_FOLDER = os.path.join(os.getcwd(), "../uploads")
 os.makedirs(UPLOAD_FOLDER, exist_ok=True)
 def sanitize_filename(name: str) -> str:
    """
    Return ``name`` reduced to word characters, hyphens and underscores.

    Every character that is not a word character, whitespace or a hyphen is
    dropped, surrounding whitespace is trimmed, and each remaining space
    becomes an underscore.
    """
    without_specials = re.sub(r'[^\w\s-]', '', name)
    trimmed = without_specials.strip()
    return trimmed.replace(' ', '_')
 def extract_audio(url: str, output_template=os.path.join(UPLOAD_FOLDER, "%(title)s.%(ext)s")) -> str:
    """
    Download the media at ``url`` with yt-dlp and keep its audio as an MP3.

    The MP3 is renamed inside UPLOAD_FOLDER to
    ``<sanitized title>_<uuid hex>.mp3``.

    Returns:
        str: The absolute path to the renamed audio file.

    Raises:
        FileNotFoundError: If the MP3 expected from yt-dlp is not on disk.
    """
    mp3_postprocessor = {
        "key": "FFmpegExtractAudio",
        "preferredcodec": "mp3",
        "preferredquality": "192",
    }
    downloader_options = {
        "format": "bestaudio/best",
        "outtmpl": output_template,
        "postprocessors": [mp3_postprocessor],
        "quiet": True,
    }
    with yt_dlp.YoutubeDL(downloader_options) as downloader:
        info = downloader.extract_info(url, download=True)
        # prepare_filename reports the name *before* postprocessing, so the
        # extension is swapped for the .mp3 the postprocessor writes.
        stem, _ = os.path.splitext(downloader.prepare_filename(info))
        downloaded_path = stem + ".mp3"

    if not os.path.exists(downloaded_path):
        # List the folder contents to help diagnose the naming mismatch.
        folder_listing = os.listdir(UPLOAD_FOLDER)
        print("Warning: Could not find expected file.")
        print("Files in upload folder:", folder_listing)
        raise FileNotFoundError(f"Expected audio file not found: {downloaded_path}")

    # Build the final name from the sanitized title plus a unique hex id.
    safe_title = sanitize_filename(info.get('title', 'audio'))
    unique_id = uuid.uuid4().hex
    target_path = os.path.join(UPLOAD_FOLDER, f"{safe_title}_{unique_id}.mp3")

    os.rename(downloaded_path, target_path)
    print(f"Renamed file to: {target_path}")
    return os.path.abspath(target_path)
 def transcribe_media(file_loc: str, media_type: str = "audio"):
    """
    Transcribe media using Deepgram.

    If media_type is "audio", the remote URL is passed to Deepgram's URL
    transcription. If media_type is "video", the audio is first extracted to
    a local file with extract_audio, sent to Deepgram's file transcription,
    and the local file is deleted afterwards.

    Args:
        file_loc (str): URL to the remote audio or video file.
        media_type (str): "audio" or "video" (case-insensitive).

    Returns:
        The transcription response from Deepgram, or None when anything
        fails (the exception is printed, not raised).
    """
    api_key = os.getenv("DEEPGRAM_API_KEY2")
    # Set only once a temporary file has actually been extracted, so cleanup
    # can never remove an unrelated file from the working directory.
    local_audio_path = None
    try:
        deepgram = DeepgramClient(api_key=api_key)
        options = PrerecordedOptions(
            model="nova-3",
            smart_format=True,
            diarize=True,
        )
        kind = media_type.lower()
        if kind == "audio":
            # For remote audio files, use the URL transcription method.
            response = deepgram.listen.rest.v("1").transcribe_url({"url": file_loc}, options)
        elif kind == "video":
            # For remote video files, first extract the audio locally.
            local_audio_path = extract_audio(file_loc)
            print(f"Extracted audio to: {local_audio_path}")
            # Transcribe using the local file method.
            with open(local_audio_path, "rb") as file:
                buffer_data = file.read()
            payload = {"buffer": buffer_data}
            response = deepgram.listen.rest.v("1").transcribe_file(payload, options)
        else:
            raise ValueError("media_type must be either 'audio' or 'video'.")
        return response
    except Exception as e:
        # Best-effort contract: callers treat None as "transcription failed".
        print(f"Exception during transcription: {e}")
        return None
    finally:
        # Clean up: delete the local audio file on success and failure alike.
        if local_audio_path and os.path.exists(local_audio_path):
            try:
                os.remove(local_audio_path)
                print(f"Deleted local audio file: {local_audio_path}")
            except OSError as cleanup_error:
                # A failed cleanup must not replace the function's result.
                print(f"Could not delete local audio file: {cleanup_error}")
 def _build_sentence(sentence_words, speaker, start_time):
    """Assemble the output record for one finished run of words."""
    return {
        "sentence": " ".join(w["word"] for w in sentence_words),
        "speaker": speaker,
        "start": start_time,
        "end": sentence_words[-1]["end"],
        "words": sentence_words,
    }


 def group_words_into_sentences(words, max_words=15):
    """
    Group word-level transcript entries into per-speaker sentences.

    A new sentence starts whenever the speaker changes or the current
    sentence already holds ``max_words`` words.

    Args:
        words: Sequence of entries that can be indexed with
            "punctuated_word", "speaker", "start" and "end".
        max_words: Maximum number of words per sentence. None or a
            non-positive value disables the length limit.

    Returns:
        dict: {"sentences": [...]}, where each item has "sentence" (words
        joined by spaces), "speaker", "start" (start of its first word),
        "end" (end of its last word) and "words" (list of
        {"word", "start", "end"} dicts).
    """
    sentences = []
    current_sentence = []
    current_speaker = None
    start_time = None
    limit = max_words if max_words and max_words > 0 else None
    for word_info in words:
        speaker = word_info["speaker"]
        start = word_info["start"]
        is_full = limit is not None and len(current_sentence) >= limit
        # Close the running sentence on a speaker change or at the cap.
        if current_sentence and (speaker != current_speaker or is_full):
            sentences.append(_build_sentence(current_sentence, current_speaker, start_time))
            current_sentence = []
        if not current_sentence:
            # First word of a sentence fixes its speaker and start time.
            current_speaker = speaker
            start_time = start
        current_sentence.append({
            "word": word_info["punctuated_word"],
            "start": start,
            "end": word_info["end"],
        })
    # Append the last sentence if any words remain
    if current_sentence:
        sentences.append(_build_sentence(current_sentence, current_speaker, start_time))
    return {"sentences": sentences}
 if __name__ == "__main__":
    # Manual run: transcribe a sample remote video and print the raw response.
    audio_url = "https://s3.us-east-2.amazonaws.com/com.mkdlabs.images/baas/jordan/019933724441Business%20English%20Conversation%20Lesson%2045_%20Meeting%20a%20New%20Colleague.mp3"
    video_url = "https://s3.us-east-2.amazonaws.com/com.mkdlabs.images/baas/jordan/038426704141Business%20English%20Conversation%20Lesson%2045_%20%20Meeting%20a%20New%20Colleague.mp4"
    # Only the video URL is exercised below; audio_url is not used.
    response = transcribe_media(video_url, media_type="video")
    print(response)
@@ -0,0 +1,44 @@
 from pydantic import BaseModel
 from typing import Optional, Dict, List
 from enum import Enum
 class PlanTier(str, Enum):
    """Plan tiers recognised by the API; values are the lowercase names."""
    FREEMIUM = "freemium"
    PRO = "pro"


 class PlanLimits:
    """
    Defines the limits for each plan tier
    """
    # Per-tier limits: monthly transcription minutes, summary style,
    # transcript retention in days, and enabled integrations.
    LIMITS = {
        PlanTier.FREEMIUM: {
            "transcription_minutes": 200,
            "summary_type": "basic",
            "transcript_history_days": 7,
            "integrations": ["google_meet", "zoom"]
        },
        PlanTier.PRO: {
            "transcription_minutes": 600,
            "summary_type": "advanced",
            "transcript_history_days": 30,
            "integrations": ["google_meet", "zoom", "slack", "notion", "asana", "microsoft_teams"]
        }
    }

    @classmethod
    def get_limit(cls, tier: str, limit_name: str):
        """
        Get a specific limit for a plan tier

        Args:
            tier: The plan tier (freemium or pro), case-insensitive. An
                unknown, None or non-string tier is treated as freemium.
            limit_name: The name of the limit to retrieve

        Returns:
            The limit value or None if not found. List-valued limits are
            returned as a copy.
        """
        try:
            tier_enum = PlanTier(tier.lower())
        except (AttributeError, ValueError):
            # AttributeError: tier is None / not a string; ValueError: the
            # name is not a known tier. Both fall back to freemium.
            tier_enum = PlanTier.FREEMIUM
        value = cls.LIMITS.get(tier_enum, {}).get(limit_name)
        # Copy lists so callers cannot mutate the shared LIMITS table.
        return list(value) if isinstance(value, list) else value
@@ -0,0 +1,206 @@
 # Advanced (Pro Plan) summary prompt.
 # The prompt instructs the model to return valid JSON shaped like the embedded
 # example, so the example itself must parse as JSON: keep every object closed
 # and every member comma-separated when editing it.
 advanced_summary_prompt = """
 You are an AI meeting transcript summary formatter. You will be provided with a detailed meeting transcript that includes sentence-level summaries with timestamps (in seconds), speaker details, and word-level timestamps. Your task is to generate a concise summary of the meeting organized into four sections:
 1. **Purpose:**
   - Provide a brief description of the meeting's purpose.
 2. **Chapters:**
   - Provide a list of chapter titles that segment the meeting into key parts.
   - For each chapter, include a timestamp range (with "start" and "end" in seconds) indicating when that chapter begins and ends.
   - Additionally, include a list of word-level timestamps for each word in the chapter. **Important:** For every word in a sentence, the timestamp must be the start timestamp of the sentence to which the word belongs.
 3. **Outcomes:**
   - Provide a coherent description of the meeting outcomes.
   - For each outcome, include a timestamp range (with "start" and "end" in seconds) corresponding to the relevant moment, and include word-level timestamps for each word (using the sentence’s start timestamp for every word).
 4. **Action Items:**
   - Provide a list of actionable items derived from the meeting discussion.
   - For each action item, include either a single timestamp or a timestamp range (if available) and a list of word-level timestamps for each word (again, each word's timestamp is the start timestamp of its parent sentence).
 At the end of each section, include a field named "minutes_total" which represents the total duration in minutes for that section. Calculate this value by using the start time of the first sentence and the end time of the last sentence within the section. If the duration is not a whole number, express it as a decimal (e.g., 0.5).
 **Instructions:**
 - Return a JSON response containing only the required fields with no additional commentary.
 - The JSON output must be properly formatted and valid.
 - Do not include any markdown or code block formatting markers (such as ```json) in your output.
 - Ensure that for each sentence you generate, every word in that sentence is assigned the same timestamp—the start timestamp of that sentence.
 **Example Output JSON:**
 {
  "Purpose": {
    "text": "Discuss project progress and define upcoming milestones."
  },
  "Chapters": {
    "minutes_total": 3,
    "content": [
      {
        "chapter": "Project Overview",
        "time_stamp": {"start": 5.12, "end": 5.68},
        "content": [
          {"text":"- overview of the project's objectives.","original_transcript_start":3.4,"original_transcript_end":5.7},
          {"text":"- It outlines the key milestones achieved so far.", "original_transcript_start":6.7, "original_transcript_end":10.5},
          {"text":"- main challenges faced during the project.", "original_transcript_start":10.8, "original_transcript_end":11.2}
        ],
        "words_time_stamp": [
          {"word": "Project", "timestamp": 5.12},
          {"word": "Overview", "timestamp": 5.12}
        ]
      },
      {
        "chapter": "Budget Review",
        "time_stamp": {"start": 10.50, "end": 11.20},
        "content": [
          {"text":"- review of the current budget allocations.","original_transcript_start":10.5,"original_transcript_end":11.0},
          {"text":"- discussion on potential cost-saving measures.", "original_transcript_start":11.1, "original_transcript_end":12.0},
          {"text":"- approval of the budget for the next quarter.", "original_transcript_start":12.1, "original_transcript_end":13.0}
        ],
        "words_time_stamp": [
          {"word": "Budget", "timestamp": 10.50},
          {"word": "Review", "timestamp": 10.50}
        ]
      }
    ]
  },
  "Outcomes": {
    "minutes_total": 3,
    "content": [
      {
        "text": "Key performance metrics were defined and improvement areas identified.",
        "time_stamp": {"start": 15.30, "end": 16.00},
        "words_time_stamp": [
          {"word": "Key", "timestamp": 15.30},
          {"word": "performance", "timestamp": 15.30},
          {"word": "metrics", "timestamp": 15.30},
          {"word": "were", "timestamp": 15.30},
          {"word": "defined", "timestamp": 15.30},
          {"word": "and", "timestamp": 15.30},
          {"word": "improvement", "timestamp": 15.30},
          {"word": "areas", "timestamp": 15.30},
          {"word": "identified", "timestamp": 15.30}
        ]
      }
    ]
  },
  "Action_Items_Per_User": [
  {
    "speaker": "Speaker_A",
    "minutes_total": 3,
    "action_items": [
      {
        "text": "Prepare a detailed budget report for the next meeting.",
        "time_stamp": {"start": 30.45, "end": 30.45},
        "words_time_stamp": [
          {"word": "Prepare", "timestamp": 30.45},
          {"word": "a", "timestamp": 30.45},
          {"word": "detailed", "timestamp": 30.45},
          {"word": "budget", "timestamp": 30.45},
          {"word": "report", "timestamp": 30.45},
          {"word": "for", "timestamp": 30.45},
          {"word": "the", "timestamp": 30.45},
          {"word": "next", "timestamp": 30.45},
          {"word": "meeting", "timestamp": 30.45}
        ]
      }
    ]
  },
  {
    "speaker": "unassigned",
    "minutes_total": 2,
    "action_items": [
      {
        "text": "Follow up with the marketing team for the latest campaign updates.",
        "time_stamp": {"start": 45.67, "end": 45.67},
        "words_time_stamp": [
          {"word": "Follow", "timestamp": 45.67},
          {"word": "up", "timestamp": 45.67},
          {"word": "with", "timestamp": 45.67},
          {"word": "the", "timestamp": 45.67},
          {"word": "marketing", "timestamp": 45.67},
          {"word": "team", "timestamp": 45.67},
          {"word": "for", "timestamp": 45.67},
          {"word": "the", "timestamp": 45.67},
          {"word": "latest", "timestamp": 45.67},
          {"word": "campaign", "timestamp": 45.67},
          {"word": "updates", "timestamp": 45.67}
        ]
      }
    ]
  }
 ]
 }
 NOTE: Action points to the person  who is to take the action and if not specified use "unassigned"
 NOTE: The content under each chapter provides a detailed bulleted explanation of the chapter. It includes "original_transcript_start" and "original_transcript_end," which indicate the timestamps for each bulleted point, referencing where to find it in the original transcript.
 Remember, every word in each sentence must have a single timestamp equal to the start timestamp of that sentence. Your output must strictly adhere to the provided structure, and the "minutes_total" for each section must be correctly calculated based on the start time of the first sentence and the end time of the last sentence, expressed as a decimal if necessary.
 NOTE : start and end time are in seconds , so take that into considerations when calculating the total time in mins
 NOTE: When creating action items per user, if the assigned user is among the speakers, use their associated speaker key that was presented in the sentence (do not infer names from context). If you can't determine the action item is for one of the speakers, make it "unassigned."
 """
 # Basic (Freemium Plan) summary prompt.
 # Asks the model for a two-section JSON object: "Key_Points" (short bullet
 # points, each with a timestamp) and "Summary" (brief text plus the meeting
 # duration in minutes).
 basic_summary_prompt = """
 You are an AI meeting transcript summary formatter. You will be provided with a detailed meeting transcript that includes sentence-level summaries with timestamps (in seconds), speaker details, and word-level timestamps. Your task is to generate a very concise, bullet-point summary of the meeting with minimal detail.
 Create a simple JSON response with just two sections:
 1. **Key Points:**
   - Provide 3-5 short bullet points covering the main topics discussed.
   - Each bullet point should be no more than 15 words.
   - Include a timestamp for each bullet point.
 2. **Summary:**
   - Provide a very brief overall summary of the meeting in 2-3 sentences.
   - Include the total duration of the meeting in minutes.
 **Instructions:**
 - Keep the output extremely concise and simple.
 - Return a JSON response containing only the required fields with no additional commentary.
 - The JSON output must be properly formatted and valid.
 - Do not include any markdown or code block formatting markers in your output.
 **Example Output JSON:**
 {
  "Key_Points": [
    {
      "text": "Team discussed Q3 marketing strategy.",
      "timestamp": 120.5
    },
    {
      "text": "Budget approval needed by Friday.",
      "timestamp": 360.2
    },
    {
      "text": "New product launch delayed until September.",
      "timestamp": 480.7
    }
  ],
  "Summary": {
    "text": "Marketing team meeting to review Q3 plans and budget requirements. Team agreed on strategy but product launch delayed.",
    "duration_minutes": 15.5
  }
 }
 Remember to keep your output extremely simple and concise, focusing only on the most important information from the meeting.
 """
 # Backward-compatibility alias: general_summary_prompt was the original name,
 # and it now refers to the same string object as advanced_summary_prompt.
 general_summary_prompt = advanced_summary_prompt
 # Prompt for user-defined summary templates.
 # The user supplies section headers with a description of each; the model is
 # told to return JSON that follows that structure, with per-section
 # "minutes_total" and word-level timestamps as shown in the example output.
 custom_template_prompt = """ You are an AI meeting transcript summary formatter. You will be provided with a sentence-level and word-level summary of a meeting, which includes timestamps for each sentence (in seconds), speaker details, and word-level timestamps. Your task is to generate a structured summary of the meeting based on a user-defined template.
 How It Works: The user will provide custom section headers along with descriptions of what each section should contain. You must generate a JSON response that exactly follows the user-defined structure. For each section that includes timestamps, ensure that the timestamps are accurately inferred from the provided sentence and word-level timestamps. For every sentence you generate, assign each word the same timestamp—the start timestamp of the sentence that the word belongs to. Word-level timestamps you generate should reflect the sentence’s start time for every word. At the end of each section, correctly calculate the total duration in minutes ("minutes_total") based on the start time of the first sentence and the end time of the last sentence. If the total duration is not a whole number, represent it as a decimal (e.g., 0.5 mins).
 Instructions:
 Return a JSON response containing only the required fields with no additional commentary.
 For each section that includes a timestamp, include the timestamp exactly as provided (in seconds).
 Include a list of word-level timestamps for each word in the relevant sections.
 Ensure the JSON is properly formatted and valid.
 Do not include any markdown or code block markers (such as ```json) in your output.
 Input Example: { "Key_Points": "Summarize the most critical discussion points from the meeting.", "Summary": "Provide a brief overall summary of what was discussed.", "Next_Steps": "List the next steps decided during the meeting, including any action items." }
 Example Output JSON:
 { "Key_Points": { "minutes_total": 3.5, "content": [ { "text": "Introductions between Diane Taylor and Cody Smith.", "time_stamp": {"start": 5.12, "end": 5.68}, "words_time_stamp": [ {"word": "Introductions", "timestamp": 5.12}, {"word": "between", "timestamp": 5.12}, {"word": "Diane", "timestamp": 5.12}, {"word": "Taylor", "timestamp": 5.12}, {"word": "and", "timestamp": 5.12}, {"word": "Cody", "timestamp": 5.12}, {"word": "Smith.", "timestamp": 5.12} ] } ] }, "Summary": { "minutes_total": 3.5, "content": [ { "text": "The meeting started with introductions, followed by a discussion of key topics.", "time_stamp": {"start": 5.12, "end": 10.12}, "words_time_stamp": [ {"word": "The", "timestamp": 5.12}, {"word": "meeting", "timestamp": 5.12}, {"word": "started", "timestamp": 5.12}, {"word": "with", "timestamp": 5.12}, {"word": "introductions,", "timestamp": 5.12}, {"word": "followed", "timestamp": 5.12}, {"word": "by", "timestamp": 5.12}, {"word": "a", "timestamp": 5.12}, {"word": "discussion", "timestamp": 5.12}, {"word": "of", "timestamp": 5.12}, {"word": "key", "timestamp": 5.12}, {"word": "topics.", "timestamp": 5.12} ] } ] }, "Next_Steps": { "minutes_total": 2.0, "content": [ { "text": "Diane will follow up with Cody regarding office management tasks.", "time_stamp": {"start": 30.45, "end": 30.45}, "words_time_stamp": [ {"word": "Diane", "timestamp": 30.45}, {"word": "will", "timestamp": 30.45}, {"word": "follow", "timestamp": 30.45}, {"word": "up", "timestamp": 30.45}, {"word": "with", "timestamp": 30.45}, {"word": "Cody", "timestamp": 30.45}, {"word": "regarding", "timestamp": 30.45}, {"word": "office", "timestamp": 30.45}, {"word": "management", "timestamp": 30.45}, {"word": "tasks.", "timestamp": 30.45} ] } ] } }
 Remember, for every sentence generated in any section, every word must be assigned the sentence’s start timestamp as its "timestamp" value. Additionally, calculate the "minutes_total" for each section by using the start time of the first sentence and the end time of the last sentence; if the result is not a whole number, express it as a decimal (e.g., 0.5 mins). Your output must strictly adhere to the provided structure.
 NOTE : start and end time are in seconds , so take that into considerations when calculating the total time in mins"""
@@ -0,0 +1,6 @@
 import os
 def delete_file(file_path):
    """Delete a file from the system, doing nothing if it is already gone.

    The removal is attempted directly rather than guarded by an existence
    check, so a file that disappears between "check" and "remove" cannot make
    this function raise. A dangling symlink at ``file_path`` is removed too.

    Args:
        file_path: Path of the file to delete.

    Raises:
        OSError: If the path exists but cannot be removed (for example it is
            a directory, or permissions are insufficient).
    """
    try:
        os.remove(file_path)
    except FileNotFoundError:
        # Nothing to delete; stay silent exactly as for a missing file before.
        return
    print(f"Deleted: {file_path}")
@@ -0,0 +1,102 @@
 import os
 import requests
 import json
 from dotenv import load_dotenv
 load_dotenv()
 # Base URL of the API under test
 base_url = "http://localhost:5056"
 # API key, read from the environment after load_dotenv()
 api_key = os.getenv("API_KEY_ACCESS")
 # Headers shared by every request
 headers = {
    "Authorization": f"Bearer {api_key}",
    "Content-Type": "application/json"
 }
 # Sample audio to transcribe
 audio_url = "https://s3.us-east-2.amazonaws.com/com.mkdlabs.images/baas/jordan/019933724441Business%20English%20Conversation%20Lesson%2045_%20Meeting%20a%20New%20Colleague.mp3"
 # 1. First, transcribe the audio
 transcribe_payload = {
    "media_url": audio_url,
    "media_type": "audio"
 }
 transcribe_response = requests.post(
    f"{base_url}/microdot-ai/transcribe",
    headers=headers,
    json=transcribe_payload
 )
 # Both summary requests below need the transcript, so stop here when the
 # transcription failed instead of crashing later on an undefined name.
 if transcribe_response.status_code != 200:
    print(f"Transcription failed with status code: {transcribe_response.status_code}")
    print(transcribe_response.text)
    raise SystemExit(1)
 transcript_data = transcribe_response.json()
 print("Transcription successful!")
 # The transcript is sent to the summary endpoint as a JSON string
 transcript_json = json.dumps(transcript_data["transcript"], indent=4)
 # Save the transcript to a file
 with open("transcript.json", "w") as f:
    f.write(transcript_json)
 print("Transcript saved to transcript.json")


 def _run_summary(label, plan_tier):
    """
    Request a general summary for one plan tier and save the response to disk.
    Args:
        label: Summary name used in the printed messages ("Basic" or
            "Advanced"); its lowercase form prefixes the output file name.
        plan_tier: Value sent as "plan_tier" in the request body; its
            capitalised form is shown in the section header.
    """
    response = requests.post(
        f"{base_url}/microdot-ai/general-summary",
        headers=headers,
        json={
            "transcript": transcript_json,
            "plan_tier": plan_tier
        }
    )
    if response.status_code != 200:
        print(f"{label} summarization failed with status code: {response.status_code}")
        print(response.text)
        return
    summary_data = response.json()
    print(f"\n--- {label} ({plan_tier.capitalize()}) Summary ---")
    output_path = f"{label.lower()}_summary.json"
    with open(output_path, "w") as f:
        f.write(json.dumps(summary_data, indent=2))
    print(f"{label} summary saved to {output_path}")


 # 2. Test the basic (Freemium) summarization
 _run_summary("Basic", "freemium")
 # 3. Test the advanced (Pro) summarization
 _run_summary("Advanced", "pro")