"""Extract a spirometry table from a PDF via the OpenRouter chat API."""

import base64
import os

import requests
from dotenv import load_dotenv

load_dotenv()

# NOTE: os.getenv returns None when OPENROUTER_API_KEY is unset, in which case
# the Authorization header below is sent as the literal "Bearer None".
API_KEY_REF = os.getenv("OPENROUTER_API_KEY")

_OPENROUTER_CHAT_URL = "https://openrouter.ai/api/v1/chat/completions"

# Adjacent string literals are concatenated by the parser, so every fragment
# except the last must end with a space to keep the sentences separated.
_SPIROMETRY_PROMPT = (
    "Please extract the Spirometry table from the pdf and return the values in csv format, "
    "note that it is the unit of parameter that is beside it and it should not be a column. "
    "The '-' Should be treated as empty values. "
    "do not add 'csv' at the start or end of the response"
)


def encode_pdf_to_base64(pdf_path):
    """Return the bytes of the file at *pdf_path* encoded as a base64 string."""
    with open(pdf_path, "rb") as pdf_file:
        return base64.b64encode(pdf_file.read()).decode("utf-8")


def _first_choice_content(response_data):
    """Return the first choice's message text, or None if it is absent.

    Any shape other than ``{"choices": [{"message": {"content": <str>}}, ...]}``
    yields None instead of raising.
    """
    if not isinstance(response_data, dict):
        return None
    choices = response_data.get("choices")
    if not choices:
        return None
    try:
        content = choices[0]["message"]["content"]
    except (KeyError, IndexError, TypeError):
        return None
    # A null or non-text content cannot be written to the CSV file.
    return content if isinstance(content, str) else None


def extract_spirometry_table_from_pdf(
    pdf_path,
    *,
    output_file="extracted_spirometry_table.csv",
    model="google/gemini-2.5-flash-lite",
    timeout=120,
):
    """Ask the model to extract the spirometry table from a PDF and save it.

    The PDF is read, base64-encoded into a ``data:`` URL and posted to the
    OpenRouter chat completions endpoint together with the extraction prompt.
    The text of the first returned choice is written verbatim to
    *output_file*.

    Args:
        pdf_path: Path of the PDF file to send.
        output_file: Path the returned text is written to (UTF-8).
        model: Model identifier placed in the request payload.
        timeout: Value passed to ``requests.post`` as ``timeout`` (seconds),
            so a stalled connection cannot block forever.

    Returns:
        ``"Extracted table saved to <output_file>"`` when the response
        contained text for the first choice, otherwise
        ``"No content found in response"``.

    Raises:
        OSError: If the PDF cannot be read or the output cannot be written.
        requests.exceptions.RequestException: Propagated from
            ``requests.post`` (including timeouts) and from
            ``response.json()`` when the body is not valid JSON.
    """
    headers = {
        "Authorization": f"Bearer {API_KEY_REF}",
        "Content-Type": "application/json",
    }

    # Read and encode the PDF
    base64_pdf = encode_pdf_to_base64(pdf_path)
    data_url = f"data:application/pdf;base64,{base64_pdf}"

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": _SPIROMETRY_PROMPT},
                {
                    "type": "file",
                    "file": {"filename": "document.pdf", "file_data": data_url},
                },
            ],
        }
    ]
    payload = {
        "model": model,
        "messages": messages,
    }

    response = requests.post(
        _OPENROUTER_CHAT_URL, headers=headers, json=payload, timeout=timeout
    )
    content = _first_choice_content(response.json())
    if content is None:
        # Checked before opening the file so a bad response never truncates
        # an existing output file.
        return "No content found in response"

    # Save to a CSV file
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(content)
    return f"Extracted table saved to {output_file}"