"""Utility helpers: file deletion, text formatting of questions/themes/Q&A
data, and PDF text extraction from a URL.

NOTE: importing this module reads the theme configuration file from disk.
"""

import io
import json
import os
from collections.abc import Mapping

import requests
from PyPDF2 import PdfReader

base_path = os.path.join("data", "config_files")
THEME_CONTEXT_PATH = os.path.join(base_path, "theme_context.json")

# Loaded once at import time; `format_theme_text` looks themes up in this
# object. The lookups below index each entry with "id", "theme" and
# "context", so the JSON is presumably a list of objects with those keys.
with open(THEME_CONTEXT_PATH, "r", encoding="utf-8") as f:
    themes = json.load(f)


def delete_file(file_path):
    """Delete a single file, reporting the outcome on stdout.

    Failures are printed rather than raised, so this never propagates an
    ``OSError`` to the caller.

    Parameters:
    - file_path: Path of the file to remove.
    """
    try:
        os.remove(file_path)
        print(f"Deleted file: {file_path}")
    except OSError as e:
        print(f"Error deleting file {file_path}: {e}")


def delete_all_files_in_directory(directory_path):
    """Delete every regular file directly inside ``directory_path``.

    Sub-directories are left untouched (only entries for which
    ``os.path.isfile`` is true are removed). Errors are printed, not raised.
    A failure on one file does not stop the remaining files from being
    deleted.

    Parameters:
    - directory_path: Directory whose files should be removed.
    """
    try:
        filenames = os.listdir(directory_path)
    except OSError as e:
        print(f"Error deleting files in {directory_path}: {e}")
        return

    for filename in filenames:
        file_path = os.path.join(directory_path, filename)
        if os.path.isfile(file_path):
            # Delegate so each file gets its own error handling and the
            # same success/failure messages as a direct `delete_file` call.
            delete_file(file_path)


def format_questions_text(questions_dict, key):
    """Format the questions stored under ``key`` as a dash-prefixed list.

    Parameters:
    - questions_dict: Mapping whose ``key`` entry is an iterable of
      dictionaries, each having a 'question' entry.
    - key: Key selecting which group of questions to format.

    Returns:
    - str: One "- <question>" line per question, with surrounding
      whitespace stripped. Empty string when there are no questions.

    Raises:
    - KeyError: If ``key`` is missing, or an entry has no 'question'.
    """
    lines = [f"- {question['question']}" for question in questions_dict[key]]
    return "\n".join(lines).strip()


def format_theme_text(theme_id):
    """Format the id, theme and context of one theme as dash-prefixed lines.

    The theme is looked up in the module-level ``themes`` data by its "id";
    when several themes share the id, the first one is used.

    Parameters:
    - theme_id: Value compared against each theme's "id" entry.

    Returns:
    - str: Three lines ("- <id>", "- <theme>", "- <context>"), stripped.

    Raises:
    - IndexError: If no theme has the given id.
    - KeyError: If a theme entry lacks one of the expected keys.
    """
    matching_themes = [t for t in themes if t["id"] == theme_id]
    if not matching_themes:
        # Same exception type as the former bare `[0]` lookup, but with a
        # message that says what was actually missing.
        raise IndexError(
            f"No theme with id {theme_id!r} found in {THEME_CONTEXT_PATH}"
        )
    current_theme = matching_themes[0]

    lines = [
        f"- {current_theme['id']}",
        f"- {current_theme['theme']}",
        f"- {current_theme['context']}",
    ]
    return "\n".join(lines).strip()


def download_pdf_and_extract_text(url: str, *, timeout: float = 30.0) -> str:
    """Download a PDF and return the text of all its pages.

    The PDF is parsed from memory, so nothing is written to disk and
    concurrent calls cannot interfere with each other.

    Parameters:
    - url (str): Location of the PDF to download.
    - timeout (float): Seconds passed to ``requests.get`` as its timeout, so
      an unresponsive server cannot block the caller forever.

    Returns:
    - str: Text of every page that yielded any text, separated by blank
      lines. Pages with no extractable text are skipped.

    Raises:
    - requests.exceptions.RequestException: On connection problems,
      timeouts, or a non-success HTTP status.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # Raise an error for bad responses

    reader = PdfReader(io.BytesIO(response.content))

    # Extract each page exactly once; extraction is the expensive step.
    page_texts = (page.extract_text() for page in reader.pages)
    return "\n\n".join(text for text in page_texts if text)


def format_qna_json_text(json_data):
    """
    Format a list of Q&A JSON data into a text string with dashes.

    Parameters:
    - json_data (list): A list of Q&A dictionaries with 'question' and
      'answer' keys. Any other input type yields an empty string.

    Returns:
    - str: A formatted text string. Entries that are not mappings, or that
      lack either key, are rendered as "- Incomplete Q&A entry".
    """
    # Anything other than a list of Q&A dictionaries produces no output.
    if not isinstance(json_data, list):
        return ""

    lines = []
    for item in json_data:
        is_complete = (
            isinstance(item, Mapping)
            and 'question' in item
            and 'answer' in item
        )
        if is_complete:
            lines.append(f"- Question: {item['question']}")
            lines.append(f" Answer: {item['answer']}")
        else:
            lines.append("- Incomplete Q&A entry")
    return "\n".join(lines).strip()


# Example usage: