# Recovered from a deletion diff of src/file_reader.py (blob c4df886).
"""Print the text content and metadata that Tika extracts from a file.

Usage:
    python file_reader.py [PATH]

When PATH is omitted, the module-level ``file_path`` default is parsed.
"""
import sys

import tika
from tika import parser

# Path to the file you want to parse when no command-line argument is given
file_path = "C:\\Users\\babaw\\Documents\\Work\\Mana Knight Digital\\ds_task_scp\\data\\9.confidentiality agreement.pdf"


def print_parsed(path):
    """Parse *path* with Tika and print its content followed by its metadata.

    Errors are reported on stdout rather than raised, so the script stays a
    best-effort inspection tool: a missing file gets a dedicated message and
    any other failure gets a generic one.
    """
    try:
        # Parse the file
        parsed = parser.from_file(path)

        # Extract content.
        # NOTE(review): tika-python appears to yield None (or omit the key)
        # when no text could be extracted -- confirm. Falling back to an
        # empty string avoids printing the literal text "None".
        content = parsed.get("content") or ""
        print("--- Content ---")
        print(str(content).strip())

        # Extract metadata; same None/missing-key fallback as for content so
        # that an empty result prints an empty section instead of erroring.
        metadata = parsed.get("metadata") or {}
        print("\n--- Metadata ---")
        for key, value in metadata.items():
            print(f"{key}: {value}")

    except FileNotFoundError:
        print(f"Error: File not found at {path}")
    except Exception as e:
        # Top-level boundary of a CLI script: report and carry on.
        print(f"An error occurred: {e}")


def main(argv=None):
    """Run the reader on the first CLI argument, or on ``file_path``.

    *argv* is the argument list without the program name; when it is None
    the arguments are taken from ``sys.argv``.
    """
    args = sys.argv[1:] if argv is None else argv
    print_parsed(args[0] if args else file_path)


if __name__ == "__main__":
    main()