Deleted unnecessary files
This commit is contained in:
@@ -1,26 +0,0 @@
|
|||||||
from tika import parser
|
|
||||||
import tika
|
|
||||||
|
|
||||||
# Path to the file you want to parse.
# NOTE(review): hard-coded, machine-specific Windows path; adjust before running elsewhere.
file_path = "C:\\Users\\babaw\\Documents\\Work\\Mana Knight Digital\\ds_task_scp\\data\\9.confidentiality agreement.pdf"

try:
    # Parse the file; the result is read below as a mapping with
    # "content" and "metadata" entries.
    parsed = parser.from_file(file_path)

    # Extract content. The entry may be absent or None (presumably when no
    # text could be extracted -- confirm against the tika-python docs), so
    # avoid printing the literal string "None" in that case.
    content = parsed.get("content")
    print("--- Content ---")
    print(str(content).strip() if content else "(no text content extracted)")

    # Extract metadata. Fall back to an empty mapping so a missing or None
    # entry prints an empty section instead of raising.
    metadata = parsed.get("metadata") or {}
    print("\n--- Metadata ---")
    for key, value in metadata.items():
        print(f"{key}: {value}")

except FileNotFoundError:
    print(f"Error: File not found at {file_path}")
except Exception as e:
    # Top-level boundary of a throwaway script: report the failure instead
    # of letting a traceback escape.
    print(f"An error occurred: {e}")
|
|
||||||
Reference in New Issue
Block a user