Refactor code for improved readability and maintainability across multiple files

This commit is contained in:
bolade
2025-08-07 09:06:05 +01:00
parent 1f530da7c4
commit 9698e2fcaf
5 changed files with 224 additions and 123 deletions
+81 -62
View File
@@ -1,13 +1,13 @@
import os
import io
from typing import List, Dict, Any, Optional
from datetime import datetime, timedelta
from typing import Any, Dict, List
class GoogleDriveSync:
def __init__(self):
self.service = None
self.processed_files = set()
def authenticate(self):
"""Authenticate with Google Drive API"""
try:
@@ -15,111 +15,130 @@ class GoogleDriveSync:
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
SCOPES = ['https://www.googleapis.com/auth/drive.readonly']
SCOPES = ["https://www.googleapis.com/auth/drive.readonly"]
# Load existing credentials
if os.path.exists('token.json'):
self.creds = Credentials.from_authorized_user_file('token.json', SCOPES)
if os.path.exists("token.json"):
self.creds = Credentials.from_authorized_user_file("token.json", SCOPES)
# If no valid credentials available, let user log in
if not self.creds or not self.creds.valid:
if self.creds and self.creds.expired and self.creds.refresh_token:
self.creds.refresh(Request())
else:
if not os.path.exists('credentials.json'):
raise Exception("credentials.json not found. Please download from Google Cloud Console.")
flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES)
if not os.path.exists("credentials.json"):
raise Exception(
"credentials.json not found. Please download from Google Cloud Console."
)
flow = InstalledAppFlow.from_client_secrets_file(
"credentials.json", SCOPES
)
self.creds = flow.run_local_server(port=0)
# Save credentials for next run
with open('token.json', 'w') as token:
with open("token.json", "w") as token:
token.write(self.creds.to_json())
# Build the Drive service
self.service = build('drive', 'v3', credentials=self.creds)
self.service = build("drive", "v3", credentials=self.creds)
return True
except Exception as e:
print(f"Authentication error: {e}")
return False
def list_folders(self) -> List[Dict[str, Any]]:
    """List all folders in Google Drive.

    Authenticates first if no Drive service has been built yet. Every
    result page is fetched by following ``nextPageToken``, so the result is
    not capped at the first 100 folders.

    Returns:
        A list of folder resources with ``id``, ``name``, ``createdTime``
        and ``modifiedTime``. An empty list is returned when authentication
        fails or any request raises.
    """
    if not self.service and not self.authenticate():
        return []

    folders: List[Dict[str, Any]] = []
    page_token = None
    try:
        while True:
            response = (
                self.service.files()
                .list(
                    q="mimeType='application/vnd.google-apps.folder'",
                    pageSize=100,
                    fields="nextPageToken, files(id, name, createdTime, modifiedTime)",
                    # None on the first request; afterwards the token the
                    # previous page handed back.
                    pageToken=page_token,
                )
                .execute()
            )
            folders.extend(response.get("files", []))
            page_token = response.get("nextPageToken")
            if not page_token:
                break
    except Exception as e:
        # Same best-effort contract as before: report and return nothing
        # rather than a partial listing.
        print(f"Error listing folders: {e}")
        return []
    return folders
def get_folder_info(self, folder_id: str) -> Dict[str, Any]:
    """Fetch metadata for a single Google Drive folder.

    Authenticates first if no Drive service has been built yet.

    Args:
        folder_id: Drive file id of the folder to look up.

    Returns:
        The response of the ``files().get`` call, requested with the
        fields ``id``, ``name``, ``createdTime`` and ``modifiedTime``. An
        empty dict is returned when authentication fails or the request
        raises.
    """
    if not self.service and not self.authenticate():
        return {}

    wanted_fields = "id, name, createdTime, modifiedTime"
    try:
        request = self.service.files().get(fileId=folder_id, fields=wanted_fields)
        return request.execute()
    except Exception as e:
        print(f"Error getting folder info: {e}")
        return {}
async def process_drive_files(self, folder_id: str = None) -> List[Dict[str, Any]]:
"""Process all receipt files from Google Drive"""
if not self.service:
if not self.authenticate():
return []
results = []
try:
# File types to look for
file_types = ["'application/pdf'", "'image/jpeg'", "'image/png'", "'image/gif'", "'image/bmp'"]
file_types = [
"'application/pdf'",
"'image/jpeg'",
"'image/png'",
"'image/gif'",
"'image/bmp'",
]
mime_types = " or ".join(file_types)
# Build query
query = f"mimeType contains {mime_types}"
if folder_id:
query += f" and '{folder_id}' in parents"
# Add date filter (last 30 days)
thirty_days_ago = (datetime.now() - timedelta(days=30)).isoformat() + 'Z'
thirty_days_ago = (datetime.now() - timedelta(days=30)).isoformat() + "Z"
query += f" and modifiedTime > '{thirty_days_ago}'"
results_files = self.service.files().list(
q=query,
pageSize=100,
fields="nextPageToken, files(id, name, mimeType, modifiedTime, size)"
).execute()
files = results_files.get('files', [])
files = [file for file in files if file['id'] not in self.processed_files]
results_files = (
self.service.files()
.list(
q=query,
pageSize=100,
fields="nextPageToken, files(id, name, mimeType, modifiedTime, size)",
)
.execute()
)
files = results_files.get("files", [])
files = [file for file in files if file["id"] not in self.processed_files]
# For demo purposes, return mock results
for file in files[:3]: # Process first 3 files
mock_result = {
"file_id": file['id'],
"filename": file['name'],
"drive_modified": file['modifiedTime'],
"file_size": file.get('size', 0),
"file_id": file["id"],
"filename": file["name"],
"drive_modified": file["modifiedTime"],
"file_size": file.get("size", 0),
"extraction_success": True,
"vendor": "Demo Vendor",
"description": "Coffee and sandwich",
@@ -127,12 +146,12 @@ class GoogleDriveSync:
"tax_amount": 2.04,
"date": "2024-01-15",
"category": "Food",
"confidence": 0.95
"confidence": 0.95,
}
results.append(mock_result)
self.processed_files.add(file['id'])
self.processed_files.add(file["id"])
except Exception as e:
print(f"Error processing Drive files: {e}")
return results
return results