2025-08-05 22:25:51 +01:00
|
|
|
import os
|
|
|
|
|
from datetime import datetime, timedelta
|
2025-08-07 09:06:05 +01:00
|
|
|
from typing import Any, Dict, List
|
|
|
|
|
|
2025-08-05 22:25:51 +01:00
|
|
|
|
|
|
|
|
class GoogleDriveSync:
    """Read-only helper around the Google Drive v3 API for locating receipt files.

    The Drive client is created lazily: every public method calls
    ``authenticate()`` first when ``self.service`` has not been built yet.
    Errors are reported with ``print`` and turned into an "empty" return value
    (``False``, ``[]`` or ``{}``) rather than propagated to the caller.
    """

    # OAuth scope requested for the Drive client (read-only access).
    SCOPES = ["https://www.googleapis.com/auth/drive.readonly"]
    # Cached user token written after a successful login/refresh.
    TOKEN_PATH = "token.json"
    # OAuth client secrets file used for the interactive login flow.
    CREDENTIALS_PATH = "credentials.json"
    # MIME types that are treated as candidate receipt files.
    RECEIPT_MIME_TYPES = (
        "application/pdf",
        "image/jpeg",
        "image/png",
        "image/gif",
        "image/bmp",
    )

    def __init__(self):
        self.service = None
        # Must exist before authenticate() runs: it is read there even when
        # no token file is present on disk.
        self.creds = None
        # Drive file ids already returned by process_drive_files().
        self.processed_files = set()

    @staticmethod
    def _escape_query_value(value: str) -> str:
        """Escape backslashes and single quotes for use inside a Drive query literal."""
        return value.replace("\\", "\\\\").replace("'", "\\'")

    def authenticate(self):
        """Authenticate with Google Drive API.

        Loads cached credentials from ``token.json`` when present, refreshes
        them if they are expired and refreshable, and otherwise runs the
        local-server OAuth flow using ``credentials.json``. Fresh credentials
        are written back to ``token.json`` and ``self.service`` is built.

        Returns:
            True when ``self.service`` was built, False on any failure
            (the error is printed).
        """
        try:
            # Imported here so that a missing Google client library is reported
            # through the same "return False" path as any other auth failure.
            from google.auth.transport.requests import Request
            from google.oauth2.credentials import Credentials
            from google_auth_oauthlib.flow import InstalledAppFlow
            from googleapiclient.discovery import build

            # Load existing credentials
            if os.path.exists(self.TOKEN_PATH):
                self.creds = Credentials.from_authorized_user_file(
                    self.TOKEN_PATH, self.SCOPES
                )

            # If no valid credentials available, let user log in
            if not self.creds or not self.creds.valid:
                if self.creds and self.creds.expired and self.creds.refresh_token:
                    self.creds.refresh(Request())
                else:
                    if not os.path.exists(self.CREDENTIALS_PATH):
                        raise FileNotFoundError(
                            "credentials.json not found. Please download from Google Cloud Console."
                        )

                    flow = InstalledAppFlow.from_client_secrets_file(
                        self.CREDENTIALS_PATH, self.SCOPES
                    )
                    self.creds = flow.run_local_server(port=0)

                # Save credentials for next run
                with open(self.TOKEN_PATH, "w") as token:
                    token.write(self.creds.to_json())

            # Build the Drive service
            self.service = build("drive", "v3", credentials=self.creds)
            return True

        except Exception as e:
            print(f"Authentication error: {e}")
            return False

    def list_folders(self) -> List[Dict[str, Any]]:
        """List all folders in Google Drive.

        Follows ``nextPageToken`` so that every result page is collected, not
        just the first 100 folders.

        Returns:
            A list of folder resources with ``id``, ``name``, ``createdTime``
            and ``modifiedTime``; an empty list if authentication or any
            request fails.
        """
        if not self.service:
            if not self.authenticate():
                return []

        try:
            folders: List[Dict[str, Any]] = []
            page_token = None
            while True:
                response = (
                    self.service.files()
                    .list(
                        q="mimeType='application/vnd.google-apps.folder'",
                        pageSize=100,
                        fields="nextPageToken, files(id, name, createdTime, modifiedTime)",
                        pageToken=page_token,
                    )
                    .execute()
                )
                folders.extend(response.get("files", []))
                page_token = response.get("nextPageToken")
                if not page_token:
                    return folders

        except Exception as e:
            print(f"Error listing folders: {e}")
            return []

    def get_folder_info(self, folder_id: str) -> Dict[str, Any]:
        """Get information about a Google Drive folder.

        Args:
            folder_id: Drive file id of the folder to look up.

        Returns:
            The resource with ``id``, ``name``, ``createdTime`` and
            ``modifiedTime``; an empty dict if authentication or the request
            fails.
        """
        if not self.service:
            if not self.authenticate():
                return {}

        try:
            return (
                self.service.files()
                .get(fileId=folder_id, fields="id, name, createdTime, modifiedTime")
                .execute()
            )

        except Exception as e:
            print(f"Error getting folder info: {e}")
            return {}

    async def process_drive_files(self, folder_id: str = None) -> List[Dict[str, Any]]:
        """Process all receipt files from Google Drive.

        Searches for PDF/image files modified in the last 30 days (optionally
        restricted to ``folder_id``), skips ids already recorded in
        ``self.processed_files`` and returns placeholder extraction results
        for at most three of the remaining files. Only the first result page
        (up to 100 files) is inspected.

        Note: this coroutine does not await anything; the Drive request runs
        synchronously.

        Args:
            folder_id: Optional Drive folder id; when given, only files whose
                parents include this folder are considered.

        Returns:
            A list of result dicts (possibly empty). Errors are printed and
            whatever was collected before the error is returned.
        """
        # Local import: the module-level datetime import does not expose timezone.
        from datetime import timezone

        if not self.service:
            if not self.authenticate():
                return []

        results = []

        try:
            # Each MIME type needs its own "mimeType = ..." term, and the
            # or-group is parenthesised so the "and" filters apply to all of it.
            mime_clause = " or ".join(
                f"mimeType = '{mime}'" for mime in self.RECEIPT_MIME_TYPES
            )
            query = f"({mime_clause})"

            if folder_id:
                safe_folder_id = self._escape_query_value(folder_id)
                query += f" and '{safe_folder_id}' in parents"

            # Add date filter (last 30 days), expressed as a real UTC timestamp
            # so that the trailing "Z" is truthful.
            cutoff = datetime.now(timezone.utc) - timedelta(days=30)
            thirty_days_ago = cutoff.strftime("%Y-%m-%dT%H:%M:%S") + "Z"
            query += f" and modifiedTime > '{thirty_days_ago}'"

            response = (
                self.service.files()
                .list(
                    q=query,
                    pageSize=100,
                    fields="nextPageToken, files(id, name, mimeType, modifiedTime, size)",
                )
                .execute()
            )

            files = [
                file
                for file in response.get("files", [])
                if file["id"] not in self.processed_files
            ]

            # For demo purposes, return mock results
            for file in files[:3]:  # Process first 3 files
                mock_result = {
                    "file_id": file["id"],
                    "filename": file["name"],
                    "drive_modified": file["modifiedTime"],
                    "file_size": file.get("size", 0),
                    "extraction_success": True,
                    "vendor": "Demo Vendor",
                    "description": "Coffee and sandwich",
                    "total_amount": 25.50,
                    "tax_amount": 2.04,
                    "date": "2024-01-15",
                    "category": "Food",
                    "confidence": 0.95,
                }
                results.append(mock_result)
                self.processed_files.add(file["id"])

        except Exception as e:
            print(f"Error processing Drive files: {e}")

        return results
|