import os
from datetime import datetime, timedelta, timezone
from itertools import islice
from typing import Any, Dict, Iterator, List, Optional


class GoogleDriveSync:
    """Read-only Google Drive client used to discover receipt files.

    The Drive service is built lazily: every public method authenticates on
    first use and returns an empty result if authentication fails. Errors
    are printed and never propagated to the caller.
    """

    SCOPES = ["https://www.googleapis.com/auth/drive.readonly"]
    TOKEN_PATH = "token.json"
    CREDENTIALS_PATH = "credentials.json"
    FOLDER_MIME_TYPE = "application/vnd.google-apps.folder"
    # MIME types treated as receipts by process_drive_files().
    RECEIPT_MIME_TYPES = (
        "application/pdf",
        "image/jpeg",
        "image/png",
        "image/gif",
        "image/bmp",
    )
    PAGE_SIZE = 100
    # process_drive_files() is still a demo: it handles this many files per call.
    DEMO_BATCH_SIZE = 3
    LOOKBACK_DAYS = 30

    def __init__(self):
        self.service = None
        # Must exist before authenticate() runs: it is read there before any
        # credentials have been loaded.
        self.creds = None
        # IDs of files already returned by process_drive_files().
        self.processed_files = set()

    def authenticate(self) -> bool:
        """Authenticate with the Google Drive API and build ``self.service``.

        Credentials are loaded from ``token.json`` when present, refreshed if
        expired, or obtained through the local-server OAuth flow using
        ``credentials.json``. New or refreshed credentials are written back
        to ``token.json``.

        Returns:
            True when the service was built, False on any failure (the error
            is printed).
        """
        try:
            # Imported lazily so the module can be imported without the
            # Google client libraries installed.
            from google.auth.transport.requests import Request
            from google.oauth2.credentials import Credentials
            from google_auth_oauthlib.flow import InstalledAppFlow
            from googleapiclient.discovery import build

            creds = self.creds
            if os.path.exists(self.TOKEN_PATH):
                creds = Credentials.from_authorized_user_file(
                    self.TOKEN_PATH, self.SCOPES
                )

            if not creds or not creds.valid:
                if creds and creds.expired and creds.refresh_token:
                    creds.refresh(Request())
                else:
                    if not os.path.exists(self.CREDENTIALS_PATH):
                        raise FileNotFoundError(
                            "credentials.json not found. Please download from Google Cloud Console."
                        )
                    flow = InstalledAppFlow.from_client_secrets_file(
                        self.CREDENTIALS_PATH, self.SCOPES
                    )
                    creds = flow.run_local_server(port=0)

                # Only persist when the credentials actually changed.
                with open(self.TOKEN_PATH, "w") as token:
                    token.write(creds.to_json())

            self.creds = creds
            self.service = build("drive", "v3", credentials=creds)
            return True

        except Exception as e:
            # Best-effort boundary: callers rely on a boolean, not an exception.
            print(f"Authentication error: {e}")
            return False

    def _ensure_service(self) -> bool:
        """Return True if a Drive service is available, authenticating if needed."""
        return bool(self.service) or self.authenticate()

    def _iter_files(self, **list_kwargs: Any) -> Iterator[Dict[str, Any]]:
        """Yield file resources from ``files().list``, following ``nextPageToken``.

        Pages are fetched lazily, so a caller that stops iterating early does
        not trigger further requests.
        """
        page_token = None
        while True:
            params = dict(list_kwargs, pageSize=self.PAGE_SIZE)
            if page_token:
                params["pageToken"] = page_token
            response = self.service.files().list(**params).execute()
            yield from response.get("files", [])
            page_token = response.get("nextPageToken")
            if not page_token:
                return

    @staticmethod
    def _quote(value: str) -> str:
        """Return *value* as a single-quoted Drive query literal.

        Backslashes and single quotes are escaped so the value cannot
        terminate the literal early.
        """
        escaped = value.replace("\\", "\\\\").replace("'", "\\'")
        return f"'{escaped}'"

    def list_folders(self) -> List[Dict[str, Any]]:
        """List all folders in Google Drive.

        Returns:
            One dict per folder with ``id``, ``name``, ``createdTime`` and
            ``modifiedTime``; an empty list if authentication or the request
            fails.
        """
        if not self._ensure_service():
            return []

        try:
            return list(
                self._iter_files(
                    q=f"mimeType='{self.FOLDER_MIME_TYPE}'",
                    fields="nextPageToken, files(id, name, createdTime, modifiedTime)",
                )
            )
        except Exception as e:
            print(f"Error listing folders: {e}")
            return []

    def get_folder_info(self, folder_id: str) -> Dict[str, Any]:
        """Get information about a Google Drive folder.

        Args:
            folder_id: Drive file ID of the folder.

        Returns:
            Dict with ``id``, ``name``, ``createdTime`` and ``modifiedTime``;
            an empty dict if authentication or the request fails.
        """
        if not self._ensure_service():
            return {}

        try:
            return (
                self.service.files()
                .get(fileId=folder_id, fields="id, name, createdTime, modifiedTime")
                .execute()
            )
        except Exception as e:
            print(f"Error getting folder info: {e}")
            return {}

    async def process_drive_files(
        self, folder_id: Optional[str] = None
    ) -> List[Dict[str, Any]]:
        """Process receipt files from Google Drive.

        Selects PDF/image files modified within the last 30 days, optionally
        restricted to *folder_id*, skips files already recorded in
        ``self.processed_files``, and returns demo (mock) extraction results
        for the first few remaining files. Each returned file ID is added to
        ``self.processed_files``.

        NOTE: the Drive calls made here are blocking even though this is a
        coroutine.

        Args:
            folder_id: If given, only direct children of this folder are
                considered.

        Returns:
            A list of result dicts; results gathered before an error are
            still returned.
        """
        if not self._ensure_service():
            return []

        results: List[Dict[str, Any]] = []

        try:
            # Each alternative needs its own "mimeType =" term, and the group
            # must be parenthesised so the "and" clauses apply to all of them.
            mime_clause = " or ".join(
                f"mimeType = '{mime}'" for mime in self.RECEIPT_MIME_TYPES
            )
            query = f"({mime_clause})"

            if folder_id:
                query += f" and {self._quote(folder_id)} in parents"

            # The cutoff must be real UTC to justify the "Z" suffix.
            cutoff = datetime.now(timezone.utc) - timedelta(days=self.LOOKBACK_DAYS)
            query += f" and modifiedTime > '{cutoff.strftime('%Y-%m-%dT%H:%M:%SZ')}'"

            unprocessed = (
                file
                for file in self._iter_files(
                    q=query,
                    fields="nextPageToken, files(id, name, mimeType, modifiedTime, size)",
                )
                if file["id"] not in self.processed_files
            )

            # For demo purposes, return mock results for a small batch.
            for file in islice(unprocessed, self.DEMO_BATCH_SIZE):
                results.append(
                    {
                        "file_id": file["id"],
                        "filename": file["name"],
                        "drive_modified": file["modifiedTime"],
                        "file_size": file.get("size", 0),
                        "extraction_success": True,
                        "vendor": "Demo Vendor",
                        "description": "Coffee and sandwich",
                        "total_amount": 25.50,
                        "tax_amount": 2.04,
                        "date": "2024-01-15",
                        "category": "Food",
                        "confidence": 0.95,
                    }
                )
                self.processed_files.add(file["id"])

        except Exception as e:
            print(f"Error processing Drive files: {e}")

        return results