import io
import os
from datetime import datetime, timedelta, timezone
from typing import Any, Dict, List, Optional


class GoogleDriveSync:
    """Read-only Google Drive (v3) client used to discover receipt files.

    The Drive service is built lazily: every public method authenticates on
    first use when ``self.service`` is not set yet.  Errors are reported with
    ``print`` and turned into empty results instead of being raised.

    Attributes set by ``__init__``:
        service: the Drive service object, or ``None`` until authenticated.
        creds: the OAuth credentials object, or ``None`` until authenticated.
        processed_files: ids of files already returned by
            ``process_drive_files`` (used to skip them on later calls).
        token_path / credentials_path: files read and written during the
            OAuth flow.
    """

    # OAuth scope requested for the Drive service (read-only access).
    SCOPES = ['https://www.googleapis.com/auth/drive.readonly']
    # MIME type Drive uses for folders.
    FOLDER_MIME_TYPE = 'application/vnd.google-apps.folder'
    # MIME types that are treated as receipt candidates.
    RECEIPT_MIME_TYPES = (
        'application/pdf',
        'image/jpeg',
        'image/png',
        'image/gif',
        'image/bmp',
    )
    # The demo implementation only produces results for this many files
    # per call.
    DEMO_FILE_LIMIT = 3

    def __init__(self, token_path: str = 'token.json',
                 credentials_path: str = 'credentials.json'):
        """Create an unauthenticated client.

        Args:
            token_path: file holding the cached user token.
            credentials_path: OAuth client-secrets file.
        """
        self.service = None
        # Must exist before authenticate() runs: it is read even when no
        # token file is present (first run).
        self.creds = None
        self.processed_files = set()
        self.token_path = token_path
        self.credentials_path = credentials_path

    def authenticate(self) -> bool:
        """Authenticate with the Google Drive API and build ``self.service``.

        Loads cached credentials from ``token_path`` when the file exists,
        refreshes them when expired, or runs the local-server OAuth flow
        using ``credentials_path``.  Refreshed/new credentials are written
        back to ``token_path``.

        Returns:
            True when the service was built, False on any error (including
            missing Google client libraries); the error is printed.
        """
        try:
            # Imported lazily so the module can be imported without the
            # Google client libraries installed.
            from google.auth.transport.requests import Request
            from google.oauth2.credentials import Credentials
            from google_auth_oauthlib.flow import InstalledAppFlow
            from googleapiclient.discovery import build

            creds = self.creds
            # Load existing credentials
            if os.path.exists(self.token_path):
                creds = Credentials.from_authorized_user_file(
                    self.token_path, self.SCOPES
                )

            # If no valid credentials are available, refresh or let the
            # user log in.
            if not creds or not creds.valid:
                if creds and creds.expired and creds.refresh_token:
                    creds.refresh(Request())
                else:
                    if not os.path.exists(self.credentials_path):
                        raise FileNotFoundError(
                            f"{self.credentials_path} not found. "
                            "Please download from Google Cloud Console."
                        )
                    flow = InstalledAppFlow.from_client_secrets_file(
                        self.credentials_path, self.SCOPES
                    )
                    creds = flow.run_local_server(port=0)
                self.creds = creds

                # Save credentials for next run
                with open(self.token_path, 'w') as token:
                    token.write(creds.to_json())

            self.creds = creds
            # Build the Drive service
            self.service = build('drive', 'v3', credentials=creds)
            return True
        except Exception as e:
            print(f"Authentication error: {e}")
            return False

    def _ensure_service(self) -> bool:
        """Return True when a Drive service is available, authenticating if needed."""
        return bool(self.service) or self.authenticate()

    @staticmethod
    def _escape_query_value(value: str) -> str:
        """Escape backslashes and single quotes for use inside a quoted Drive query value."""
        return value.replace('\\', '\\\\').replace("'", "\\'")

    @classmethod
    def _build_receipt_query(cls, folder_id: Optional[str],
                             modified_after: str) -> str:
        """Build the Drive search query selecting receipt candidates.

        Args:
            folder_id: restrict to direct children of this folder when truthy.
            modified_after: timestamp string compared against ``modifiedTime``.

        Returns:
            The query string; the MIME alternatives are parenthesised so the
            following ``and`` clauses apply to all of them.
        """
        mime_clause = " or ".join(
            f"mimeType='{mime}'" for mime in cls.RECEIPT_MIME_TYPES
        )
        clauses = [f"({mime_clause})"]
        if folder_id:
            clauses.append(
                f"'{cls._escape_query_value(folder_id)}' in parents"
            )
        clauses.append(f"modifiedTime > '{modified_after}'")
        return " and ".join(clauses)

    def list_folders(self) -> List[Dict[str, Any]]:
        """List all folders in Google Drive.

        Follows ``nextPageToken`` until every page has been fetched.

        Returns:
            A list of dicts with the requested fields (``id``, ``name``,
            ``createdTime``, ``modifiedTime``); an empty list when
            authentication fails or any request raises.
        """
        if not self._ensure_service():
            return []

        folders: List[Dict[str, Any]] = []
        page_token = None
        try:
            while True:
                response = self.service.files().list(
                    q=f"mimeType='{self.FOLDER_MIME_TYPE}'",
                    pageSize=100,
                    fields="nextPageToken, files(id, name, createdTime, modifiedTime)",
                    pageToken=page_token,
                ).execute()
                folders.extend(response.get('files', []))
                page_token = response.get('nextPageToken')
                if not page_token:
                    return folders
        except Exception as e:
            print(f"Error listing folders: {e}")
            return []

    def get_folder_info(self, folder_id: str) -> Dict[str, Any]:
        """Get information about a Google Drive folder.

        Args:
            folder_id: Drive id of the folder.

        Returns:
            The response dict for the requested fields (``id``, ``name``,
            ``createdTime``, ``modifiedTime``), or an empty dict when
            authentication fails or the request raises.
        """
        if not self._ensure_service():
            return {}

        try:
            return self.service.files().get(
                fileId=folder_id,
                fields="id, name, createdTime, modifiedTime",
            ).execute()
        except Exception as e:
            print(f"Error getting folder info: {e}")
            return {}

    async def process_drive_files(self, folder_id: Optional[str] = None,
                                  days: int = 30) -> List[Dict[str, Any]]:
        """Process receipt files from Google Drive (demo: mock extraction).

        Queries Drive for PDF/image files modified within the last ``days``
        days (optionally inside ``folder_id``), skips ids already present in
        ``self.processed_files`` and returns a mock extraction result for at
        most ``DEMO_FILE_LIMIT`` of the remaining files, recording their ids
        as processed.

        NOTE: the Drive request below is a blocking call even though this
        method is a coroutine.

        Args:
            folder_id: restrict the search to this folder when given.
            days: size of the look-back window on ``modifiedTime``.

        Returns:
            A list of result dicts; empty when authentication fails.  When a
            request or processing step raises, the error is printed and the
            results gathered so far are returned.
        """
        if not self._ensure_service():
            return []

        results: List[Dict[str, Any]] = []
        try:
            # Use an aware UTC timestamp: the trailing 'Z' claims UTC, so the
            # value must not be derived from naive local time.
            cutoff = datetime.now(timezone.utc) - timedelta(days=days)
            query = self._build_receipt_query(
                folder_id, cutoff.strftime('%Y-%m-%dT%H:%M:%SZ')
            )

            response = self.service.files().list(
                q=query,
                pageSize=100,
                fields="nextPageToken, files(id, name, mimeType, modifiedTime, size)",
            ).execute()

            pending = [
                file for file in response.get('files', [])
                if file['id'] not in self.processed_files
            ]

            # For demo purposes, return mock results
            for file in pending[:self.DEMO_FILE_LIMIT]:
                results.append({
                    "file_id": file['id'],
                    "filename": file['name'],
                    "drive_modified": file['modifiedTime'],
                    "file_size": file.get('size', 0),
                    "extraction_success": True,
                    "vendor": "Demo Vendor",
                    "description": "Coffee and sandwich",
                    "total_amount": 25.50,
                    "tax_amount": 2.04,
                    "date": "2024-01-15",
                    "category": "Food",
                    "confidence": 0.95,
                })
                self.processed_files.add(file['id'])
        except Exception as e:
            print(f"Error processing Drive files: {e}")

        return results