"""
Smart Farm Photo Keyword Tagging AI - Main Processing Script

Reads images from an input directory, generates AI keywords/titles/captions
for each one, optionally scores their quality, and writes the results to a
timestamped CSV file.
"""

import os
import sys
import time
import pandas as pd
from datetime import datetime
import argparse
from typing import Optional

# Add src to path for imports (must run before the `src.*` imports below)
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))

from src.data.image_processor import ImageProcessor
from src.model.keyword_generator import AgricultureKeywordGenerator
from src.utils.validation import KeywordValidator, DataQualityChecker
from src.utils.batch_processor import BatchProcessor, estimate_processing_time


def _row_error(row):
    """Return the error recorded on *row*, or None when the row has none.

    ``'error' in row`` on a pandas Series only tests whether an ``error``
    *column* exists, which is true for every row as soon as a single image
    failed.  The cell value itself has to be inspected instead: a missing
    value (None/NaN/NA) or an empty string means "no error".
    """
    error = row.get('error')
    if pd.api.types.is_scalar(error) and pd.isna(error):
        return None
    if isinstance(error, str) and not error.strip():
        return None
    return error


def _build_result(row, ai_results, keyword_validation, title_validation,
                  validate_quality):
    """Assemble one output CSV record from an image row and its AI results."""
    result = {
        'filename': row['filename'],
        'human_keywords': '',  # Placeholder for human keywords
        'ai_keywords': ', '.join(ai_results['keywords']),
        'ai_title': ai_results['title'],
        'location': row.get('location', ''),
        'caption': ai_results['caption'],
    }

    # Quality columns are only added when both validations produced a result.
    if validate_quality and keyword_validation and title_validation:
        result.update({
            'keyword_quality_score': keyword_validation['score'],
            'title_quality_score': title_validation['score'],
            'quality_issues': '; '.join(
                keyword_validation['issues'] + title_validation['issues']
            ),
        })
    return result


def _print_sample_results(results_df, validate_quality):
    """Print the first three result rows as a human-readable preview."""
    print("\n📊 Sample Results:")
    print("-" * 80)
    for _, row in results_df.head(3).iterrows():
        print(f"File: {row['filename']}")
        print(f"Title: {row['ai_title']}")
        print(f"Keywords: {row['ai_keywords']}")
        print(f"Location: {row['location'] if row['location'] else 'Not available'}")
        if validate_quality and 'keyword_quality_score' in row:
            print(f"Quality Score: {row['keyword_quality_score']}/100")
        print("-" * 80)


def process_agricultural_photos(input_dir: str = "data/raw",
                                output_dir: str = "outputs",
                                validate_quality: bool = True,
                                batch_size: int = 500,
                                model_path: Optional[str] = None) -> Optional[str]:
    """Process agricultural photos and write AI keywords to a CSV file.

    Args:
        input_dir: Directory containing the images to process.
        output_dir: Directory the result CSV is written to (created if
            missing).
        validate_quality: When True, keywords and titles are scored with
            ``KeywordValidator`` and the scores are added to the output.
        batch_size: Accepted for interface compatibility; the current
            implementation does not use it.
        model_path: Optional path passed to ``AgricultureKeywordGenerator``.

    Returns:
        Path of the CSV file that was written, or None when there were no
        images to process or none could be processed successfully.
    """
    print("🚜 Smart Farm Photo Keyword Tagging AI - Enhanced Version")
    print("=" * 60)

    # Initialize components
    print("Initializing components...")
    image_processor = ImageProcessor(input_dir)
    keyword_generator = AgricultureKeywordGenerator(model_path)
    validator = KeywordValidator() if validate_quality else None

    # Get image files and estimate processing time
    image_files = image_processor.get_image_files(input_dir)
    if not image_files:
        print("No images found to process!")
        return None

    print(f"Found {len(image_files)} images to process")
    time_estimate = estimate_processing_time(len(image_files))
    print(f"Estimated processing time: {time_estimate['estimate']}")

    # Process images with enhanced error handling
    print(f"\nProcessing images from: {input_dir}")
    image_df = image_processor.batch_process_images(input_dir)

    if image_df.empty:
        print("No valid images found to process!")
        return None

    # Generate keywords for each image with quality validation
    results = []
    quality_scores = []
    processing_start = time.time()
    total_rows = len(image_df)

    # enumerate() gives a reliable 1-based progress counter; the DataFrame
    # index label is not guaranteed to be a contiguous integer.
    for position, (_, row) in enumerate(image_df.iterrows(), start=1):
        error = _row_error(row)
        if error is not None:
            print(f"Skipping {row['filename']} due to error: {error}")
            continue

        print(f"Processing {row['filename']}... ({position}/{total_rows})")

        try:
            # Generate keywords and title
            ai_results = keyword_generator.generate_keywords(row['filepath'])

            # Validate quality if enabled
            keyword_validation = (
                validator.validate_keywords(ai_results['keywords'])
                if validator else None
            )
            title_validation = (
                validator.validate_title(ai_results['title'])
                if validator else None
            )

            result = _build_result(row, ai_results, keyword_validation,
                                   title_validation, validate_quality)
            if 'keyword_quality_score' in result:
                quality_scores.append(keyword_validation['score'])
            results.append(result)

            quality_note = (
                f" (Quality: {keyword_validation['score']:.1f})"
                if validate_quality and keyword_validation else ""
            )
            print(f" ✓ Generated {len(ai_results['keywords'])} keywords" + quality_note)

        except Exception as e:
            # Best effort: one failing image must not abort the whole run.
            print(f" ✗ Error processing {row['filename']}: {e}")
            continue

    # Create output DataFrame and save results
    if not results:
        print("No images were successfully processed!")
        return None

    results_df = pd.DataFrame(results)

    # Save to CSV
    os.makedirs(output_dir, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_file = os.path.join(output_dir, f"agricultural_keywords_{timestamp}.csv")
    results_df.to_csv(output_file, index=False)

    # Calculate processing statistics
    processing_time = time.time() - processing_start
    avg_time_per_image = processing_time / len(results) if results else 0

    print("\n✅ Processing complete!")
    print(f"Results saved to: {output_file}")
    print(f"Processed {len(results_df)} images successfully")
    print(f"Total processing time: {processing_time/60:.1f} minutes")
    print(f"Average time per image: {avg_time_per_image:.1f} seconds")

    # Quality statistics if validation was enabled
    if validate_quality and quality_scores:
        avg_quality = sum(quality_scores) / len(quality_scores)
        print(f"Average keyword quality score: {avg_quality:.1f}/100")

    # Validate CSV output
    csv_validation = DataQualityChecker.validate_csv_output(output_file)
    if csv_validation['valid']:
        print(f"✅ CSV validation passed - {csv_validation['completion_rate']['keywords']}% keyword completion")
    else:
        print(f"⚠️ CSV validation issues: {csv_validation['error']}")

    # Display enhanced sample results
    _print_sample_results(results_df, validate_quality)

    # Performance projections
    print("\n🚀 Performance Projections:")
    print(f"Time for 500 images: {(avg_time_per_image * 500)/60:.1f} minutes")
    print(f"Time for 1000 images: {(avg_time_per_image * 1000)/60:.1f} minutes")

    return output_file


def main():
    """Parse command-line arguments and run the processing pipeline."""
    parser = argparse.ArgumentParser(description='Enhanced Agricultural Photo Keyword Tagging AI')
    parser.add_argument('--input', '-i', default='data/raw', help='Input directory with images')
    parser.add_argument('--output', '-o', default='outputs', help='Output directory for results')
    parser.add_argument('--no-validation', action='store_true', help='Skip quality validation')
    parser.add_argument('--batch-size', type=int, default=500, help='Batch size for processing')
    parser.add_argument('--model-path', type=str, default=None, help='Path to fine-tuned model (optional)')

    args = parser.parse_args()

    try:
        output_file = process_agricultural_photos(
            args.input,
            args.output,
            validate_quality=not args.no_validation,
            batch_size=args.batch_size,
            model_path=args.model_path
        )

        if output_file:
            print(f"\n🎉 Success! Check your results in: {output_file}")
        else:
            print("\n⚠️ Processing completed but no results generated")

    except Exception as e:
        # Top-level boundary: report the failure with a traceback and exit
        # with a non-zero status so callers/scripts can detect it.
        print(f"\n❌ Error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()