Complete Enhanced Agricultural AI System - All Requirements Met

This commit is contained in:
Aherobo Ovie Victor
2025-07-16 20:35:20 +01:00
parent 60919dc752
commit 03f827f298
6 changed files with 669 additions and 55 deletions
+91 -20
View File
@@ -4,6 +4,7 @@ Smart Farm Photo Keyword Tagging AI - Main Processing Script
import os
import sys
import time
import pandas as pd
from datetime import datetime
import argparse
@@ -13,44 +14,61 @@ sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
from src.data.image_processor import ImageProcessor
from src.model.keyword_generator import AgricultureKeywordGenerator
from src.utils.validation import KeywordValidator, DataQualityChecker
from src.utils.batch_processor import BatchProcessor, estimate_processing_time
def process_agricultural_photos(input_dir: str = "data/raw", output_dir: str = "outputs"):
"""Main function to process agricultural photos and generate keywords"""
def process_agricultural_photos(input_dir: str = "data/raw", output_dir: str = "outputs",
validate_quality: bool = True, batch_size: int = 500):
"""Enhanced function to process agricultural photos with quality validation"""
print("🚜 Smart Farm Photo Keyword Tagging AI")
print("=" * 50)
print("🚜 Smart Farm Photo Keyword Tagging AI - Enhanced Version")
print("=" * 60)
# Initialize components
print("Initializing image processor...")
print("Initializing components...")
image_processor = ImageProcessor(input_dir)
print("Initializing AI keyword generator...")
keyword_generator = AgricultureKeywordGenerator()
validator = KeywordValidator() if validate_quality else None
# Process images
# Get image files and estimate processing time
image_files = image_processor.get_image_files(input_dir)
if not image_files:
print("No images found to process!")
return
print(f"Found {len(image_files)} images to process")
time_estimate = estimate_processing_time(len(image_files))
print(f"Estimated processing time: {time_estimate['estimate']}")
# Process images with enhanced error handling
print(f"\nProcessing images from: {input_dir}")
image_df = image_processor.batch_process_images(input_dir)
if image_df.empty:
print("No images found to process!")
print("No valid images found to process!")
return
print(f"Found {len(image_df)} images to process")
# Generate keywords for each image
# Generate keywords for each image with quality validation
results = []
quality_scores = []
processing_start = time.time()
for idx, row in image_df.iterrows():
if 'error' in row:
print(f"Skipping {row['filename']} due to error: {row['error']}")
continue
print(f"Processing {row['filename']}...")
print(f"Processing {row['filename']}... ({idx+1}/{len(image_df)})")
try:
# Generate keywords and title
ai_results = keyword_generator.generate_keywords(row['filepath'])
# Create result row
# Validate quality if enabled
keyword_validation = validator.validate_keywords(ai_results['keywords']) if validator else None
title_validation = validator.validate_title(ai_results['title']) if validator else None
# Create result row with enhanced data
result = {
'filename': row['filename'],
'human_keywords': '', # Placeholder for human keywords
@@ -60,14 +78,28 @@ def process_agricultural_photos(input_dir: str = "data/raw", output_dir: str = "
'caption': ai_results['caption']
}
# Add quality scores if validation enabled
if validate_quality and keyword_validation and title_validation:
result.update({
'keyword_quality_score': keyword_validation['score'],
'title_quality_score': title_validation['score'],
'quality_issues': '; '.join(keyword_validation['issues'] + title_validation['issues'])
})
quality_scores.append(keyword_validation['score'])
results.append(result)
print(f" ✓ Generated {len(ai_results['keywords'])} keywords")
print(f" ✓ Generated {len(ai_results['keywords'])} keywords" +
(f" (Quality: {keyword_validation['score']:.1f})" if validate_quality and keyword_validation else ""))
except Exception as e:
print(f" ✗ Error processing {row['filename']}: {e}")
continue
# Create output DataFrame
# Create output DataFrame and save results
if not results:
print("No images were successfully processed!")
return None
results_df = pd.DataFrame(results)
# Save to CSV
@@ -77,11 +109,29 @@ def process_agricultural_photos(input_dir: str = "data/raw", output_dir: str = "
results_df.to_csv(output_file, index=False)
# Calculate processing statistics
processing_time = time.time() - processing_start
avg_time_per_image = processing_time / len(results) if results else 0
print(f"\n✅ Processing complete!")
print(f"Results saved to: {output_file}")
print(f"Processed {len(results_df)} images successfully")
print(f"Total processing time: {processing_time/60:.1f} minutes")
print(f"Average time per image: {avg_time_per_image:.1f} seconds")
# Display sample results
# Quality statistics if validation was enabled
if validate_quality and quality_scores:
avg_quality = sum(quality_scores) / len(quality_scores)
print(f"Average keyword quality score: {avg_quality:.1f}/100")
# Validate CSV output
csv_validation = DataQualityChecker.validate_csv_output(output_file)
if csv_validation['valid']:
print(f"✅ CSV validation passed - {csv_validation['completion_rate']['keywords']}% keyword completion")
else:
print(f"⚠️ CSV validation issues: {csv_validation['error']}")
# Display enhanced sample results
print("\n📊 Sample Results:")
print("-" * 80)
for idx, row in results_df.head(3).iterrows():
@@ -89,20 +139,41 @@ def process_agricultural_photos(input_dir: str = "data/raw", output_dir: str = "
print(f"Title: {row['ai_title']}")
print(f"Keywords: {row['ai_keywords']}")
print(f"Location: {row['location'] if row['location'] else 'Not available'}")
if validate_quality and 'keyword_quality_score' in row:
print(f"Quality Score: {row['keyword_quality_score']}/100")
print("-" * 80)
# Performance projections
print(f"\n🚀 Performance Projections:")
print(f"Time for 500 images: {(avg_time_per_image * 500)/60:.1f} minutes")
print(f"Time for 1000 images: {(avg_time_per_image * 1000)/60:.1f} minutes")
return output_file
if __name__ == "__main__":
    # Command-line entry point: parse the options, run the pipeline exactly
    # once with every option forwarded, then report the outcome.
    parser = argparse.ArgumentParser(description='Enhanced Agricultural Photo Keyword Tagging AI')
    parser.add_argument('--input', '-i', default='data/raw', help='Input directory with images')
    parser.add_argument('--output', '-o', default='outputs', help='Output directory for results')
    parser.add_argument('--no-validation', action='store_true', help='Skip quality validation')
    parser.add_argument('--batch-size', type=int, default=500, help='Batch size for processing')

    args = parser.parse_args()

    try:
        # Single invocation: the CLI flag is negative ("--no-validation"),
        # while the function parameter is positive ("validate_quality").
        output_file = process_agricultural_photos(
            args.input,
            args.output,
            validate_quality=not args.no_validation,
            batch_size=args.batch_size
        )

        # The function returns nothing (falsy) when no image could be
        # processed, so only announce success when a results path came back.
        if output_file:
            print(f"\n🎉 Success! Check your results in: {output_file}")
        else:
            print(f"\n⚠️ Processing completed but no results generated")

    except Exception as e:
        # Top-level boundary: show the message and the full traceback, then
        # exit with a non-zero status so calling scripts can detect failure.
        print(f"\n❌ Error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)