""" Agricultural Photo Keyword Generator using BLIP-2 model """ import torch from transformers import BlipProcessor, BlipForConditionalGeneration from PIL import Image import re from typing import List, Dict, Optional class AgricultureKeywordGenerator: def __init__(self, model_path: Optional[str] = None): """ Initialize the BLIP-2 model for image captioning and keyword generation Args: model_path: Path to fine-tuned model. If None, uses pre-trained model. """ if model_path and os.path.exists(model_path): print(f"Loading fine-tuned agricultural model from: {model_path}") self.processor = BlipProcessor.from_pretrained(model_path) self.model = BlipForConditionalGeneration.from_pretrained(model_path) self.is_fine_tuned = True else: print("Loading pre-trained BLIP model for keyword generation...") self.processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base") self.model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base") self.is_fine_tuned = False if model_path: print(f"Warning: Fine-tuned model not found at {model_path}, using pre-trained model") # Enhanced agriculture-specific keywords with distinctions self.agriculture_keywords = { 'people': { 'farmer': ['farmer', 'crop farmer', 'grain farmer', 'vegetable farmer'], 'rancher': ['rancher', 'cattle rancher', 'livestock rancher', 'beef rancher'], 'dairy': ['dairy farmer', 'dairy worker', 'milker'], 'poultry': ['chicken farmer', 'poultry farmer', 'egg farmer'], 'worker': ['farm worker', 'agricultural worker', 'field worker', 'ranch hand'], 'gender': ['male farmer', 'female farmer', 'man', 'woman', 'boy', 'girl'] }, 'animals': { 'cattle': ['cow', 'cattle', 'bull', 'calf', 'beef cattle', 'dairy cow', 'holstein', 'angus'], 'poultry': ['chicken', 'rooster', 'hen', 'chick', 'turkey', 'duck', 'goose'], 'swine': ['pig', 'hog', 'swine', 'piglet', 'boar', 'sow'], 'sheep': ['sheep', 'lamb', 'ewe', 'ram', 'wool'], 'goats': ['goat', 'kid', 'billy goat', 'nanny goat'], 'horses': ['horse', 'mare', 'stallion', 'foal', 'pony'] }, 'crops': { 'grains': ['corn', 'wheat', 'rice', 'barley', 'oats', 'rye', 'sorghum'], 'legumes': ['soybean', 'beans', 'peas', 'lentils', 'peanuts'], 'vegetables': ['tomato', 'potato', 'carrot', 'onion', 'pepper', 'lettuce', 'cabbage'], 'fruits': ['apple', 'orange', 'grape', 'strawberry', 'peach', 'cherry'], 'cash_crops': ['cotton', 'tobacco', 'sugar beet', 'sunflower'] }, 'equipment': { 'tractors': ['tractor', 'farm tractor', 'john deere', 'case ih', 'new holland'], 'harvest': ['combine', 'harvester', 'thresher', 'picker'], 'tillage': ['plow', 'disc', 'cultivator', 'harrow', 'chisel plow'], 'planting': ['planter', 'seeder', 'drill', 'transplanter'], 'irrigation': ['sprinkler', 'pivot', 'irrigation', 'drip system'], 'livestock': ['milking machine', 'feeder', 'water tank', 'barn equipment'] }, 'locations': { 'fields': ['field', 'cropland', 'farmland', 'pasture', 'meadow'], 'buildings': ['barn', 'silo', 'grain bin', 'shed', 'farmhouse', 'greenhouse'], 'areas': ['farm', 'ranch', 'dairy', 'feedlot', 'orchard', 'vineyard'] }, 'activities': { 'crop': ['planting', 'seeding', 'harvesting', 'cultivation', 'irrigation'], 'livestock': ['feeding', 'milking', 'herding', 'breeding', 'grazing'], 'general': ['farming', 'agriculture', 'rural work', 'field work'] } } print("Model loaded successfully!") def generate_caption(self, image_path: str) -> str: """Generate a descriptive caption for the image""" try: image = Image.open(image_path).convert('RGB') inputs = self.processor(image, return_tensors="pt") with torch.no_grad(): out = self.model.generate(**inputs, max_length=50, num_beams=5) caption = self.processor.decode(out[0], skip_special_tokens=True) return caption except Exception as e: print(f"Error generating caption for {image_path}: {e}") return "" def extract_keywords_from_caption(self, caption: str) -> List[str]: """Extract agriculture-relevant keywords from caption with enhanced distinctions""" keywords = [] caption_lower = caption.lower() # Extract keywords from enhanced categories for main_category, subcategories in self.agriculture_keywords.items(): if isinstance(subcategories, dict): for subcategory, terms in subcategories.items(): for term in terms: if term in caption_lower: keywords.append(term) else: # Handle old format if any remains for term in subcategories: if term in caption_lower: keywords.append(term) # Enhanced descriptive words with agricultural context descriptive_patterns = [ r'\b(?:green|fresh|organic|natural|healthy|ripe|mature)\b', # Quality r'\b(?:rural|outdoor|countryside|pastoral|agricultural)\b', # Setting r'\b(?:sunny|cloudy|dawn|dusk|morning|evening)\b', # Time/Weather r'\b(?:large|small|big|little|huge|tiny|vast|wide)\b', # Size r'\b(?:young|old|new|vintage|modern|traditional)\b', # Age/Style r'\b(?:male|female|man|woman|boy|girl)\b' # Gender ] for pattern in descriptive_patterns: matches = re.findall(pattern, caption_lower) keywords.extend(matches) # Apply agricultural distinctions keywords = self._apply_agricultural_distinctions(keywords, caption_lower) # Remove duplicates and prioritize agricultural terms keywords = self._prioritize_keywords(keywords) return keywords[:10] # Limit to 10 keywords max def _apply_agricultural_distinctions(self, keywords: List[str], caption: str) -> List[str]: """Apply specific agricultural distinctions (farmer vs rancher, etc.)""" enhanced_keywords = keywords.copy() # Farmer vs Rancher distinction if any(term in caption for term in ['cattle', 'cow', 'beef', 'livestock', 'ranch']): if 'farmer' in enhanced_keywords: enhanced_keywords.remove('farmer') enhanced_keywords.append('rancher') elif any(term in caption for term in ['crop', 'grain', 'corn', 'wheat', 'field']): if 'rancher' in enhanced_keywords: enhanced_keywords.remove('rancher') enhanced_keywords.append('farmer') # Dairy farmer distinction if any(term in caption for term in ['milk', 'dairy', 'holstein']): if 'farmer' in enhanced_keywords: enhanced_keywords.remove('farmer') enhanced_keywords.append('dairy farmer') if 'rancher' in enhanced_keywords: enhanced_keywords.remove('rancher') enhanced_keywords.append('dairy farmer') # Chicken farmer (not rancher) if any(term in caption for term in ['chicken', 'poultry', 'hen', 'rooster']): if 'rancher' in enhanced_keywords: enhanced_keywords.remove('rancher') enhanced_keywords.append('chicken farmer') # Gender identification enhancement gender_indicators = { 'male': ['man', 'boy', 'male', 'father', 'son', 'husband'], 'female': ['woman', 'girl', 'female', 'mother', 'daughter', 'wife'] } for gender, indicators in gender_indicators.items(): if any(indicator in caption for indicator in indicators): if any(role in enhanced_keywords for role in ['farmer', 'rancher', 'dairy farmer']): # Add gender specification enhanced_keywords.append(f'{gender} farmer') return enhanced_keywords def _prioritize_keywords(self, keywords: List[str]) -> List[str]: """Prioritize agricultural keywords over generic ones""" # Define priority levels high_priority = ['farmer', 'rancher', 'dairy farmer', 'chicken farmer'] medium_priority = ['tractor', 'cattle', 'corn', 'wheat', 'barn', 'field'] prioritized = [] # Add high priority keywords first for keyword in keywords: if any(hp in keyword for hp in high_priority): prioritized.append(keyword) # Add medium priority keywords for keyword in keywords: if keyword not in prioritized and any(mp in keyword for mp in medium_priority): prioritized.append(keyword) # Add remaining keywords for keyword in keywords: if keyword not in prioritized: prioritized.append(keyword) # Remove duplicates while preserving order seen = set() result = [] for keyword in prioritized: if keyword not in seen: seen.add(keyword) result.append(keyword) return result def generate_keywords(self, image_path: str) -> Dict[str, any]: """Generate keywords and title for an agricultural image""" caption = self.generate_caption(image_path) keywords = self.extract_keywords_from_caption(caption) # If we don't have enough keywords, add some generic agricultural terms if len(keywords) < 5: generic_terms = ['agriculture', 'farming', 'rural', 'outdoor', 'field'] for term in generic_terms: if term not in keywords: keywords.append(term) if len(keywords) >= 5: break return { 'caption': caption, 'keywords': keywords[:10], # Limit to 10 keywords max 'title': self.generate_title(caption) } def generate_title(self, caption: str) -> str: """Generate a product title from the caption""" # Clean up the caption to make it more title-like title = caption.strip() if title and not title[0].isupper(): title = title[0].upper() + title[1:] # Add "Agricultural" prefix if not agriculture-related agriculture_terms = ['farm', 'agriculture', 'crop', 'livestock', 'rural'] if not any(term in title.lower() for term in agriculture_terms): title = f"Agricultural scene: {title}" return title