# src/model/keyword_generator.py

"""
Agricultural photo keyword generator built on the BLIP image-captioning model
"""

import re
from typing import Any, Dict, List, Optional

import torch
from PIL import Image
from transformers import BlipForConditionalGeneration, BlipProcessor

class AgricultureKeywordGenerator:
    """Caption agricultural photos with BLIP and derive keywords and a title.

    The caption produced by the model is scanned for a fixed vocabulary of
    agriculture terms (``self.agriculture_keywords``) plus a short list of
    descriptive adjectives; the caption is also reused as a product title.
    """

    # Adjectives that are kept as keywords whenever they appear in a caption.
    _DESCRIPTIVE_WORDS = re.compile(
        r'\b(?:green|fresh|organic|rural|outdoor|sunny|large|small|young|old|male|female)\b'
    )
    # Appended (in this order) when a caption yields too few keywords.
    _GENERIC_TERMS = ('agriculture', 'farming', 'rural', 'outdoor', 'field')
    _MIN_KEYWORDS = 5
    _MAX_KEYWORDS = 10

    def __init__(self, model_name: str = "Salesforce/blip-image-captioning-base"):
        """Load the BLIP processor and captioning model.

        Args:
            model_name: Identifier passed to ``from_pretrained`` for both the
                processor and the model. Defaults to the checkpoint that was
                previously hard-coded, so existing callers are unaffected.
        """
        print("Loading BLIP model for keyword generation...")
        self.processor = BlipProcessor.from_pretrained(model_name)
        self.model = BlipForConditionalGeneration.from_pretrained(model_name)

        # Agriculture-specific vocabulary, grouped by category. Dict order
        # determines the order in which matched keywords are returned.
        self.agriculture_keywords = {
            'people': ['farmer', 'rancher', 'agricultural worker', 'farm worker', 'dairy farmer'],
            'animals': ['cow', 'cattle', 'pig', 'chicken', 'livestock', 'dairy cow', 'beef cattle'],
            'crops': ['corn', 'wheat', 'soybean', 'cotton', 'rice', 'barley', 'oats'],
            'equipment': ['tractor', 'harvester', 'plow', 'irrigation', 'farm equipment'],
            'locations': ['field', 'farm', 'barn', 'pasture', 'greenhouse', 'ranch', 'farmland'],
            'activities': ['planting', 'harvesting', 'milking', 'feeding', 'cultivation']
        }

        print("Model loaded successfully!")

    def generate_caption(self, image_path: str) -> str:
        """Generate a descriptive caption for the image at ``image_path``.

        Args:
            image_path: Path of the image file to caption.

        Returns:
            The decoded caption, or ``""`` if anything goes wrong (the error
            is printed rather than raised).
        """
        try:
            # Close the file deterministically instead of relying on garbage
            # collection; convert() returns an independent in-memory image.
            with Image.open(image_path) as source:
                image = source.convert('RGB')
            inputs = self.processor(image, return_tensors="pt")

            with torch.no_grad():
                out = self.model.generate(**inputs, max_length=50, num_beams=5)

            return self.processor.decode(out[0], skip_special_tokens=True)
        except Exception as e:
            # Deliberate best-effort boundary: one unreadable image must not
            # abort a batch, so report the failure and return an empty caption.
            print(f"Error generating caption for {image_path}: {e}")
            return ""

    @staticmethod
    def _mentions(term: str, text: str) -> bool:
        """Return True if ``term`` occurs in ``text`` as a whole word or phrase.

        A trailing plural ``s``/``es`` is tolerated so that "cows" still
        matches ``cow``, while "cowboy" or "price" no longer match ``cow`` or
        ``rice`` the way a plain substring test would.
        """
        pattern = rf"(?<!\w){re.escape(term)}(?:e?s)?(?!\w)"
        return re.search(pattern, text) is not None

    def extract_keywords_from_caption(self, caption: str) -> List[str]:
        """Extract agriculture-relevant keywords from a caption.

        Args:
            caption: Caption text; matching is case-insensitive.

        Returns:
            Up to 10 unique keywords: vocabulary terms first (in category
            order), then descriptive adjectives in order of appearance.
        """
        if not caption:
            return []

        caption_lower = caption.lower()

        found = [
            term
            for terms in self.agriculture_keywords.values()
            for term in terms
            if self._mentions(term, caption_lower)
        ]
        found.extend(self._DESCRIPTIVE_WORDS.findall(caption_lower))

        # dict.fromkeys de-duplicates while preserving order, so both the
        # result order and the choice of which keywords survive the cap are
        # reproducible (a set would reorder strings between runs).
        return list(dict.fromkeys(found))[:self._MAX_KEYWORDS]

    def generate_keywords(self, image_path: str) -> Dict[str, Any]:
        """Generate caption, keywords and title for an agricultural image.

        Args:
            image_path: Path of the image file to process.

        Returns:
            A dict with ``'caption'`` (str), ``'keywords'`` (list of at most
            10 strings, padded with generic terms up to 5) and ``'title'``
            (str).
        """
        caption = self.generate_caption(image_path)
        keywords = self.extract_keywords_from_caption(caption)

        # Pad sparse results with generic agricultural terms.
        for term in self._GENERIC_TERMS:
            if len(keywords) >= self._MIN_KEYWORDS:
                break
            if term not in keywords:
                keywords.append(term)

        return {
            'caption': caption,
            'keywords': keywords[:self._MAX_KEYWORDS],
            'title': self.generate_title(caption)
        }

    def generate_title(self, caption: str) -> str:
        """Generate a product title from the caption.

        Args:
            caption: Caption text; surrounding whitespace is ignored.

        Returns:
            The caption with its first character upper-cased, prefixed with
            ``"Agricultural scene: "`` when it contains no agriculture term.
            An empty caption yields the bare ``"Agricultural scene"``.
        """
        title = caption.strip()
        if not title:
            # Captioning failed or produced nothing: avoid a dangling
            # "Agricultural scene: " with nothing after the colon.
            return "Agricultural scene"

        if not title[0].isupper():
            title = title[0].upper() + title[1:]

        # Substring match is intentional here so that e.g. "farmer" counts
        # as agriculture-related via "farm".
        agriculture_terms = ['farm', 'agriculture', 'crop', 'livestock', 'rural']
        lowered = title.lower()
        if not any(term in lowered for term in agriculture_terms):
            title = f"Agricultural scene: {title}"

        return title
Complete Smart Farm Photo Keyword Tagging AI System - All deliverables ready 2025-07-16 20:24:25 +01:00			`"""`
			`Agricultural Photo Keyword Generator using BLIP-2 model`
			`"""`

			`import torch`
			`from transformers import BlipProcessor, BlipForConditionalGeneration`
			`from PIL import Image`
			`import re`
			`from typing import List, Dict, Optional`

			`class AgricultureKeywordGenerator:`
			`def __init__(self):`
			`"""Initialize the BLIP-2 model for image captioning and keyword generation"""`
			`print("Loading BLIP model for keyword generation...")`
			`self.processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")`
			`self.model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")`

			`# Agriculture-specific keywords to enhance results`
			`self.agriculture_keywords = {`
			`'people': ['farmer', 'rancher', 'agricultural worker', 'farm worker', 'dairy farmer'],`
			`'animals': ['cow', 'cattle', 'pig', 'chicken', 'livestock', 'dairy cow', 'beef cattle'],`
			`'crops': ['corn', 'wheat', 'soybean', 'cotton', 'rice', 'barley', 'oats'],`
			`'equipment': ['tractor', 'harvester', 'plow', 'irrigation', 'farm equipment'],`
			`'locations': ['field', 'farm', 'barn', 'pasture', 'greenhouse', 'ranch', 'farmland'],`
			`'activities': ['planting', 'harvesting', 'milking', 'feeding', 'cultivation']`
			`}`

			`print("Model loaded successfully!")`

			`def generate_caption(self, image_path: str) -> str:`
			`"""Generate a descriptive caption for the image"""`
			`try:`
			`image = Image.open(image_path).convert('RGB')`
			`inputs = self.processor(image, return_tensors="pt")`

			`with torch.no_grad():`
			`out = self.model.generate(**inputs, max_length=50, num_beams=5)`

			`caption = self.processor.decode(out[0], skip_special_tokens=True)`
			`return caption`
			`except Exception as e:`
			`print(f"Error generating caption for {image_path}: {e}")`
			`return ""`

			`def extract_keywords_from_caption(self, caption: str) -> List[str]:`
			`"""Extract agriculture-relevant keywords from caption"""`
			`keywords = []`
			`caption_lower = caption.lower()`

			`# Extract keywords from each category`
			`for category, terms in self.agriculture_keywords.items():`
			`for term in terms:`
			`if term in caption_lower:`
			`keywords.append(term)`

			`# Add general descriptive words`
			`descriptive_words = re.findall(r'\b(?:green\|fresh\|organic\|rural\|outdoor\|sunny\|large\|small\|young\|old\|male\|female)\b', caption_lower)`
			`keywords.extend(descriptive_words)`

			`# Remove duplicates and limit to 10 keywords`
			`keywords = list(set(keywords))[:10]`

			`return keywords`

			`def generate_keywords(self, image_path: str) -> Dict[str, any]:`
			`"""Generate keywords and title for an agricultural image"""`
			`caption = self.generate_caption(image_path)`
			`keywords = self.extract_keywords_from_caption(caption)`

			`# If we don't have enough keywords, add some generic agricultural terms`
			`if len(keywords) < 5:`
			`generic_terms = ['agriculture', 'farming', 'rural', 'outdoor', 'field']`
			`for term in generic_terms:`
			`if term not in keywords:`
			`keywords.append(term)`
			`if len(keywords) >= 5:`
			`break`

			`return {`
			`'caption': caption,`
			`'keywords': keywords[:10], # Limit to 10 keywords max`
			`'title': self.generate_title(caption)`
			`}`

			`def generate_title(self, caption: str) -> str:`
			`"""Generate a product title from the caption"""`
			`# Clean up the caption to make it more title-like`
			`title = caption.strip()`
			`if title and not title[0].isupper():`
			`title = title[0].upper() + title[1:]`

			`# Add "Agricultural" prefix if not agriculture-related`
			`agriculture_terms = ['farm', 'agriculture', 'crop', 'livestock', 'rural']`
			`if not any(term in title.lower() for term in agriculture_terms):`
			`title = f"Agricultural scene: {title}"`

			`return title`