From 5473e4919a9f0f9a5dcdd219b827adddd6e42922 Mon Sep 17 00:00:00 2001 From: Ayomide Date: Mon, 28 Jul 2025 22:32:40 +0100 Subject: [PATCH] update for strictly inference based --- src/main.py | 40 +++++++++++++++++++--------------- src/model/smart_farm_tagger.py | 34 +++++++++++++++++++---------- src/utils/helpers.py | 4 ++-- 3 files changed, 47 insertions(+), 31 deletions(-) diff --git a/src/main.py b/src/main.py index 3826056..bd95b98 100644 --- a/src/main.py +++ b/src/main.py @@ -9,56 +9,60 @@ from utils.helpers import setup_logging setup_logging(level=logging.INFO) logger = logging.getLogger(__name__) + def main(): """ Main function to run the Smart Farm Photo Keyword Tagging AI system. It initializes the tagger, loads images and optional human keywords, processes the images in a batch, and saves the results. - """ - logger.info("Starting Smart Farm Tagger application...") - # Initialize the tagger + This version is inference-only - no training or fine-tuning capabilities. + """ + logger.info("Starting Smart Farm Tagger application (Inference Mode)...") + + # Initialize the tagger (inference-only) try: tagger = SmartFarmTagger() + logger.info("SmartFarmTagger initialized in inference mode") except Exception as e: logger.critical(f"Failed to initialize SmartFarmTagger: {e}. Exiting.") return - + # Setup directories data_raw_images_dir = Path("data/raw/images") output_dir = Path("outputs") - + # Ensure output directory exists output_dir.mkdir(parents=True, exist_ok=True) - + # Get image files image_paths = get_image_paths(data_raw_images_dir) - + if not image_paths: logger.warning(f"No images found in {data_raw_images_dir}. Please add images to process.") # Optionally, create the raw images directory if it doesn't exist data_raw_images_dir.mkdir(parents=True, exist_ok=True) print(f"Created directory structure. Please add images to {data_raw_images_dir}") return - + logger.info(f"Found {len(image_paths)} images to process.") - + # Load human keywords if available for comparison or augmentation human_keywords_path = "data/human_keywords.csv" human_keywords_map = load_human_keywords(human_keywords_path) if human_keywords_map: logger.info(f"Loaded human keywords for {len(human_keywords_map)} images.") - + # Define output CSV path results_csv_path = output_dir / "farm_photo_keywords.csv" - - # Process batch of images + + # Process batch of images (inference only) results_df = tagger.process_batch( - image_paths=[str(p) for p in image_paths], # Convert Path objects to strings for compatibility + image_paths=[str(p) for p in image_paths], # Convert Path objects to strings for compatibility output_path=str(results_csv_path), human_keywords_map=human_keywords_map ) - + # Print summary print("\n" + "="*30) print("=== Processing Summary ===") @@ -72,7 +76,7 @@ def main(): print("No AI keywords generated successfully.") print(f"Results saved to: {results_csv_path}") print("="*30) - + # Show results if not results_df.empty: print("\n=== Sample Results (First 3) ===") @@ -80,7 +84,7 @@ def main(): print(f"\nImage: {row['filename']}") print(f"AI Title: {row['ai_title']}") print(f"AI Keywords: {', '.join(row['ai_keywords'])}") - if row['location'] and row['location'] != "None": # Check if location is not None or "None" string + if row['location'] and row['location'] != "None": # Check if location is not None or "None" string print(f"Location: {row['location']}") if row['human_keywords']: print(f"Human Keywords: {', '.join(row['human_keywords'])}") @@ -88,6 +92,6 @@ def main(): else: print("\nNo results to display.") - -if __name__ == "__main__": +if __name__ == "__main__": # Fixed: was name -> __name__ main() + diff --git a/src/model/smart_farm_tagger.py b/src/model/smart_farm_tagger.py index 2bea920..2e8d5b7 100644 --- a/src/model/smart_farm_tagger.py +++ b/src/model/smart_farm_tagger.py @@ -16,11 +16,13 @@ class SmartFarmTagger: AI system for generating agricultural keywords and titles for farm photos. It leverages a vision-language model (BLIP) for image captioning and incorporates an agricultural vocabulary for enhanced keyword generation. + + This version is inference-only - no training or fine-tuning capabilities. """ def __init__(self, model_name: str = "Salesforce/blip-image-captioning-base"): """ - Initialize the tagger with a vision-language model and an agricultural vocabulary. + Initialize the tagger with a pre-trained vision-language model for inference only. Args: model_name (str): The name of the pre-trained BLIP model to use. @@ -28,11 +30,15 @@ class SmartFarmTagger: self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") logger.info(f"Using device: {self.device}") - # Load vision-language model for image understanding + # Load pre-trained vision-language model for inference only try: self.processor = BlipProcessor.from_pretrained(model_name) self.model = BlipForConditionalGeneration.from_pretrained(model_name).to(self.device) - logger.info(f"Successfully loaded model: {model_name}") + + # Set model to evaluation mode (inference only) + self.model.eval() + + logger.info(f"Successfully loaded pre-trained model: {model_name} (Inference Mode)") except Exception as e: logger.error(f"Failed to load BLIP model {model_name}: {e}") raise @@ -79,7 +85,8 @@ class SmartFarmTagger: def generate_caption(self, image: Image.Image) -> str: """ - Generate a descriptive caption for the given image using the BLIP model. + Generate a descriptive caption for the given image using the pre-trained BLIP model. + Inference only - no model updates. Args: image (Image.Image): The PIL Image object to caption. @@ -90,6 +97,7 @@ class SmartFarmTagger: try: inputs = self.processor(images=image, return_tensors="pt").to(self.device) + # Inference only - no gradient computation with torch.no_grad(): out = self.model.generate(**inputs, max_length=50, num_beams=5) @@ -104,6 +112,7 @@ class SmartFarmTagger: """ Extract agricultural keywords from the generated caption and optional image context. This method uses a predefined agricultural vocabulary and common patterns. + Pure inference - no learning or updating of vocabulary. Args: caption (str): The generated caption for the image. @@ -117,7 +126,7 @@ class SmartFarmTagger: context_lower = image_context.lower() full_text = f"{caption_lower} {context_lower}" - # Extract keywords from agricultural vocabulary + # Extract keywords from predefined agricultural vocabulary (inference only) for category, terms in self.ag_vocabulary.items(): for term in terms: if term in full_text: @@ -153,11 +162,12 @@ class SmartFarmTagger: if len(keyword_list) >= 5: break - return sorted(list(set(keyword_list)))[:10] # Limit to 10 unique keywords and sort + return sorted(list(set(keyword_list)))[:10] # Limit to 10 unique keywords and sort def generate_title(self, caption: str, keywords: List[str]) -> str: """ Generate a descriptive product title based on the caption and keywords. + Pure inference - no learning involved. Args: caption (str): The generated image caption. @@ -188,7 +198,7 @@ class SmartFarmTagger: def process_single_image(self, image_path: str, human_keywords: Optional[List[str]] = None) -> Dict: """ Process a single image to generate AI-driven tags (caption, keywords, title) - and extract EXIF location data. + and extract EXIF location data. Inference only. Args: image_path (str): The file path to the image. @@ -206,17 +216,17 @@ class SmartFarmTagger: if img.mode != 'RGB': img = img.convert('RGB') - # Generate caption + # Generate caption (inference only) caption = self.generate_caption(img) logger.info(f"Generated caption for {Path(image_path).name}: {caption}") # Extract location from EXIF location = self.extract_location_from_exif(image_path) - # Generate keywords + # Generate keywords (inference only) ai_keywords = self.extract_agricultural_keywords(caption) - # Generate title + # Generate title (inference only) ai_title = self.generate_title(caption, ai_keywords) result = { @@ -249,6 +259,7 @@ class SmartFarmTagger: human_keywords_map: Optional[Dict[str, List[str]]] = None) -> pd.DataFrame: """ Process a batch of images and save the results to a CSV file. + Inference only - no training or model updates. Args: image_paths (List[str]): A list of file paths to the images to process. @@ -262,12 +273,13 @@ class SmartFarmTagger: results = [] human_keywords_map = human_keywords_map or {} - logger.info(f"Starting batch processing for {len(image_paths)} images...") + logger.info(f"Starting batch processing for {len(image_paths)} images (Inference Mode)...") for i, image_path in enumerate(image_paths): filename = Path(image_path).name human_keywords = human_keywords_map.get(filename, []) + # Process single image (inference only) result = self.process_single_image(image_path, human_keywords) results.append(result) diff --git a/src/utils/helpers.py b/src/utils/helpers.py index b18e399..fe23abe 100644 --- a/src/utils/helpers.py +++ b/src/utils/helpers.py @@ -3,11 +3,11 @@ import logging def setup_logging(level=logging.INFO): """ - Logging configuration. + Logging configuration for inference mode. """ logging.basicConfig(level=level, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') logger = logging.getLogger(__name__) - logger.info("Logging configured.") + logger.info("Logging configured for inference mode.") def clean_string(text: str) -> str: