Update for strictly inference-based operation

This commit is contained in:
Ayomide
2025-07-28 22:32:40 +01:00
parent 941fe84236
commit 5473e4919a
3 changed files with 47 additions and 31 deletions
+22 -18
View File
@@ -9,56 +9,60 @@ from utils.helpers import setup_logging
setup_logging(level=logging.INFO) setup_logging(level=logging.INFO)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def main(): def main():
""" """
Main function to run the Smart Farm Photo Keyword Tagging AI system. Main function to run the Smart Farm Photo Keyword Tagging AI system.
It initializes the tagger, loads images and optional human keywords, It initializes the tagger, loads images and optional human keywords,
processes the images in a batch, and saves the results. processes the images in a batch, and saves the results.
"""
logger.info("Starting Smart Farm Tagger application...")
# Initialize the tagger This version is inference-only - no training or fine-tuning capabilities.
"""
logger.info("Starting Smart Farm Tagger application (Inference Mode)...")
# Initialize the tagger (inference-only)
try: try:
tagger = SmartFarmTagger() tagger = SmartFarmTagger()
logger.info("SmartFarmTagger initialized in inference mode")
except Exception as e: except Exception as e:
logger.critical(f"Failed to initialize SmartFarmTagger: {e}. Exiting.") logger.critical(f"Failed to initialize SmartFarmTagger: {e}. Exiting.")
return return
# Setup directories # Setup directories
data_raw_images_dir = Path("data/raw/images") data_raw_images_dir = Path("data/raw/images")
output_dir = Path("outputs") output_dir = Path("outputs")
# Ensure output directory exists # Ensure output directory exists
output_dir.mkdir(parents=True, exist_ok=True) output_dir.mkdir(parents=True, exist_ok=True)
# Get image files # Get image files
image_paths = get_image_paths(data_raw_images_dir) image_paths = get_image_paths(data_raw_images_dir)
if not image_paths: if not image_paths:
logger.warning(f"No images found in {data_raw_images_dir}. Please add images to process.") logger.warning(f"No images found in {data_raw_images_dir}. Please add images to process.")
# Optionally, create the raw images directory if it doesn't exist # Optionally, create the raw images directory if it doesn't exist
data_raw_images_dir.mkdir(parents=True, exist_ok=True) data_raw_images_dir.mkdir(parents=True, exist_ok=True)
print(f"Created directory structure. Please add images to {data_raw_images_dir}") print(f"Created directory structure. Please add images to {data_raw_images_dir}")
return return
logger.info(f"Found {len(image_paths)} images to process.") logger.info(f"Found {len(image_paths)} images to process.")
# Load human keywords if available for comparison or augmentation # Load human keywords if available for comparison or augmentation
human_keywords_path = "data/human_keywords.csv" human_keywords_path = "data/human_keywords.csv"
human_keywords_map = load_human_keywords(human_keywords_path) human_keywords_map = load_human_keywords(human_keywords_path)
if human_keywords_map: if human_keywords_map:
logger.info(f"Loaded human keywords for {len(human_keywords_map)} images.") logger.info(f"Loaded human keywords for {len(human_keywords_map)} images.")
# Define output CSV path # Define output CSV path
results_csv_path = output_dir / "farm_photo_keywords.csv" results_csv_path = output_dir / "farm_photo_keywords.csv"
# Process batch of images # Process batch of images (inference only)
results_df = tagger.process_batch( results_df = tagger.process_batch(
image_paths=[str(p) for p in image_paths], # Convert Path objects to strings for compatibility image_paths=[str(p) for p in image_paths], # Convert Path objects to strings for compatibility
output_path=str(results_csv_path), output_path=str(results_csv_path),
human_keywords_map=human_keywords_map human_keywords_map=human_keywords_map
) )
# Print summary # Print summary
print("\n" + "="*30) print("\n" + "="*30)
print("=== Processing Summary ===") print("=== Processing Summary ===")
@@ -72,7 +76,7 @@ def main():
print("No AI keywords generated successfully.") print("No AI keywords generated successfully.")
print(f"Results saved to: {results_csv_path}") print(f"Results saved to: {results_csv_path}")
print("="*30) print("="*30)
# Show results # Show results
if not results_df.empty: if not results_df.empty:
print("\n=== Sample Results (First 3) ===") print("\n=== Sample Results (First 3) ===")
@@ -80,7 +84,7 @@ def main():
print(f"\nImage: {row['filename']}") print(f"\nImage: {row['filename']}")
print(f"AI Title: {row['ai_title']}") print(f"AI Title: {row['ai_title']}")
print(f"AI Keywords: {', '.join(row['ai_keywords'])}") print(f"AI Keywords: {', '.join(row['ai_keywords'])}")
if row['location'] and row['location'] != "None": # Check if location is not None or "None" string if row['location'] and row['location'] != "None": # Check if location is not None or "None" string
print(f"Location: {row['location']}") print(f"Location: {row['location']}")
if row['human_keywords']: if row['human_keywords']:
print(f"Human Keywords: {', '.join(row['human_keywords'])}") print(f"Human Keywords: {', '.join(row['human_keywords'])}")
@@ -88,6 +92,6 @@ def main():
else: else:
print("\nNo results to display.") print("\nNo results to display.")
if __name__ == "__main__": # Fixed: was name -> __name__
if __name__ == "__main__":
main() main()
+23 -11
View File
@@ -16,11 +16,13 @@ class SmartFarmTagger:
AI system for generating agricultural keywords and titles for farm photos. AI system for generating agricultural keywords and titles for farm photos.
It leverages a vision-language model (BLIP) for image captioning and It leverages a vision-language model (BLIP) for image captioning and
incorporates an agricultural vocabulary for enhanced keyword generation. incorporates an agricultural vocabulary for enhanced keyword generation.
This version is inference-only - no training or fine-tuning capabilities.
""" """
def __init__(self, model_name: str = "Salesforce/blip-image-captioning-base"): def __init__(self, model_name: str = "Salesforce/blip-image-captioning-base"):
""" """
Initialize the tagger with a vision-language model and an agricultural vocabulary. Initialize the tagger with a pre-trained vision-language model for inference only.
Args: Args:
model_name (str): The name of the pre-trained BLIP model to use. model_name (str): The name of the pre-trained BLIP model to use.
@@ -28,11 +30,15 @@ class SmartFarmTagger:
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
logger.info(f"Using device: {self.device}") logger.info(f"Using device: {self.device}")
# Load vision-language model for image understanding # Load pre-trained vision-language model for inference only
try: try:
self.processor = BlipProcessor.from_pretrained(model_name) self.processor = BlipProcessor.from_pretrained(model_name)
self.model = BlipForConditionalGeneration.from_pretrained(model_name).to(self.device) self.model = BlipForConditionalGeneration.from_pretrained(model_name).to(self.device)
logger.info(f"Successfully loaded model: {model_name}")
# Set model to evaluation mode (inference only)
self.model.eval()
logger.info(f"Successfully loaded pre-trained model: {model_name} (Inference Mode)")
except Exception as e: except Exception as e:
logger.error(f"Failed to load BLIP model {model_name}: {e}") logger.error(f"Failed to load BLIP model {model_name}: {e}")
raise raise
@@ -79,7 +85,8 @@ class SmartFarmTagger:
def generate_caption(self, image: Image.Image) -> str: def generate_caption(self, image: Image.Image) -> str:
""" """
Generate a descriptive caption for the given image using the BLIP model. Generate a descriptive caption for the given image using the pre-trained BLIP model.
Inference only - no model updates.
Args: Args:
image (Image.Image): The PIL Image object to caption. image (Image.Image): The PIL Image object to caption.
@@ -90,6 +97,7 @@ class SmartFarmTagger:
try: try:
inputs = self.processor(images=image, return_tensors="pt").to(self.device) inputs = self.processor(images=image, return_tensors="pt").to(self.device)
# Inference only - no gradient computation
with torch.no_grad(): with torch.no_grad():
out = self.model.generate(**inputs, max_length=50, num_beams=5) out = self.model.generate(**inputs, max_length=50, num_beams=5)
@@ -104,6 +112,7 @@ class SmartFarmTagger:
""" """
Extract agricultural keywords from the generated caption and optional image context. Extract agricultural keywords from the generated caption and optional image context.
This method uses a predefined agricultural vocabulary and common patterns. This method uses a predefined agricultural vocabulary and common patterns.
Pure inference - no learning or updating of vocabulary.
Args: Args:
caption (str): The generated caption for the image. caption (str): The generated caption for the image.
@@ -117,7 +126,7 @@ class SmartFarmTagger:
context_lower = image_context.lower() context_lower = image_context.lower()
full_text = f"{caption_lower} {context_lower}" full_text = f"{caption_lower} {context_lower}"
# Extract keywords from agricultural vocabulary # Extract keywords from predefined agricultural vocabulary (inference only)
for category, terms in self.ag_vocabulary.items(): for category, terms in self.ag_vocabulary.items():
for term in terms: for term in terms:
if term in full_text: if term in full_text:
@@ -153,11 +162,12 @@ class SmartFarmTagger:
if len(keyword_list) >= 5: if len(keyword_list) >= 5:
break break
return sorted(list(set(keyword_list)))[:10] # Limit to 10 unique keywords and sort return sorted(list(set(keyword_list)))[:10] # Limit to 10 unique keywords and sort
def generate_title(self, caption: str, keywords: List[str]) -> str: def generate_title(self, caption: str, keywords: List[str]) -> str:
""" """
Generate a descriptive product title based on the caption and keywords. Generate a descriptive product title based on the caption and keywords.
Pure inference - no learning involved.
Args: Args:
caption (str): The generated image caption. caption (str): The generated image caption.
@@ -188,7 +198,7 @@ class SmartFarmTagger:
def process_single_image(self, image_path: str, human_keywords: Optional[List[str]] = None) -> Dict: def process_single_image(self, image_path: str, human_keywords: Optional[List[str]] = None) -> Dict:
""" """
Process a single image to generate AI-driven tags (caption, keywords, title) Process a single image to generate AI-driven tags (caption, keywords, title)
and extract EXIF location data. and extract EXIF location data. Inference only.
Args: Args:
image_path (str): The file path to the image. image_path (str): The file path to the image.
@@ -206,17 +216,17 @@ class SmartFarmTagger:
if img.mode != 'RGB': if img.mode != 'RGB':
img = img.convert('RGB') img = img.convert('RGB')
# Generate caption # Generate caption (inference only)
caption = self.generate_caption(img) caption = self.generate_caption(img)
logger.info(f"Generated caption for {Path(image_path).name}: {caption}") logger.info(f"Generated caption for {Path(image_path).name}: {caption}")
# Extract location from EXIF # Extract location from EXIF
location = self.extract_location_from_exif(image_path) location = self.extract_location_from_exif(image_path)
# Generate keywords # Generate keywords (inference only)
ai_keywords = self.extract_agricultural_keywords(caption) ai_keywords = self.extract_agricultural_keywords(caption)
# Generate title # Generate title (inference only)
ai_title = self.generate_title(caption, ai_keywords) ai_title = self.generate_title(caption, ai_keywords)
result = { result = {
@@ -249,6 +259,7 @@ class SmartFarmTagger:
human_keywords_map: Optional[Dict[str, List[str]]] = None) -> pd.DataFrame: human_keywords_map: Optional[Dict[str, List[str]]] = None) -> pd.DataFrame:
""" """
Process a batch of images and save the results to a CSV file. Process a batch of images and save the results to a CSV file.
Inference only - no training or model updates.
Args: Args:
image_paths (List[str]): A list of file paths to the images to process. image_paths (List[str]): A list of file paths to the images to process.
@@ -262,12 +273,13 @@ class SmartFarmTagger:
results = [] results = []
human_keywords_map = human_keywords_map or {} human_keywords_map = human_keywords_map or {}
logger.info(f"Starting batch processing for {len(image_paths)} images...") logger.info(f"Starting batch processing for {len(image_paths)} images (Inference Mode)...")
for i, image_path in enumerate(image_paths): for i, image_path in enumerate(image_paths):
filename = Path(image_path).name filename = Path(image_path).name
human_keywords = human_keywords_map.get(filename, []) human_keywords = human_keywords_map.get(filename, [])
# Process single image (inference only)
result = self.process_single_image(image_path, human_keywords) result = self.process_single_image(image_path, human_keywords)
results.append(result) results.append(result)
+2 -2
View File
@@ -3,11 +3,11 @@ import logging
def setup_logging(level=logging.INFO): def setup_logging(level=logging.INFO):
""" """
Logging configuration. Logging configuration for inference mode.
""" """
logging.basicConfig(level=level, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') logging.basicConfig(level=level, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
logger.info("Logging configured.") logger.info("Logging configured for inference mode.")
def clean_string(text: str) -> str: def clean_string(text: str) -> str: