Update for strictly inference-based operation
This commit is contained in:
+22
-18
@@ -9,56 +9,60 @@ from utils.helpers import setup_logging
|
|||||||
setup_logging(level=logging.INFO)
|
setup_logging(level=logging.INFO)
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""
|
"""
|
||||||
Main function to run the Smart Farm Photo Keyword Tagging AI system.
|
Main function to run the Smart Farm Photo Keyword Tagging AI system.
|
||||||
It initializes the tagger, loads images and optional human keywords,
|
It initializes the tagger, loads images and optional human keywords,
|
||||||
processes the images in a batch, and saves the results.
|
processes the images in a batch, and saves the results.
|
||||||
"""
|
|
||||||
logger.info("Starting Smart Farm Tagger application...")
|
|
||||||
|
|
||||||
# Initialize the tagger
|
This version is inference-only - no training or fine-tuning capabilities.
|
||||||
|
"""
|
||||||
|
logger.info("Starting Smart Farm Tagger application (Inference Mode)...")
|
||||||
|
|
||||||
|
# Initialize the tagger (inference-only)
|
||||||
try:
|
try:
|
||||||
tagger = SmartFarmTagger()
|
tagger = SmartFarmTagger()
|
||||||
|
logger.info("SmartFarmTagger initialized in inference mode")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.critical(f"Failed to initialize SmartFarmTagger: {e}. Exiting.")
|
logger.critical(f"Failed to initialize SmartFarmTagger: {e}. Exiting.")
|
||||||
return
|
return
|
||||||
|
|
||||||
# Setup directories
|
# Setup directories
|
||||||
data_raw_images_dir = Path("data/raw/images")
|
data_raw_images_dir = Path("data/raw/images")
|
||||||
output_dir = Path("outputs")
|
output_dir = Path("outputs")
|
||||||
|
|
||||||
# Ensure output directory exists
|
# Ensure output directory exists
|
||||||
output_dir.mkdir(parents=True, exist_ok=True)
|
output_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
# Get image files
|
# Get image files
|
||||||
image_paths = get_image_paths(data_raw_images_dir)
|
image_paths = get_image_paths(data_raw_images_dir)
|
||||||
|
|
||||||
if not image_paths:
|
if not image_paths:
|
||||||
logger.warning(f"No images found in {data_raw_images_dir}. Please add images to process.")
|
logger.warning(f"No images found in {data_raw_images_dir}. Please add images to process.")
|
||||||
# Optionally, create the raw images directory if it doesn't exist
|
# Optionally, create the raw images directory if it doesn't exist
|
||||||
data_raw_images_dir.mkdir(parents=True, exist_ok=True)
|
data_raw_images_dir.mkdir(parents=True, exist_ok=True)
|
||||||
print(f"Created directory structure. Please add images to {data_raw_images_dir}")
|
print(f"Created directory structure. Please add images to {data_raw_images_dir}")
|
||||||
return
|
return
|
||||||
|
|
||||||
logger.info(f"Found {len(image_paths)} images to process.")
|
logger.info(f"Found {len(image_paths)} images to process.")
|
||||||
|
|
||||||
# Load human keywords if available for comparison or augmentation
|
# Load human keywords if available for comparison or augmentation
|
||||||
human_keywords_path = "data/human_keywords.csv"
|
human_keywords_path = "data/human_keywords.csv"
|
||||||
human_keywords_map = load_human_keywords(human_keywords_path)
|
human_keywords_map = load_human_keywords(human_keywords_path)
|
||||||
if human_keywords_map:
|
if human_keywords_map:
|
||||||
logger.info(f"Loaded human keywords for {len(human_keywords_map)} images.")
|
logger.info(f"Loaded human keywords for {len(human_keywords_map)} images.")
|
||||||
|
|
||||||
# Define output CSV path
|
# Define output CSV path
|
||||||
results_csv_path = output_dir / "farm_photo_keywords.csv"
|
results_csv_path = output_dir / "farm_photo_keywords.csv"
|
||||||
|
|
||||||
# Process batch of images
|
# Process batch of images (inference only)
|
||||||
results_df = tagger.process_batch(
|
results_df = tagger.process_batch(
|
||||||
image_paths=[str(p) for p in image_paths], # Convert Path objects to strings for compatibility
|
image_paths=[str(p) for p in image_paths], # Convert Path objects to strings for compatibility
|
||||||
output_path=str(results_csv_path),
|
output_path=str(results_csv_path),
|
||||||
human_keywords_map=human_keywords_map
|
human_keywords_map=human_keywords_map
|
||||||
)
|
)
|
||||||
|
|
||||||
# Print summary
|
# Print summary
|
||||||
print("\n" + "="*30)
|
print("\n" + "="*30)
|
||||||
print("=== Processing Summary ===")
|
print("=== Processing Summary ===")
|
||||||
@@ -72,7 +76,7 @@ def main():
|
|||||||
print("No AI keywords generated successfully.")
|
print("No AI keywords generated successfully.")
|
||||||
print(f"Results saved to: {results_csv_path}")
|
print(f"Results saved to: {results_csv_path}")
|
||||||
print("="*30)
|
print("="*30)
|
||||||
|
|
||||||
# Show results
|
# Show results
|
||||||
if not results_df.empty:
|
if not results_df.empty:
|
||||||
print("\n=== Sample Results (First 3) ===")
|
print("\n=== Sample Results (First 3) ===")
|
||||||
@@ -80,7 +84,7 @@ def main():
|
|||||||
print(f"\nImage: {row['filename']}")
|
print(f"\nImage: {row['filename']}")
|
||||||
print(f"AI Title: {row['ai_title']}")
|
print(f"AI Title: {row['ai_title']}")
|
||||||
print(f"AI Keywords: {', '.join(row['ai_keywords'])}")
|
print(f"AI Keywords: {', '.join(row['ai_keywords'])}")
|
||||||
if row['location'] and row['location'] != "None": # Check if location is not None or "None" string
|
if row['location'] and row['location'] != "None": # Check if location is not None or "None" string
|
||||||
print(f"Location: {row['location']}")
|
print(f"Location: {row['location']}")
|
||||||
if row['human_keywords']:
|
if row['human_keywords']:
|
||||||
print(f"Human Keywords: {', '.join(row['human_keywords'])}")
|
print(f"Human Keywords: {', '.join(row['human_keywords'])}")
|
||||||
@@ -88,6 +92,6 @@ def main():
|
|||||||
else:
|
else:
|
||||||
print("\nNo results to display.")
|
print("\nNo results to display.")
|
||||||
|
|
||||||
|
if __name__ == "__main__": # Fixed: was name -> __name__
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
main()
|
||||||
|
|
||||||
|
|||||||
@@ -16,11 +16,13 @@ class SmartFarmTagger:
|
|||||||
AI system for generating agricultural keywords and titles for farm photos.
|
AI system for generating agricultural keywords and titles for farm photos.
|
||||||
It leverages a vision-language model (BLIP) for image captioning and
|
It leverages a vision-language model (BLIP) for image captioning and
|
||||||
incorporates an agricultural vocabulary for enhanced keyword generation.
|
incorporates an agricultural vocabulary for enhanced keyword generation.
|
||||||
|
|
||||||
|
This version is inference-only - no training or fine-tuning capabilities.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, model_name: str = "Salesforce/blip-image-captioning-base"):
|
def __init__(self, model_name: str = "Salesforce/blip-image-captioning-base"):
|
||||||
"""
|
"""
|
||||||
Initialize the tagger with a vision-language model and an agricultural vocabulary.
|
Initialize the tagger with a pre-trained vision-language model for inference only.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
model_name (str): The name of the pre-trained BLIP model to use.
|
model_name (str): The name of the pre-trained BLIP model to use.
|
||||||
@@ -28,11 +30,15 @@ class SmartFarmTagger:
|
|||||||
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
||||||
logger.info(f"Using device: {self.device}")
|
logger.info(f"Using device: {self.device}")
|
||||||
|
|
||||||
# Load vision-language model for image understanding
|
# Load pre-trained vision-language model for inference only
|
||||||
try:
|
try:
|
||||||
self.processor = BlipProcessor.from_pretrained(model_name)
|
self.processor = BlipProcessor.from_pretrained(model_name)
|
||||||
self.model = BlipForConditionalGeneration.from_pretrained(model_name).to(self.device)
|
self.model = BlipForConditionalGeneration.from_pretrained(model_name).to(self.device)
|
||||||
logger.info(f"Successfully loaded model: {model_name}")
|
|
||||||
|
# Set model to evaluation mode (inference only)
|
||||||
|
self.model.eval()
|
||||||
|
|
||||||
|
logger.info(f"Successfully loaded pre-trained model: {model_name} (Inference Mode)")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to load BLIP model {model_name}: {e}")
|
logger.error(f"Failed to load BLIP model {model_name}: {e}")
|
||||||
raise
|
raise
|
||||||
@@ -79,7 +85,8 @@ class SmartFarmTagger:
|
|||||||
|
|
||||||
def generate_caption(self, image: Image.Image) -> str:
|
def generate_caption(self, image: Image.Image) -> str:
|
||||||
"""
|
"""
|
||||||
Generate a descriptive caption for the given image using the BLIP model.
|
Generate a descriptive caption for the given image using the pre-trained BLIP model.
|
||||||
|
Inference only - no model updates.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
image (Image.Image): The PIL Image object to caption.
|
image (Image.Image): The PIL Image object to caption.
|
||||||
@@ -90,6 +97,7 @@ class SmartFarmTagger:
|
|||||||
try:
|
try:
|
||||||
inputs = self.processor(images=image, return_tensors="pt").to(self.device)
|
inputs = self.processor(images=image, return_tensors="pt").to(self.device)
|
||||||
|
|
||||||
|
# Inference only - no gradient computation
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
out = self.model.generate(**inputs, max_length=50, num_beams=5)
|
out = self.model.generate(**inputs, max_length=50, num_beams=5)
|
||||||
|
|
||||||
@@ -104,6 +112,7 @@ class SmartFarmTagger:
|
|||||||
"""
|
"""
|
||||||
Extract agricultural keywords from the generated caption and optional image context.
|
Extract agricultural keywords from the generated caption and optional image context.
|
||||||
This method uses a predefined agricultural vocabulary and common patterns.
|
This method uses a predefined agricultural vocabulary and common patterns.
|
||||||
|
Pure inference - no learning or updating of vocabulary.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
caption (str): The generated caption for the image.
|
caption (str): The generated caption for the image.
|
||||||
@@ -117,7 +126,7 @@ class SmartFarmTagger:
|
|||||||
context_lower = image_context.lower()
|
context_lower = image_context.lower()
|
||||||
full_text = f"{caption_lower} {context_lower}"
|
full_text = f"{caption_lower} {context_lower}"
|
||||||
|
|
||||||
# Extract keywords from agricultural vocabulary
|
# Extract keywords from predefined agricultural vocabulary (inference only)
|
||||||
for category, terms in self.ag_vocabulary.items():
|
for category, terms in self.ag_vocabulary.items():
|
||||||
for term in terms:
|
for term in terms:
|
||||||
if term in full_text:
|
if term in full_text:
|
||||||
@@ -153,11 +162,12 @@ class SmartFarmTagger:
|
|||||||
if len(keyword_list) >= 5:
|
if len(keyword_list) >= 5:
|
||||||
break
|
break
|
||||||
|
|
||||||
return sorted(list(set(keyword_list)))[:10] # Limit to 10 unique keywords and sort
|
return sorted(list(set(keyword_list)))[:10] # Limit to 10 unique keywords and sort
|
||||||
|
|
||||||
def generate_title(self, caption: str, keywords: List[str]) -> str:
|
def generate_title(self, caption: str, keywords: List[str]) -> str:
|
||||||
"""
|
"""
|
||||||
Generate a descriptive product title based on the caption and keywords.
|
Generate a descriptive product title based on the caption and keywords.
|
||||||
|
Pure inference - no learning involved.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
caption (str): The generated image caption.
|
caption (str): The generated image caption.
|
||||||
@@ -188,7 +198,7 @@ class SmartFarmTagger:
|
|||||||
def process_single_image(self, image_path: str, human_keywords: Optional[List[str]] = None) -> Dict:
|
def process_single_image(self, image_path: str, human_keywords: Optional[List[str]] = None) -> Dict:
|
||||||
"""
|
"""
|
||||||
Process a single image to generate AI-driven tags (caption, keywords, title)
|
Process a single image to generate AI-driven tags (caption, keywords, title)
|
||||||
and extract EXIF location data.
|
and extract EXIF location data. Inference only.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
image_path (str): The file path to the image.
|
image_path (str): The file path to the image.
|
||||||
@@ -206,17 +216,17 @@ class SmartFarmTagger:
|
|||||||
if img.mode != 'RGB':
|
if img.mode != 'RGB':
|
||||||
img = img.convert('RGB')
|
img = img.convert('RGB')
|
||||||
|
|
||||||
# Generate caption
|
# Generate caption (inference only)
|
||||||
caption = self.generate_caption(img)
|
caption = self.generate_caption(img)
|
||||||
logger.info(f"Generated caption for {Path(image_path).name}: {caption}")
|
logger.info(f"Generated caption for {Path(image_path).name}: {caption}")
|
||||||
|
|
||||||
# Extract location from EXIF
|
# Extract location from EXIF
|
||||||
location = self.extract_location_from_exif(image_path)
|
location = self.extract_location_from_exif(image_path)
|
||||||
|
|
||||||
# Generate keywords
|
# Generate keywords (inference only)
|
||||||
ai_keywords = self.extract_agricultural_keywords(caption)
|
ai_keywords = self.extract_agricultural_keywords(caption)
|
||||||
|
|
||||||
# Generate title
|
# Generate title (inference only)
|
||||||
ai_title = self.generate_title(caption, ai_keywords)
|
ai_title = self.generate_title(caption, ai_keywords)
|
||||||
|
|
||||||
result = {
|
result = {
|
||||||
@@ -249,6 +259,7 @@ class SmartFarmTagger:
|
|||||||
human_keywords_map: Optional[Dict[str, List[str]]] = None) -> pd.DataFrame:
|
human_keywords_map: Optional[Dict[str, List[str]]] = None) -> pd.DataFrame:
|
||||||
"""
|
"""
|
||||||
Process a batch of images and save the results to a CSV file.
|
Process a batch of images and save the results to a CSV file.
|
||||||
|
Inference only - no training or model updates.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
image_paths (List[str]): A list of file paths to the images to process.
|
image_paths (List[str]): A list of file paths to the images to process.
|
||||||
@@ -262,12 +273,13 @@ class SmartFarmTagger:
|
|||||||
results = []
|
results = []
|
||||||
human_keywords_map = human_keywords_map or {}
|
human_keywords_map = human_keywords_map or {}
|
||||||
|
|
||||||
logger.info(f"Starting batch processing for {len(image_paths)} images...")
|
logger.info(f"Starting batch processing for {len(image_paths)} images (Inference Mode)...")
|
||||||
|
|
||||||
for i, image_path in enumerate(image_paths):
|
for i, image_path in enumerate(image_paths):
|
||||||
filename = Path(image_path).name
|
filename = Path(image_path).name
|
||||||
human_keywords = human_keywords_map.get(filename, [])
|
human_keywords = human_keywords_map.get(filename, [])
|
||||||
|
|
||||||
|
# Process single image (inference only)
|
||||||
result = self.process_single_image(image_path, human_keywords)
|
result = self.process_single_image(image_path, human_keywords)
|
||||||
results.append(result)
|
results.append(result)
|
||||||
|
|
||||||
|
|||||||
@@ -3,11 +3,11 @@ import logging
|
|||||||
|
|
||||||
def setup_logging(level=logging.INFO):
|
def setup_logging(level=logging.INFO):
|
||||||
"""
|
"""
|
||||||
Logging configuration.
|
Logging configuration for inference mode.
|
||||||
"""
|
"""
|
||||||
logging.basicConfig(level=level, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
logging.basicConfig(level=level, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
logger.info("Logging configured.")
|
logger.info("Logging configured for inference mode.")
|
||||||
|
|
||||||
|
|
||||||
def clean_string(text: str) -> str:
|
def clean_string(text: str) -> str:
|
||||||
|
|||||||
Reference in New Issue
Block a user