"""Extract site coordinates and dates from drone booking forms and fetch weather.

The weather data comes from the Open-Meteo forecast API. Parsing helpers are
pure functions of the booking dict; only ``fetch_weather_data`` performs I/O.
"""

import asyncio
import json
import re
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple

# NOTE: ``aiohttp`` is imported inside ``fetch_weather_data`` so the parsing
# helpers stay importable (and testable) without the HTTP client installed.

# Lower-case English month names; abbreviations are derived from the first
# three letters. Spelled out explicitly so parsing does not depend on locale.
_MONTH_NAMES = (
    "january", "february", "march", "april", "may", "june",
    "july", "august", "september", "october", "november", "december",
)
_MONTH_NUMBERS: Dict[str, int] = {
    key: number
    for number, name in enumerate(_MONTH_NAMES, start=1)
    for key in (name, name[:3])
}
# Longest alternatives first so "january" is preferred over "jan".
_MONTH_PATTERN = "|".join(sorted(_MONTH_NUMBERS, key=len, reverse=True))
_DAY_SUFFIX = r"(?:st|nd|rd|th)?"
_OPTIONAL_YEAR = r"(?:\s*,?\s*(?P<year>\d{4})\b)?"

# "January 23, 2025", "January 23rd, 2025", "Jan 24"
_MONTH_FIRST_DATE = re.compile(
    r"\b(?P<month>" + _MONTH_PATTERN + r")\.?\s+(?P<day>\d{1,2})"
    + _DAY_SUFFIX + r"\b" + _OPTIONAL_YEAR,
    re.IGNORECASE,
)
# "23 January 2025", "23rd Jan. 2025"
_DAY_FIRST_DATE = re.compile(
    r"\b(?P<day>\d{1,2})" + _DAY_SUFFIX + r"\s+(?P<month>" + _MONTH_PATTERN
    + r")\b\.?" + _OPTIONAL_YEAR,
    re.IGNORECASE,
)
# "2025-01-23"
_ISO_DATE = re.compile(r"\b(?P<year>\d{4})-(?P<month>\d{1,2})-(?P<day>\d{1,2})\b")

# Optionally signed decimal number, e.g. "53.4408", "-2.24", ".5".
_COORDINATE_NUMBER = re.compile(r"[-+]?(?:\d+(?:\.\d*)?|\.\d+)")
# A hemisphere marker that is NOT part of a longer word, so the "s" in
# "degrees" is never mistaken for "South".
_HEMISPHERE = re.compile(
    r"(?<![A-Za-z])(north|south|east|west|[NSEW])(?![A-Za-z])",
    re.IGNORECASE,
)
# Embedded form used in free-text site locations: "GPS: 53.4408° N, 2.2426° W".
_SITE_LOCATION_GPS = re.compile(
    r"GPS:\s*(\d+(?:\.\d+)?)\s*°\s*([NS]),\s*(\d+(?:\.\d+)?)\s*°\s*([EW])"
)


class DroneWeatherDataExtractor:
    """
    Extract location data from drone booking forms and fetch structured
    weather information for later use in analysis systems.
    """

    DEFAULT_BASE_URL = "https://api.open-meteo.com/v1/forecast"

    # Variables requested from the API. ``structure_weather_data`` already maps
    # further series (wind, precipitation, daily values, ...); they are simply
    # returned empty until the corresponding variable is requested here.
    CURRENT_VARIABLES = ("temperature_2m", "relative_humidity_2m", "wind_speed_10m")
    HOURLY_VARIABLES = ("temperature_2m", "relative_humidity_2m")

    # (output key, API key) pairs used to reshape the raw response.
    _HOURLY_FIELDS = (
        ("times", "time"),
        ("temperature", "temperature_2m"),
        ("humidity", "relative_humidity_2m"),
        ("wind_speed", "wind_speed_10m"),
        ("wind_direction", "wind_direction_10m"),
        ("precipitation", "precipitation"),
        ("weather_code", "weather_code"),
        ("visibility", "visibility"),
        ("cloud_cover", "cloud_cover"),
        ("pressure", "pressure_msl"),
    )
    _DAILY_FIELDS = (
        ("dates", "time"),
        ("temperature_max", "temperature_2m_max"),
        ("temperature_min", "temperature_2m_min"),
        ("sunrise", "sunrise"),
        ("sunset", "sunset"),
        ("precipitation_sum", "precipitation_sum"),
        ("wind_speed_max", "wind_speed_10m_max"),
        ("wind_direction_dominant", "wind_direction_10m_dominant"),
    )

    def __init__(self, base_url: str = DEFAULT_BASE_URL):
        """Create an extractor that queries the forecast endpoint at *base_url*."""
        self.base_url = base_url

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _section(booking_data, key: str) -> Dict:
        """Return ``booking_data[key]`` if it is a dict, otherwise ``{}``."""
        value = booking_data.get(key) if isinstance(booking_data, dict) else None
        return value if isinstance(value, dict) else {}

    @staticmethod
    def _parse_coordinate(raw, positive: str, negative: str) -> Optional[float]:
        """Parse one coordinate component into signed decimal degrees.

        Accepts plain numbers and strings such as ``"53.4408° N"``,
        ``"2.2426 W"`` or ``"-2.2426"``. *positive* / *negative* are the
        hemisphere letters for this axis (``"N"``/``"S"`` or ``"E"``/``"W"``).
        Returns ``None`` when no number can be found.
        """
        if isinstance(raw, bool):  # bool is an int subclass; never a coordinate
            return None
        if isinstance(raw, (int, float)):
            return float(raw)
        if not isinstance(raw, str):
            return None

        number = _COORDINATE_NUMBER.search(raw)
        if number is None:
            return None
        value = float(number.group())

        # Look for the hemisphere marker in the text around the number so that
        # "53.4408N" (no separator) is handled as well.
        remainder = raw[:number.start()] + " " + raw[number.end():]
        hemisphere = _HEMISPHERE.search(remainder)
        if hemisphere:
            letter = hemisphere.group(1)[0].upper()
            if letter == negative:
                return -abs(value)
            if letter == positive:
                return abs(value)
        return value

    @staticmethod
    def _validate_coordinates(latitude: float, longitude: float) -> Tuple[float, float]:
        """Return the pair unchanged, or raise ``ValueError`` if out of range."""
        # The chained comparisons are also False for NaN, which is rejected too.
        if not (-90.0 <= latitude <= 90.0 and -180.0 <= longitude <= 180.0):
            raise ValueError(
                f"Coordinates out of range: latitude={latitude}, longitude={longitude}"
            )
        return latitude, longitude

    @staticmethod
    def _date_candidates(value) -> List[str]:
        """Return the strings contained in a date field (a string or a list)."""
        if isinstance(value, str):
            return [value]
        if isinstance(value, (list, tuple)):
            return [item for item in value if isinstance(item, str)]
        return []

    @staticmethod
    def _parse_date_text(text: str) -> Optional[datetime]:
        """Return the first valid calendar date found in *text*, else ``None``.

        Recognises ISO dates and English month-name dates in either order. A
        month-name date without a year is assumed to be in the current year.
        """
        for pattern in (_ISO_DATE, _MONTH_FIRST_DATE, _DAY_FIRST_DATE):
            for match in pattern.finditer(text):
                month_text = match["month"]
                if month_text.isdigit():
                    month = int(month_text)
                else:
                    month = _MONTH_NUMBERS[month_text.lower()]
                year = int(match["year"]) if match["year"] else datetime.now().year
                try:
                    return datetime(year, month, int(match["day"]))
                except ValueError:
                    # Impossible date such as "February 30"; keep looking.
                    continue
        return None

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def extract_coordinates(self, booking_data: Dict) -> Tuple[float, float]:
        """Extract latitude and longitude from booking form data.

        Sources, in order of preference:

        1. ``site_information.gps_coordinates`` with ``latitude`` and
           ``longitude`` entries (numbers, or strings like ``"53.4408° N"``).
        2. A ``"GPS: <lat>° <N|S>, <lng>° <E|W>"`` fragment inside
           ``form.site_location``.

        Returns:
            ``(latitude, longitude)`` in signed decimal degrees.

        Raises:
            ValueError: if no coordinates can be found or they are outside the
                valid latitude/longitude ranges.
        """
        gps_coords = self._section(booking_data, "site_information").get("gps_coordinates")
        if isinstance(gps_coords, dict):
            latitude = self._parse_coordinate(gps_coords.get("latitude"), "N", "S")
            longitude = self._parse_coordinate(gps_coords.get("longitude"), "E", "W")
            if latitude is not None and longitude is not None:
                return self._validate_coordinates(latitude, longitude)

        # Fallback: try to extract from the free-text site_location string.
        site_location = self._section(booking_data, "form").get("site_location")
        if isinstance(site_location, str):
            match = _SITE_LOCATION_GPS.search(site_location)
            if match:
                lat, lat_dir, lng, lng_dir = match.groups()
                latitude = float(lat) if lat_dir == "N" else -float(lat)
                longitude = float(lng) if lng_dir == "E" else -float(lng)
                print(f"Extracted coordinates: {latitude}, {longitude}")
                return self._validate_coordinates(latitude, longitude)

        raise ValueError("Could not extract valid coordinates from booking data")

    def extract_booking_info(self, booking_data: Dict) -> Dict:
        """Extract relevant booking information in structured format.

        Missing sections or fields yield ``None`` values. Returns ``{}`` when
        *booking_data* is not a dict.
        """
        if not isinstance(booking_data, dict):
            print(
                "Error extracting booking info: "
                f"expected a dict, got {type(booking_data).__name__}"
            )
            return {}

        job_overview = self._section(booking_data, "job_overview")
        site_info = self._section(booking_data, "site_information")
        form_info = self._section(booking_data, "form")
        timing = self._section(booking_data, "timing")

        return {
            "job_id": booking_data.get("job_id"),
            "job_number": job_overview.get("job_number"),
            "site_name": site_info.get("site_name"),
            "region": site_info.get("region"),
            "full_address": site_info.get("full_address"),
            "asset_type": form_info.get("asset_type"),
            "system_size": form_info.get("system_size"),
            "survey_purpose": form_info.get("survey_purpose"),
            "assigned_engineer": form_info.get("assigned_engineer"),
            "contact_person": form_info.get("contact_person"),
            "contact_phone": form_info.get("contact_phone"),
            "dates": {
                "assigned_date": job_overview.get("assigned_date"),
                "preferred_dates": form_info.get("preferred_dates"),
                "selected_slot": form_info.get("selected_slot"),
            },
            "timing": {
                "start_time": timing.get("start_time"),
                "end_time": timing.get("end_time"),
                "duration": timing.get("survey_duration"),
                "buffer_time": timing.get("buffer_time"),
            },
            "access_details": {
                "access_type": form_info.get("access_type"),
                "vehicle_access": form_info.get("vehicle_access"),
                "access_details": form_info.get("access_details"),
            },
            "safety_requirements": form_info.get("special_safety_requirements"),
            "additional_requirements": form_info.get("additional_requirements"),
            "booking_timestamp": form_info.get("booking_timestamp"),
            "booking_id": form_info.get("booking_id"),
        }

    def parse_target_date(self, booking_data: Dict) -> datetime:
        """Extract and parse the target date from booking data.

        Checks ``job_overview.assigned_date``, ``form.preferred_dates`` and
        ``form.selected_slot`` in that order. Each field may be a ``datetime``,
        a string, or a list of strings. The first recognisable date wins.

        Returns:
            The parsed date at midnight, or "now + 1 day" when no field
            contains a recognisable date.
        """
        job_overview = self._section(booking_data, "job_overview")
        form_info = self._section(booking_data, "form")
        date_fields = (
            job_overview.get("assigned_date"),
            form_info.get("preferred_dates"),
            form_info.get("selected_slot"),
        )

        for date_field in date_fields:
            if isinstance(date_field, datetime):
                return date_field
            for text in self._date_candidates(date_field):
                parsed = self._parse_date_text(text)
                if parsed is not None:
                    return parsed

        # Default fallback
        return datetime.now() + timedelta(days=1)

    async def fetch_weather_data(self, latitude: float, longitude: float,
                                 target_date: datetime, days_range: int = 3) -> Dict:
        """Fetch raw weather data for the given coordinates.

        NOTE(review): *target_date* and *days_range* are accepted for interface
        compatibility but are not sent to the API, so the response covers the
        API's default forecast window. Sending ``start_date``/``end_date``
        should be confirmed against the API's allowed date range first.

        Returns:
            The decoded JSON response.

        Raises:
            RuntimeError: if the API responds with a status other than 200.
        """
        import aiohttp  # deferred; see the note at the top of the module

        params = {
            "latitude": latitude,
            "longitude": longitude,
            # The API expects comma-separated variable lists.
            "current": ",".join(self.CURRENT_VARIABLES),
            "hourly": ",".join(self.HOURLY_VARIABLES),
        }

        async with aiohttp.ClientSession() as session:
            async with session.get(self.base_url, params=params) as response:
                if response.status != 200:
                    raise RuntimeError(f"Weather API request failed: {response.status}")
                return await response.json()

    def structure_weather_data(self, raw_weather: Dict, target_date: datetime) -> Dict:
        """Structure raw weather data into organized format.

        Series that are absent from *raw_weather* are returned as empty lists.
        On malformed input a dict with a single ``"error"`` key is returned
        instead of raising.
        """
        try:
            hourly = raw_weather.get("hourly") or {}
            daily = raw_weather.get("daily") or {}
            return {
                "location": {
                    "latitude": raw_weather.get("latitude"),
                    "longitude": raw_weather.get("longitude"),
                    "elevation": raw_weather.get("elevation"),
                    "timezone": raw_weather.get("timezone"),
                },
                "current_conditions": raw_weather.get("current", {}),
                "hourly_forecast": {
                    name: hourly.get(api_key, []) for name, api_key in self._HOURLY_FIELDS
                },
                "daily_forecast": {
                    name: daily.get(api_key, []) for name, api_key in self._DAILY_FIELDS
                },
                "target_date": target_date.strftime("%Y-%m-%d"),
                "data_retrieved_at": datetime.now().isoformat(),
            }
        except (AttributeError, TypeError, ValueError) as e:
            return {"error": f"Error structuring weather data: {e}"}

    async def extract_booking_weather_data(self, booking_data: Dict) -> Dict:
        """Main extraction function - returns structured data for external use.

        Extracts coordinates and target date, fetches the weather and returns
        ``{"weather_data": ..., "extraction_metadata": ...}``. Any failure is
        reported as ``{"error", "job_id", "processed_at"}`` rather than raised.
        """
        try:
            latitude, longitude = self.extract_coordinates(booking_data)
            print(f"Latitude: {latitude}, Longitude: {longitude}")
            target_date = self.parse_target_date(booking_data)

            raw_weather = await self.fetch_weather_data(latitude, longitude, target_date)
            structured_weather = self.structure_weather_data(raw_weather, target_date)

            return {
                "weather_data": structured_weather,
                "extraction_metadata": {
                    "processed_at": datetime.now().isoformat(),
                    "api_endpoint": self.base_url,
                    "data_source": "open-meteo",
                },
            }
        except Exception as e:  # top-level boundary: always return a result dict
            # Guard the lookup so reporting the error cannot itself fail when
            # the input was not a dict.
            if isinstance(booking_data, dict):
                job_id = booking_data.get("job_id", "unknown")
            else:
                job_id = "unknown"
            return {
                "error": f"Data extraction failed: {e}",
                "job_id": job_id,
                "processed_at": datetime.now().isoformat(),
            }


# Example usage
async def main():
    """Demonstrate the data extraction system."""
    # Sample booking data
    booking_form_input = {
        "job_id": "1043",
        "job_overview": {
            "job_number": "Job #1043",
            "site_name": "Hightower Solar Farm",
            "assigned_date": "January 23, 2025",
            "status": "Scheduled",
        },
        "site_information": {
            "site_name": "Hightower Solar Farm",
            "region": "North England",
            "full_address": "Grange Lane, Manchester M34 7TF",
            "gps_coordinates": {
                "latitude": "13.41° N",
                "longitude": "52.52 W",
            },
        },
        "timing": {
            "start_time": "09:00 AM",
            "end_time": "10:30 AM",
            "survey_duration": "30–45 mins",
            "buffer_time": "45 mins",
        },
        "form": {
            "asset_type": "Solar Farm",
            "system_size": "5.2 MW capacity, approximately 16,000 panels across 12 hectares",
            "survey_purpose": "Insurance assessment",
            "assigned_engineer": "David Wilson - 0161-555-0876",
            "contact_person": "Sarah Thompson",
            "contact_phone": "0161-555-0234",
        },
    }

    # Initialize extractor
    extractor = DroneWeatherDataExtractor()

    # Extract structured data
    result = await extractor.extract_booking_weather_data(booking_form_input)

    # Display structured output
    print("=== STRUCTURED DATA EXTRACTION ===")
    print(json.dumps(result, indent=2))


if __name__ == "__main__":
    asyncio.run(main())