#!/usr/bin/env python3
"""
OSM Calendar Extractor for the OpenEventDatabase.

This script fetches OpenStreetMap events from the osmcal.org RSS feed
and adds them to the OpenEventDatabase.

RSS Feed URL: https://osmcal.org/events.rss
"""

import json
import requests
import sys
import os
import feedparser
from datetime import datetime, timedelta
import pytz
from dateutil import parser as date_parser
import re
from urllib.parse import urlparse

# Add the parent directory to the path so we can import from oedb
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from oedb.utils.db import db_connect
from oedb.utils.logging import logger

# RSS Feed URL for osmcal.org
RSS_URL = "https://osmcal.org/events.rss"

# Fallback point (center of France) as GeoJSON [longitude, latitude].
# Kept immutable; callers receive a fresh list copy.
DEFAULT_COORDINATES = (2.2137, 46.2276)

# Two decimal numbers separated by commas and/or whitespace ("lat, lon").
_COORD_PATTERN = re.compile(r'(-?\d+\.\d+)[,\s]+(-?\d+\.\d+)')

# "Location: <text up to end of line>" inside a free-text description.
_LOCATION_PATTERN = re.compile(r'Location:?\s*([^\n]+)', re.IGNORECASE)

# Ordered keyword -> category table; the first matching row wins.
_EVENT_TYPES = (
    (("conference", "summit"), "community.osm.conference"),
    (("workshop", "training"), "community.osm.workshop"),
    (("mapathon", "mapping party"), "community.osm.mapathon"),
)
_DEFAULT_EVENT_TYPE = "community.osm.meetup"


def fetch_osmcal_data():
    """
    Fetch OpenStreetMap events from the osmcal.org RSS feed.

    Returns:
        list: The entries of the parsed feed, or an empty list when the feed
            has no entries or parsing raised an exception.
    """
    logger.info("Fetching data from osmcal.org RSS feed")

    try:
        # Parse the RSS feed
        feed = feedparser.parse(RSS_URL)

        if not feed.entries:
            logger.error("No entries found in RSS feed")
            return []

        logger.success(f"Successfully fetched {len(feed.entries)} events from osmcal.org")
        return feed.entries

    except Exception as e:
        # Best-effort: a feed failure must not crash the whole run.
        logger.error(f"Error fetching data from osmcal.org: {e}")
        return []


def extract_coordinates(location_str):
    """
    Extract coordinates from a location string.

    The string is scanned for pairs of decimal numbers written as
    "lat,lon" (comma and/or whitespace separated). The first pair that is a
    plausible position (latitude in [-90, 90], longitude in [-180, 180]) is
    returned.

    Args:
        location_str (str): A string containing location information.

    Returns:
        list: ``[longitude, latitude]`` (GeoJSON order). When the input is
            not a string or holds no valid pair, the default coordinates
            (center of France) are returned.
    """
    if not isinstance(location_str, str):
        return list(DEFAULT_COORDINATES)

    for match in _COORD_PATTERN.finditer(location_str):
        lat = float(match.group(1))
        lon = float(match.group(2))
        # Skip number pairs that cannot be a position (e.g. parts of an address).
        if -90.0 <= lat <= 90.0 and -180.0 <= lon <= 180.0:
            return [lon, lat]  # GeoJSON uses [longitude, latitude]

    # Default coordinates (center of France) if none found
    return list(DEFAULT_COORDINATES)


def parse_date(date_str):
    """
    Parse a date string into an ISO format string.

    Args:
        date_str (str): A string containing date information.

    Returns:
        str: An ISO 8601 string. Naive datetimes are interpreted as UTC. If
            the input cannot be parsed, the current UTC time is returned.
    """
    try:
        # Parse the date string
        dt = date_parser.parse(date_str)

        # Ensure the datetime is timezone-aware
        if dt.tzinfo is None:
            dt = pytz.UTC.localize(dt)

        return dt.isoformat()

    except Exception as e:
        logger.error(f"Error parsing date '{date_str}': {e}")
        # Return current date as fallback
        return datetime.now(pytz.UTC).isoformat()


def _classify_event(title, description):
    """Return the ``what`` category matching keywords in title/description."""
    lower_title = title.lower()
    lower_desc = description.lower()

    for terms, what in _EVENT_TYPES:
        if any(term in lower_title or term in lower_desc for term in terms):
            return what
    return _DEFAULT_EVENT_TYPE


def create_event(entry):
    """
    Create an event object from an RSS feed entry.

    The start is taken from the entry's ``published`` attribute (current UTC
    time when absent); the stop is always one day after the start. The
    location comes from a string ``where`` attribute if present, otherwise
    from a "Location: ..." line in the description.

    Args:
        entry: An entry from the osmcal.org RSS feed.

    Returns:
        dict: A GeoJSON Feature representing the event, or None if the entry
            could not be converted (the error is logged).
    """
    try:
        # Extract data from the entry
        title = entry.title
        link = entry.link
        description = getattr(entry, 'description', "") or ""

        # Start date: published date, or now when the entry has none.
        published = getattr(entry, 'published', None)
        if published:
            start_date = parse_date(published)
        else:
            start_date = datetime.now(pytz.UTC).isoformat()

        # The feed entry carries no end date here: use start + 1 day.
        end_date = (date_parser.parse(start_date) + timedelta(days=1)).isoformat()

        # Extract location and coordinates
        location = ""
        coordinates = list(DEFAULT_COORDINATES)

        where = getattr(entry, 'where', None)
        if isinstance(where, str) and where:
            location = where
            coordinates = extract_coordinates(location)
        elif description:
            # Try to extract location from description. A non-string
            # ``where`` also lands here instead of aborting the event.
            location_match = _LOCATION_PATTERN.search(description)
            if location_match:
                location = location_match.group(1).strip()
                coordinates = extract_coordinates(location)

        # Create the event object
        return {
            "type": "Feature",
            "geometry": {
                "type": "Point",
                "coordinates": coordinates,
            },
            "properties": {
                "type": "scheduled",
                "what": _classify_event(title, description),
                "what:series": "OpenStreetMap Events",
                "where": location,
                "label": title,
                "description": description,
                "start": start_date,
                "stop": end_date,
                "url": link,
                "source": "osmcal.org",
            },
        }

    except Exception as e:
        logger.error(f"Error creating event from entry: {e}")
        return None


def submit_event(event):
    """
    Submit an event to the OpenEventDatabase.

    Inserts the geometry into ``geo`` (if new), skips the event when an
    identical (what, when, geometry) row already exists in ``events``, and
    otherwise inserts and commits it. The connection is always closed, also
    when an error occurs.

    Args:
        event: A GeoJSON Feature representing the event.

    Returns:
        bool: True if the event was successfully submitted, False otherwise
            (invalid geometry, duplicate, insert conflict or any error).
    """
    db = None
    try:
        # Connect to the database
        db = db_connect()
        cur = db.cursor()

        # Extract event properties
        properties = event['properties']
        geometry = json.dumps(event['geometry'])

        # Insert the geometry into the geo table
        cur.execute("""
            INSERT INTO geo
            SELECT geom, md5(st_astext(geom)) as hash, st_centroid(geom) as geom_center
            FROM (SELECT st_setsrid(st_geomfromgeojson(%s),4326) as geom) as g
            WHERE ST_IsValid(geom)
            ON CONFLICT DO NOTHING
            RETURNING hash;
        """, (geometry,))

        # Get the geometry hash
        hash_result = cur.fetchone()

        if hash_result is None:
            # Nothing was inserted (conflict or invalid geometry): compute
            # the hash and validity directly.
            # NOTE(review): this uses st_asewkt while the insert above uses
            # st_astext - confirm both yield the same hash for the schema.
            cur.execute("""
                SELECT md5(st_asewkt(geom)),
                       ST_IsValid(geom),
                       ST_IsValidReason(geom)
                from (SELECT st_geomfromgeojson(%s) as geom) as g;
            """, (geometry,))
            hash_result = cur.fetchone()

        if hash_result is None or (len(hash_result) > 1 and not hash_result[1]):
            logger.error(f"Invalid geometry for event: {properties.get('label')}")
            return False

        geo_hash = hash_result[0]

        # Determine the bounds for the time range
        bounds = '[]' if properties['start'] == properties['stop'] else '[)'

        # Check if an event with the same properties already exists
        cur.execute("""
            SELECT events_id
            FROM events
            WHERE events_what = %s
              AND events_when = tstzrange(%s, %s, %s)
              AND events_geo = %s;
        """, (
            properties['what'],
            properties['start'],
            properties['stop'],
            bounds,
            geo_hash,
        ))

        existing_id = cur.fetchone()
        if existing_id:
            logger.info(f"Event already exists with ID: {existing_id[0]}")
            return False

        # Insert the event into the database
        cur.execute("""
            INSERT INTO events (events_type, events_what, events_when, events_tags, events_geo)
            VALUES (%s, %s, tstzrange(%s, %s, %s), %s, %s)
            ON CONFLICT DO NOTHING
            RETURNING events_id;
        """, (
            properties['type'],
            properties['what'],
            properties['start'],
            properties['stop'],
            bounds,
            json.dumps(properties),
            geo_hash,
        ))

        # Get the event ID
        event_id = cur.fetchone()
        if not event_id:
            logger.warning(f"Failed to create event: {properties.get('label')}")
            return False

        # Commit first so success is only reported once the row is durable.
        db.commit()
        logger.success(f"Event created with ID: {event_id[0]}")
        return True

    except Exception as e:
        logger.error(f"Error submitting event: {e}")
        return False

    finally:
        # Always release the connection, including on the error path.
        if db is not None:
            try:
                db.close()
            except Exception as e:
                logger.warning(f"Error closing database connection: {e}")


def main():
    """
    Main function to fetch OSM Calendar data and add events to the database.
    """
    logger.info("Starting OSM Calendar extractor")

    # Fetch data from osmcal.org
    entries = fetch_osmcal_data()

    if not entries:
        logger.warning("No entries found, exiting")
        return

    # Process each entry
    success_count = 0
    for entry in entries:
        # Create an event from the entry
        event = create_event(entry)
        if not event:
            continue

        # Submit the event to the database
        if submit_event(event):
            success_count += 1

    logger.success(f"Successfully added {success_count} events to the database")


if __name__ == "__main__":
    main()