From 8613d218cdba9a76ca23ea63814ae839d2025f74 Mon Sep 17 00:00:00 2001 From: Tykayn Date: Thu, 18 Sep 2025 19:27:28 +0200 Subject: [PATCH] up demo links and map controls --- README.md | 18 +- doc/database_schema.svg | 107 ++++ extractors/osm_cal.py | 467 +++++++++++------- requirements.txt | 15 +- server_config/SYSTEMD_SERVICE_INSTALLATION.md | 2 +- 5 files changed, 420 insertions(+), 189 deletions(-) create mode 100644 doc/database_schema.svg mode change 100644 => 100755 extractors/osm_cal.py diff --git a/README.md b/README.md index ba353f2..ad328b0 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ # OpenEventDatabase Backend +![oedb.png](oedb.png) OpenEventDatabase (OEDB) is a database for events with geographic information. It is a collaborative way to share things that have no space in OpenStreetMap. @@ -156,10 +157,21 @@ créer une page de démo qui permet de modifier un évènement, faire un lien ve vérifier le fonctionnement des endpoints de recherche avec les queryparameters, les mettre dans la page de démo. la page /demo/by-what a une erreur, Error: Expecting value: line 1 column 1 (char 0) -récupérer les évènements depuis osmcal dans esm_cal.py -dans les extracteurs, vérifier qu'il n'existe pas déjà des évènements avec les mês propriétés avant de les créer. +récupérer les évènements depuis osmcal dans osm_cal.py ✓ +dans les extracteurs, vérifier qu'il n'existe pas déjà des évènements avec les mês propriétés avant de les créer. 
✓ -Error: Expecting value: line 1 column 1 (char 0) +## Database Schema + +The following diagram shows the database schema for the OpenEventDatabase: + +![Database Schema](doc/database_schema.svg) + +The database consists of three main tables: +- **events**: Stores event data including type, what, when, geo reference, and tags +- **events_deleted**: Archive of deleted events +- **geo**: Stores geometry data referenced by events + +The events table has a foreign key relationship with the geo table through the events_geo field, which references the hash field in the geo table. -- il manque l'attribution openstreetmap sur les cartes maplibre. ✓ diff --git a/doc/database_schema.svg b/doc/database_schema.svg new file mode 100644 index 0000000..1466a59 --- /dev/null +++ b/doc/database_schema.svg @@ -0,0 +1,107 @@ + + + + + + + + + + + OpenEventDatabase Schema + + + + + events + + events_id (uuid) + createdate (timestamp) + lastupdate (timestamp) + events_type (text) + events_what (text) + events_when (tstzrange) + events_geo (text) → geo.hash + events_tags (jsonb) + + + + + events_deleted + + events_id (uuid) + createdate (timestamp) + lastupdate (timestamp) + events_type (text) + events_what (text) + events_when (tstzrange) + events_geo (text) + events_tags (jsonb) + + + + + geo + + geom (geometry) + hash (text) + geom_center (point) + idx (geometry) + + + + + + + + Indexes + + events_idx_antidup (unique) + events_idx_id (unique) + events_idx_lastupdate + events_idx_what (spgist) + events_idx_when (spgist) + geo_geom (gist) + geo_idx (gist) + events_idx_where_osm (spgist) + \ No newline at end of file diff --git a/extractors/osm_cal.py b/extractors/osm_cal.py old mode 100644 new mode 100755 index bbbbdd2..cfffb52 --- a/extractors/osm_cal.py +++ b/extractors/osm_cal.py @@ -2,8 +2,8 @@ """ OSM Calendar Extractor for the OpenEventDatabase. -This script fetches OpenStreetMap events from the osmcal.org RSS feed -and adds them to the OpenEventDatabase. 
+This script fetches events from the OpenStreetMap Calendar RSS feed +and adds them to the OpenEventDatabase if they don't already exist. RSS Feed URL: https://osmcal.org/events.rss """ @@ -12,12 +12,10 @@ import json import requests import sys import os -import feedparser -from datetime import datetime, timedelta -import pytz -from dateutil import parser as date_parser +import xml.etree.ElementTree as ET import re -from urllib.parse import urlparse +import html +from datetime import datetime, timedelta # Add the parent directory to the path so we can import from oedb sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) @@ -25,143 +23,219 @@ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..') from oedb.utils.db import db_connect from oedb.utils.logging import logger -# RSS Feed URL for osmcal.org +# RSS Feed URL for OSM Calendar RSS_URL = "https://osmcal.org/events.rss" -def fetch_osmcal_data(): +def fetch_osm_calendar_data(): """ - Fetch OpenStreetMap events from the osmcal.org RSS feed. - + Fetch events from the OSM Calendar RSS feed. + Returns: - list: A list of event entries from the RSS feed. + list: A list of event items from the RSS feed. 
""" - logger.info("Fetching data from osmcal.org RSS feed") - + logger.info("Fetching data from OSM Calendar RSS feed") + try: - # Parse the RSS feed - feed = feedparser.parse(RSS_URL) - - if not feed.entries: - logger.error("No entries found in RSS feed") + response = requests.get(RSS_URL) + response.raise_for_status() # Raise an exception for HTTP errors + + # Parse the XML response + root = ET.fromstring(response.content) + + # Find all item elements (events) + channel = root.find('channel') + if channel is None: + logger.error("No channel element found in RSS feed") return [] - - logger.success(f"Successfully fetched {len(feed.entries)} events from osmcal.org") - return feed.entries - + + items = channel.findall('item') + + if not items: + logger.error("No items found in RSS feed") + return [] + + logger.success(f"Successfully fetched {len(items)} events from OSM Calendar RSS feed") + return items + + except requests.exceptions.RequestException as e: + logger.error(f"Error fetching data from OSM Calendar RSS feed: {e}") + return [] + except ET.ParseError as e: + logger.error(f"Error parsing XML response: {e}") + return [] except Exception as e: - logger.error(f"Error fetching data from osmcal.org: {e}") + logger.error(f"Unexpected error fetching OSM Calendar data: {e}") return [] -def extract_coordinates(location_str): +def parse_event_dates(description): """ - Extract coordinates from a location string. - - Args: - location_str (str): A string containing location information. - - Returns: - tuple: A tuple containing (longitude, latitude) or None if not found. 
- """ - # Try to find coordinates in the format "lat,lon" or similar - coord_pattern = r'(-?\d+\.\d+)[,\s]+(-?\d+\.\d+)' - match = re.search(coord_pattern, location_str) - - if match: - lat = float(match.group(1)) - lon = float(match.group(2)) - return [lon, lat] # GeoJSON uses [longitude, latitude] - - # Default coordinates (center of France) if none found - return [2.2137, 46.2276] + Parse event dates from the description. -def parse_date(date_str): - """ - Parse a date string into an ISO format string. - Args: - date_str (str): A string containing date information. - + description (str): The event description HTML. + Returns: - str: An ISO format date string. + tuple: A tuple containing (start_date, end_date) as ISO format strings. """ try: - # Parse the date string - dt = date_parser.parse(date_str) - - # Ensure the datetime is timezone-aware - if dt.tzinfo is None: - dt = pytz.UTC.localize(dt) - - return dt.isoformat() - - except Exception as e: - logger.error(f"Error parsing date '{date_str}': {e}") - # Return current date as fallback - return datetime.now(pytz.UTC).isoformat() + # Extract the date information from the description + date_pattern = r'(\d+)(?:st|nd|rd|th)\s+(\w+)(?:\s+(\d+):(\d+)(?:\s+–\s+(\d+):(\d+))?)?(?:\s+\(([^)]+)\))?(?:\s+–\s+(\d+)(?:st|nd|rd|th)\s+(\w+))?' 
+ date_match = re.search(date_pattern, description) -def create_event(entry): + if not date_match: + # Try alternative pattern for single day with time range + date_pattern = r'(\d+)(?:st|nd|rd|th)\s+(\w+)\s+(\d+):(\d+)\s+–\s+(\d+):(\d+)' + date_match = re.search(date_pattern, description) + + if date_match: + # Extract date components + day = int(date_match.group(1)) + month_name = date_match.group(2) + + # Convert month name to month number + month_map = { + 'January': 1, 'February': 2, 'March': 3, 'April': 4, + 'May': 5, 'June': 6, 'July': 7, 'August': 8, + 'September': 9, 'October': 10, 'November': 11, 'December': 12 + } + + # Try to match the month name (case insensitive) + month = None + for name, num in month_map.items(): + if month_name.lower() == name.lower(): + month = num + break + + if month is None: + # If month name not found, use current month + month = datetime.now().month + logger.warning(f"Could not parse month name: {month_name}, using current month") + + # Get current year (assuming events are current or future) + current_year = datetime.now().year + + # Create start date + try: + start_date = datetime(current_year, month, day) + except ValueError: + # Handle invalid dates (e.g., February 30) + logger.warning(f"Invalid date: {day} {month_name} {current_year}, using current date") + start_date = datetime.now() + + # Check if there's an end date + if len(date_match.groups()) >= 8 and date_match.group(8): + end_day = int(date_match.group(8)) + end_month_name = date_match.group(9) + + # Convert end month name to month number + end_month = None + for name, num in month_map.items(): + if end_month_name.lower() == name.lower(): + end_month = num + break + + if end_month is None: + # If end month name not found, use start month + end_month = month + logger.warning(f"Could not parse end month name: {end_month_name}, using start month") + + try: + end_date = datetime(current_year, end_month, end_day) + # Add a day to include the full end day + end_date = 
end_date + timedelta(days=1) + except ValueError: + # Handle invalid dates + logger.warning(f"Invalid end date: {end_day} {end_month_name} {current_year}, using start date + 1 day") + end_date = start_date + timedelta(days=1) + else: + # If no end date, use start date + 1 day as default + end_date = start_date + timedelta(days=1) + + # Format dates as ISO strings + start_iso = start_date.isoformat() + end_iso = end_date.isoformat() + + return (start_iso, end_iso) + else: + # If no date pattern found, use current date as fallback + now = datetime.now() + start_iso = now.isoformat() + end_iso = (now + timedelta(days=1)).isoformat() + logger.warning(f"Could not parse date from description, using current date: {start_iso} to {end_iso}") + return (start_iso, end_iso) + + except Exception as e: + logger.error(f"Error parsing event dates: {e}") + # Return default dates (current date) + now = datetime.now() + return (now.isoformat(), (now + timedelta(days=1)).isoformat()) + +def extract_location(description): """ - Create an event object from an RSS feed entry. - + Extract location information from the event description. + Args: - entry: An entry from the osmcal.org RSS feed. - + description (str): The event description HTML. + + Returns: + tuple: A tuple containing (location_name, coordinates). + """ + try: + # Default coordinates (center of the world) + coordinates = [0, 0] + location_name = "Unknown Location" + + # Try to find location in the description + location_pattern = r'

<p>([^<]+)</p>

' + location_matches = re.findall(location_pattern, description) + + if location_matches and len(location_matches) > 1: + # The second paragraph often contains the location + location_candidate = location_matches[1].strip() + if location_candidate and "," in location_candidate and not location_candidate.startswith('<'): + location_name = location_candidate + + # For now, we don't have exact coordinates, so we'll use a placeholder + # In a real implementation, you might want to geocode the location + coordinates = [0, 0] + + return (location_name, coordinates) + + except Exception as e: + logger.error(f"Error extracting location: {e}") + return ("Unknown Location", [0, 0]) + +def create_event(item): + """ + Create an event object from an RSS item. + + Args: + item: An item element from the RSS feed. + Returns: dict: A GeoJSON Feature representing the event. """ try: - # Extract data from the entry - title = entry.title - link = entry.link - description = entry.description if hasattr(entry, 'description') else "" - - # Extract dates - start_date = None - end_date = None - - if hasattr(entry, 'published'): - start_date = parse_date(entry.published) - - # If there's no published date, use the current date - if not start_date: - start_date = datetime.now(pytz.UTC).isoformat() - - # Set end date to 1 day after start date if not specified - if not end_date: - dt = date_parser.parse(start_date) - end_date = (dt + timedelta(days=1)).isoformat() - - # Extract location and coordinates - location = "" - coordinates = [2.2137, 46.2276] # Default: center of France - - if hasattr(entry, 'where') and entry.where: - location = entry.where - coordinates = extract_coordinates(location) - elif description: - # Try to extract location from description - location_match = re.search(r'Location:?\s*([^\n]+)', description, re.IGNORECASE) - if location_match: - location = location_match.group(1).strip() - coordinates = extract_coordinates(location) - + # Extract data from the item + title = 
item.find('title').text + link = item.find('link').text + description = item.find('description').text + guid = item.find('guid').text + + # Clean up the description (remove HTML tags for text extraction) + clean_description = re.sub(r'<[^>]+>', ' ', description) + clean_description = html.unescape(clean_description) + clean_description = re.sub(r'\s+', ' ', clean_description).strip() + + # Parse dates from the description + start_date, end_date = parse_event_dates(description) + + # Extract location information + location_name, coordinates = extract_location(description) + # Create a descriptive label label = title - - # Determine the event type - what = "community.osm.meetup" - - # Check for specific event types in the title or description - lower_title = title.lower() - lower_desc = description.lower() - - if any(term in lower_title or term in lower_desc for term in ["conference", "summit"]): - what = "community.osm.conference" - elif any(term in lower_title or term in lower_desc for term in ["workshop", "training"]): - what = "community.osm.workshop" - elif any(term in lower_title or term in lower_desc for term in ["mapathon", "mapping party"]): - what = "community.osm.mapathon" - + # Create the event object event = { "type": "Feature", @@ -171,43 +245,100 @@ def create_event(entry): }, "properties": { "type": "scheduled", - "what": what, - "what:series": "OpenStreetMap Events", - "where": location, + "what": "community.osm.event", + "what:series": "OpenStreetMap Calendar", + "where": location_name, "label": label, - "description": description, + "description": clean_description, "start": start_date, "stop": end_date, "url": link, - "source": "osmcal.org" + "external_id": guid, + "source": "OSM Calendar" } } - + return event - + except Exception as e: - logger.error(f"Error creating event from entry: {e}") + logger.error(f"Error creating event from item: {e}") return None +def event_exists(db, properties): + """ + Check if an event with the same properties 
already exists in the database. + + Args: + db: Database connection. + properties: Event properties. + + Returns: + bool: True if the event exists, False otherwise. + """ + try: + cur = db.cursor() + + # Check if an event with the same external_id exists + if 'external_id' in properties: + cur.execute(""" + SELECT events_id FROM events + WHERE events_tags->>'external_id' = %s; + """, (properties['external_id'],)) + + result = cur.fetchone() + if result: + logger.info(f"Event with external_id {properties['external_id']} already exists") + return True + + # Check if an event with the same label, start, and stop exists + cur.execute(""" + SELECT events_id FROM events + WHERE events_tags->>'label' = %s + AND events_tags->>'start' = %s + AND events_tags->>'stop' = %s; + """, ( + properties.get('label', ''), + properties.get('start', ''), + properties.get('stop', '') + )) + + result = cur.fetchone() + if result: + logger.info(f"Event with label '{properties.get('label')}' and same dates already exists") + return True + + return False + + except Exception as e: + logger.error(f"Error checking if event exists: {e}") + return False + def submit_event(event): """ Submit an event to the OpenEventDatabase. - + Args: event: A GeoJSON Feature representing the event. - + Returns: bool: True if the event was successfully submitted, False otherwise. 
""" try: # Connect to the database db = db_connect() - cur = db.cursor() - + # Extract event properties properties = event['properties'] + + # Check if the event already exists + if event_exists(db, properties): + logger.info(f"Skipping event '{properties.get('label')}' as it already exists") + db.close() + return False + + cur = db.cursor() geometry = json.dumps(event['geometry']) - + # Insert the geometry into the geo table cur.execute(""" INSERT INTO geo @@ -216,10 +347,10 @@ def submit_event(event): WHERE ST_IsValid(geom) ON CONFLICT DO NOTHING RETURNING hash; """, (geometry,)) - + # Get the geometry hash hash_result = cur.fetchone() - + if hash_result is None: # If the hash is None, get it from the database cur.execute(""" @@ -228,38 +359,17 @@ def submit_event(event): ST_IsValidReason(geom) from (SELECT st_geomfromgeojson(%s) as geom) as g; """, (geometry,)) hash_result = cur.fetchone() - + if hash_result is None or (len(hash_result) > 1 and not hash_result[1]): logger.error(f"Invalid geometry for event: {properties.get('label')}") db.close() return False - + geo_hash = hash_result[0] - + # Determine the bounds for the time range bounds = '[]' if properties['start'] == properties['stop'] else '[)' - - # Check if an event with the same properties already exists - cur.execute(""" - SELECT events_id FROM events - WHERE events_what = %s - AND events_when = tstzrange(%s, %s, %s) - AND events_geo = %s; - """, ( - properties['what'], - properties['start'], - properties['stop'], - bounds, - geo_hash - )) - - existing_id = cur.fetchone() - - if existing_id: - logger.info(f"Event already exists with ID: {existing_id[0]}") - db.close() - return False - + # Insert the event into the database cur.execute(""" INSERT INTO events (events_type, events_what, events_when, events_tags, events_geo) @@ -274,10 +384,10 @@ def submit_event(event): json.dumps(properties), geo_hash )) - + # Get the event ID event_id = cur.fetchone() - + if event_id: logger.success(f"Event created with 
ID: {event_id[0]}") db.commit() @@ -287,39 +397,38 @@ def submit_event(event): logger.warning(f"Failed to create event: {properties.get('label')}") db.close() return False - + except Exception as e: logger.error(f"Error submitting event: {e}") return False def main(): """ - Main function to fetch OSM Calendar data and add events to the database. + Main function to fetch OSM Calendar events and add them to the database. """ logger.info("Starting OSM Calendar extractor") - - # Fetch data from osmcal.org - entries = fetch_osmcal_data() - - if not entries: - logger.warning("No entries found, exiting") + + # Fetch events from the OSM Calendar RSS feed + items = fetch_osm_calendar_data() + + if not items: + logger.warning("No events found, exiting") return - - # Process each entry + + # Process each item success_count = 0 - - for entry in entries: - # Create an event from the entry - event = create_event(entry) - + for item in items: + # Create an event from the item + event = create_event(item) + if not event: continue - + # Submit the event to the database if submit_event(event): success_count += 1 - - logger.success(f"Successfully added {success_count} events to the database") + + logger.success(f"Successfully added {success_count} out of {len(items)} events to the database") if __name__ == "__main__": main() \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 237c207..5e1d4bf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,9 @@ -falcon -psycopg2-binary -geojson -gunicorn -uwsgi -requests +beautifulsoup4==4.13.5 +config==0.5.1 +falcon==4.1.0 +iso8601==2.1.0 +psycopg2_binary==2.9.10 +pyproj==3.7.2 +pytz==2025.2 +Requests==2.32.5 +waitress==3.0.2 diff --git a/server_config/SYSTEMD_SERVICE_INSTALLATION.md b/server_config/SYSTEMD_SERVICE_INSTALLATION.md index f8067c3..6fcbac3 100644 --- a/server_config/SYSTEMD_SERVICE_INSTALLATION.md +++ b/server_config/SYSTEMD_SERVICE_INSTALLATION.md @@ -18,7 +18,7 @@ Ce document explique 
comment installer et activer le service systemd pour faire ```bash sudo cp oedb-uwsgi.service /etc/systemd/system/ sudo chmod 644 /etc/systemd/system/oedb-uwsgi.service -sudo chown -R www-data:www-data /home/poule/encrypted/stockage-syncable/www/development/html/oedb-backend +sudo chown -R www-data:www-data /home/poule/encrypted/oedb-backend sudo systemctl daemon-reload sudo systemctl enable oedb-uwsgi.service sudo systemctl start oedb-uwsgi.service