2025-09-18 19:27:28 +02:00
|
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
"""
|
|
|
|
|
OSM Calendar Extractor for the OpenEventDatabase.
|
|
|
|
|
|
2025-09-18 19:27:28 +02:00
|
|
|
|
This script fetches events from the OpenStreetMap Calendar RSS feed
|
2025-09-26 11:57:54 +02:00
|
|
|
|
and adds them to the OpenEventDatabase via the API.
|
|
|
|
|
|
|
|
|
|
For events that don't have geographic coordinates in the RSS feed but have a link
|
|
|
|
|
to an OSM Calendar event (https://osmcal.org/event/...), the script will fetch
|
|
|
|
|
the iCal version of the event and extract the coordinates and location from there.
|
2025-09-18 19:27:28 +02:00
|
|
|
|
|
|
|
|
|
RSS Feed URL: https://osmcal.org/events.rss
|
2025-09-26 11:57:54 +02:00
|
|
|
|
API Endpoint: https://api.openeventdatabase.org/event
|
2025-09-18 22:18:25 +02:00
|
|
|
|
|
2025-09-26 11:57:54 +02:00
|
|
|
|
Usage:
|
|
|
|
|
python osm_cal.py [--max-events MAX_EVENTS] [--offset OFFSET]
|
|
|
|
|
|
|
|
|
|
Arguments:
|
|
|
|
|
--max-events MAX_EVENTS Maximum number of events to insert (default: 1)
|
|
|
|
|
--offset OFFSET Number of events to skip from the beginning of the RSS feed (default: 0)
|
2025-09-18 22:18:25 +02:00
|
|
|
|
|
2025-09-26 11:57:54 +02:00
|
|
|
|
Examples:
|
|
|
|
|
# Insert the first event from the RSS feed
|
|
|
|
|
python osm_cal.py
|
|
|
|
|
|
|
|
|
|
# Insert up to 5 events from the RSS feed
|
|
|
|
|
python osm_cal.py --max-events 5
|
|
|
|
|
|
|
|
|
|
# Skip the first 3 events and insert the next 2
|
|
|
|
|
python osm_cal.py --offset 3 --max-events 2
|
|
|
|
|
|
|
|
|
|
Environment Variables:
|
|
|
|
|
These environment variables can be set in the system environment or in a .env file
|
|
|
|
|
in the project root directory.
|
2025-09-18 19:27:28 +02:00
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import json
|
|
|
|
|
import requests
|
|
|
|
|
import sys
|
|
|
|
|
import os
|
2025-09-18 19:27:28 +02:00
|
|
|
|
import xml.etree.ElementTree as ET
|
2025-09-18 19:27:28 +02:00
|
|
|
|
import re
|
2025-09-18 19:27:28 +02:00
|
|
|
|
import html
|
|
|
|
|
from datetime import datetime, timedelta
|
2025-09-18 19:27:28 +02:00
|
|
|
|
|
|
|
|
|
# Add the parent directory to the path so we can import from oedb
|
|
|
|
|
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
|
|
|
|
|
2025-09-18 22:18:25 +02:00
|
|
|
|
from oedb.utils.db import db_connect, load_env_from_file
|
2025-09-18 19:27:28 +02:00
|
|
|
|
from oedb.utils.logging import logger
|
|
|
|
|
|
2025-09-18 19:27:28 +02:00
|
|
|
|
# RSS Feed URL for OSM Calendar
|
2025-09-18 19:27:28 +02:00
|
|
|
|
RSS_URL = "https://osmcal.org/events.rss"
|
2025-09-26 11:57:54 +02:00
|
|
|
|
# Base URL for OSM Calendar events
|
|
|
|
|
OSMCAL_EVENT_BASE_URL = "https://osmcal.org/event/"
|
2025-09-18 19:27:28 +02:00
|
|
|
|
|
2025-09-18 19:27:28 +02:00
|
|
|
|
def fetch_osm_calendar_data():
    """
    Fetch events from the OSM Calendar RSS feed.

    Downloads ``RSS_URL``, parses the body as XML and collects the
    ``<item>`` children of the ``<channel>`` element. Every failure
    (network error, timeout, HTTP error status, malformed XML, missing
    elements) is logged and reported as an empty list, so this function
    does not raise.

    Returns:
        list: The ``<item>`` elements of the RSS feed, or an empty list
        when the feed could not be fetched or parsed, or has no items.
    """
    logger.info("Fetching data from OSM Calendar RSS feed")

    try:
        # Without a timeout a server that accepts the connection but never
        # answers would block the script forever. A timeout surfaces as a
        # RequestException and is handled below.
        response = requests.get(RSS_URL, timeout=30)
        response.raise_for_status()  # Raise an exception for HTTP errors

        # Parse the XML response
        root = ET.fromstring(response.content)

        # Find all item elements (events)
        channel = root.find('channel')
        if channel is None:
            logger.error("No channel element found in RSS feed")
            return []

        items = channel.findall('item')
        if not items:
            logger.error("No items found in RSS feed")
            return []

        logger.success(f"Successfully fetched {len(items)} events from OSM Calendar RSS feed")
        return items

    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching data from OSM Calendar RSS feed: {e}")
        return []
    except ET.ParseError as e:
        logger.error(f"Error parsing XML response: {e}")
        return []
    except Exception as e:
        # Last-resort guard: the caller treats an empty list as "nothing to do".
        logger.error(f"Unexpected error fetching OSM Calendar data: {e}")
        return []
|
|
|
|
|
|
2025-09-18 19:27:28 +02:00
|
|
|
|
def parse_event_dates(description):
    """
    Parse event dates from the description.

    The first date expression found in the text is used. Recognised
    shapes are::

        18th September
        18th September 19:00
        18th September 19:00 – 21:00 (some note)
        30th December – 2nd January

    The text carries no year, so the current year is assumed. When a
    multi-day range would end before it starts (for example
    "30th December – 2nd January"), the end is moved to the following year.

    Start and end times are applied when present. Without an end time,
    and for multi-day ranges, the end is midnight after the last day so
    the whole day is covered.

    Args:
        description (str): The event description HTML.

    Returns:
        tuple: A tuple containing (start_date, end_date) as ISO format
        strings. If nothing can be parsed, the current time and the
        current time plus one day are returned.
    """
    # Lower-case keys so the month name can be looked up case-insensitively.
    month_numbers = {
        'january': 1, 'february': 2, 'march': 3, 'april': 4,
        'may': 5, 'june': 6, 'july': 7, 'august': 8,
        'september': 9, 'october': 10, 'november': 11, 'december': 12
    }

    # Groups: 1 day, 2 month, 3-4 start time, 5-6 end time,
    # 7 parenthesised note, 8 end day, 9 end month.
    date_pattern = r'(\d+)(?:st|nd|rd|th)\s+(\w+)(?:\s+(\d+):(\d+)(?:\s+–\s+(\d+):(\d+))?)?(?:\s+\(([^)]+)\))?(?:\s+–\s+(\d+)(?:st|nd|rd|th)\s+(\w+))?'

    try:
        date_match = re.search(date_pattern, description)

        if not date_match:
            # If no date pattern found, use current date as fallback
            now = datetime.now()
            start_iso = now.isoformat()
            end_iso = (now + timedelta(days=1)).isoformat()
            logger.warning(f"Could not parse date from description, using current date: {start_iso} to {end_iso}")
            return (start_iso, end_iso)

        (day_text, month_name, start_hour, start_minute,
         end_hour, end_minute, _note, end_day_text, end_month_name) = date_match.groups()

        day = int(day_text)

        month = month_numbers.get(month_name.lower())
        if month is None:
            # If month name not found, use current month
            month = datetime.now().month
            logger.warning(f"Could not parse month name: {month_name}, using current month")

        # Get current year (assuming events are current or future)
        current_year = datetime.now().year

        # Midnight at the beginning of the first day
        try:
            day_start = datetime(current_year, month, day)
        except ValueError:
            # Handle invalid dates (e.g., February 30)
            logger.warning(f"Invalid date: {day} {month_name} {current_year}, using current date")
            day_start = datetime.now()

        # Apply the start time if the description gave one
        start_date = day_start
        if start_hour is not None:
            try:
                start_date = day_start.replace(
                    hour=int(start_hour), minute=int(start_minute),
                    second=0, microsecond=0
                )
            except ValueError:
                logger.warning(f"Invalid start time: {start_hour}:{start_minute}, using start of day")

        # Default end: one day after the beginning of the first day
        end_date = day_start + timedelta(days=1)

        if end_day_text:
            # Multi-day event: "<day> <month> – <day> <month>"
            end_day = int(end_day_text)

            end_month = month_numbers.get(end_month_name.lower())
            if end_month is None:
                # If end month name not found, use start month
                end_month = month
                logger.warning(f"Could not parse end month name: {end_month_name}, using start month")

            try:
                last_day = datetime(current_year, end_month, end_day)
                if last_day < day_start:
                    # The range wraps around New Year.
                    last_day = datetime(current_year + 1, end_month, end_day)
                # Add a day to include the full end day
                end_date = last_day + timedelta(days=1)
            except ValueError:
                # Handle invalid dates; end_date keeps its default
                logger.warning(f"Invalid end date: {end_day} {end_month_name} {current_year}, using start date + 1 day")

        elif end_hour is not None:
            # Single-day event with an explicit end time
            try:
                closing = day_start.replace(
                    hour=int(end_hour), minute=int(end_minute),
                    second=0, microsecond=0
                )
                if closing <= start_date:
                    # e.g. "22:00 – 01:00": the event runs past midnight
                    closing += timedelta(days=1)
                end_date = closing
            except ValueError:
                logger.warning(f"Invalid end time: {end_hour}:{end_minute}, using start date + 1 day")

        # Format dates as ISO strings
        return (start_date.isoformat(), end_date.isoformat())

    except Exception as e:
        # Best effort: a broken description must not abort the import.
        logger.error(f"Error parsing event dates: {e}")
        # Return default dates (current date)
        now = datetime.now()
        return (now.isoformat(), (now + timedelta(days=1)).isoformat())
|
|
|
|
|
|
2025-09-26 11:57:54 +02:00
|
|
|
|
def fetch_ical_data(event_url):
    """
    Fetch and parse iCal data for an OSM Calendar event.

    The event ID is taken from ``event_url``, the matching ``.ics`` file
    is downloaded, and its ``GEO`` and ``LOCATION`` properties are read.
    Any failure is logged and reported through the default return value,
    so this function does not raise.

    Args:
        event_url (str): The URL of the OSM Calendar event.

    Returns:
        tuple: A tuple containing (location_name, coordinates).
        ``coordinates`` is a ``[longitude, latitude]`` list (GeoJSON
        order). ``"Unknown Location"`` and ``[0, 0]`` stand for values
        that could not be determined.
    """
    try:
        # Check if the URL is an OSM Calendar event URL
        if not event_url.startswith(OSMCAL_EVENT_BASE_URL):
            logger.warning(f"Not an OSM Calendar event URL: {event_url}")
            return ("Unknown Location", [0, 0])

        # Extract the event ID from the URL
        event_id_match = re.search(r'event/(\d+)', event_url)
        if not event_id_match:
            logger.warning(f"Could not extract event ID from URL: {event_url}")
            return ("Unknown Location", [0, 0])

        event_id = event_id_match.group(1)

        # Construct the iCal URL
        ical_url = f"{OSMCAL_EVENT_BASE_URL}{event_id}.ics"

        # Fetch the iCal content; the timeout prevents an endless wait on a
        # stalled connection (the resulting exception is handled below).
        logger.info(f"Fetching iCal data from: {ical_url}")
        response = requests.get(ical_url, timeout=30)

        if not response.ok:
            logger.warning(f"Failed to fetch iCal data: {response.status_code}")
            return ("Unknown Location", [0, 0])

        # iCalendar folds long lines by inserting a line break followed by a
        # single space or tab. Undo that first so a long LOCATION value is
        # read in full.
        ical_content = re.sub(r'\r?\n[ \t]', '', response.text)

        # Extract GEO information (integers are accepted as well as decimals)
        geo_match = re.search(
            r'^GEO:\s*([-+]?\d+(?:\.\d+)?);\s*([-+]?\d+(?:\.\d+)?)',
            ical_content,
            re.MULTILINE,
        )
        if geo_match:
            # GEO format is latitude;longitude (RFC 5545)
            latitude = float(geo_match.group(1))
            longitude = float(geo_match.group(2))
            coordinates = [longitude, latitude]  # GeoJSON uses [longitude, latitude]
            logger.info(f"Extracted coordinates from iCal: {coordinates}")
        else:
            logger.warning(f"No GEO information found in iCal data for event: {event_id}")
            coordinates = [0, 0]

        # Extract LOCATION information (property parameters, if any, are skipped)
        location_match = re.search(
            r'^LOCATION(?:;[^:\r\n]*)?:(.+?)\s*$',
            ical_content,
            re.MULTILINE,
        )
        if location_match:
            # Unescape backslash-escaped characters (e.g., \, becomes ,).
            # An escaped newline (\n or \N) is rendered as a space so the
            # location stays on one line.
            location_name = re.sub(
                r'\\(.)',
                lambda m: ' ' if m.group(1) in 'nN' else m.group(1),
                location_match.group(1),
            ).strip()
            logger.info(f"Extracted location from iCal: {location_name}")
        else:
            logger.warning(f"No LOCATION information found in iCal data for event: {event_id}")
            location_name = "Unknown Location"

        return (location_name, coordinates)

    except Exception as e:
        # Best effort: the caller falls back to the placeholder location.
        logger.error(f"Error fetching or parsing iCal data: {e}")
        return ("Unknown Location", [0, 0])
|
|
|
|
|
|
2025-09-18 19:27:28 +02:00
|
|
|
|
def extract_location(description):
    """
    Extract location information from the event description.

    The description is scanned for plain-text ``<p>...</p>`` paragraphs.
    The second one is taken as the location if it contains a comma.
    HTML entities in it (``&amp;``, ``&eacute;`` ...) are decoded, so the
    returned name is plain text.

    No geocoding is done here: the coordinates are always the ``[0, 0]``
    placeholder.

    Args:
        description (str): The event description HTML.

    Returns:
        tuple: A tuple containing (location_name, coordinates).
        ``location_name`` is ``"Unknown Location"`` when no suitable
        paragraph is found.
    """
    # Placeholder coordinates; this function never resolves real ones.
    coordinates = [0, 0]
    location_name = "Unknown Location"

    # A missing or empty description simply has no location in it.
    if not description:
        return (location_name, coordinates)

    try:
        # Paragraphs that contain only text (no nested tags)
        paragraphs = re.findall(r'<p>([^<]+)</p>', description)

        if len(paragraphs) > 1:
            # The second paragraph often contains the location
            location_candidate = html.unescape(paragraphs[1]).strip()
            if location_candidate and "," in location_candidate and not location_candidate.startswith('<'):
                location_name = location_candidate

        return (location_name, coordinates)

    except Exception as e:
        logger.error(f"Error extracting location: {e}")
        return ("Unknown Location", [0, 0])
|
2025-09-18 19:27:28 +02:00
|
|
|
|
|
2025-09-18 19:27:28 +02:00
|
|
|
|
def create_event(item):
    """
    Create an event object from an RSS item.

    Reads ``title``, ``link``, ``description`` and ``guid`` from the item.
    Dates and a location name are derived from the description HTML. If
    the description gives no coordinates and the link points to an OSM
    Calendar event, the event's iCal file is consulted for coordinates
    and location.

    Args:
        item: An item element from the RSS feed.

    Returns:
        dict: A GeoJSON Feature representing the event, or None when the
        item has no title or description, or an unexpected error occurs.
    """
    try:
        # Extract data from the item. findtext() returns None for a missing
        # child instead of raising, so each case can be handled explicitly.
        title = item.findtext('title')
        link = item.findtext('link')
        description = item.findtext('description')
        # <guid> is optional in RSS; the link is used as identifier
        # when it is absent.
        guid = item.findtext('guid') or link

        if not title or not description:
            # Dates and location come from the description; without it
            # (or without a title) no meaningful event can be built.
            logger.warning(f"Skipping RSS item without title or description (link: {link})")
            return None

        if link:
            link = link.strip()

        # Clean up the description (remove HTML tags for text extraction)
        clean_description = re.sub(r'<[^>]+>', ' ', description)
        clean_description = html.unescape(clean_description)
        clean_description = re.sub(r'\s+', ' ', clean_description).strip()

        # Parse dates from the description
        start_date, end_date = parse_event_dates(description)

        # Extract location information from the description
        location_name, coordinates = extract_location(description)

        # If we don't have coordinates and the link is to an OSM Calendar event,
        # try to get coordinates and location from the iCal file
        if coordinates == [0, 0] and link and link.startswith(OSMCAL_EVENT_BASE_URL):
            logger.info(f"No coordinates found in description, trying to get from iCal: {link}")
            ical_location_name, ical_coordinates = fetch_ical_data(link)

            # Use iCal coordinates if available
            if ical_coordinates != [0, 0]:
                coordinates = ical_coordinates
                logger.info(f"Using coordinates from iCal: {coordinates}")

            # Use iCal location name if available and better than what we have
            if ical_location_name != "Unknown Location":
                location_name = ical_location_name
                logger.info(f"Using location name from iCal: {location_name}")

        # Create the event object
        event = {
            "type": "Feature",
            "geometry": {
                "type": "Point",
                "coordinates": coordinates
            },
            "properties": {
                "type": "scheduled",
                "what": "community.osm.event",
                "what:series": "OpenStreetMap Calendar",
                "where": location_name,
                "label": title,
                "description": clean_description,
                "start": start_date,
                "stop": end_date,
                "url": link,
                "external_id": guid,
                "source": "OSM Calendar"
            }
        }

        return event

    except Exception as e:
        # Best effort: one broken item must not stop the whole import.
        logger.error(f"Error creating event from item: {e}")
        return None
|
|
|
|
|
|
2025-09-18 19:27:28 +02:00
|
|
|
|
def event_exists(db, properties):
    """
    Check if an event with the same properties already exists in the database.

    Two lookups are made against the ``events`` table: first by
    ``external_id`` (only when that key is present in ``properties``),
    then by the combination of ``label``, ``start`` and ``stop``.

    Args:
        db: Database connection. It must provide ``cursor()``; the
            queries use ``%s`` placeholders.
        properties: Event properties.

    Returns:
        bool: True if the event exists, False otherwise. A database error
        is logged and also reported as False.
    """
    cur = None
    try:
        cur = db.cursor()

        # Check if an event with the same external_id exists
        if 'external_id' in properties:
            cur.execute("""
                SELECT events_id FROM events
                WHERE events_tags->>'external_id' = %s;
            """, (properties['external_id'],))

            if cur.fetchone():
                logger.info(f"Event with external_id {properties['external_id']} already exists")
                return True

        # Check if an event with the same label, start, and stop exists
        cur.execute("""
            SELECT events_id FROM events
            WHERE events_tags->>'label' = %s
            AND events_tags->>'start' = %s
            AND events_tags->>'stop' = %s;
        """, (
            properties.get('label', ''),
            properties.get('start', ''),
            properties.get('stop', '')
        ))

        if cur.fetchone():
            logger.info(f"Event with label '{properties.get('label')}' and same dates already exists")
            return True

        return False

    except Exception as e:
        logger.error(f"Error checking if event exists: {e}")
        return False

    finally:
        # Release the cursor on every path, including early returns and errors.
        if cur is not None:
            cur.close()
|
|
|
|
|
|
2025-09-18 19:27:28 +02:00
|
|
|
|
def submit_event(event):
    """
    Submit an event to the OpenEventDatabase using the API.

    The event is sent as a JSON body in a POST request. Status 200 or 201
    counts as success, whether or not the response body contains an
    ``id``.

    Args:
        event: A GeoJSON Feature representing the event.

    Returns:
        bool: True if the event was successfully submitted, False otherwise.
        Network errors and timeouts are logged and reported as False.
    """
    try:
        # Extract event properties for logging
        properties = event['properties']

        # API endpoint for OpenEventDatabase
        api_url = "https://api.openeventdatabase.org/event"

        # Make the API request. The timeout keeps a stalled connection from
        # blocking the whole import.
        logger.info(f"Submitting event '{properties.get('label')}' to API")
        response = requests.post(
            api_url,
            headers={"Content-Type": "application/json"},
            data=json.dumps(event),
            timeout=30
        )

        # Check if the request was successful
        if response.status_code not in (200, 201):
            logger.warning(f"Failed to create event: {properties.get('label')}. Status code: {response.status_code}")
            logger.warning(f"Response: {response.text}")
            return False

        # The status code already says the event was created. A body that
        # is empty, not JSON, or not a JSON object must not turn that into
        # a reported failure.
        try:
            event_id = response.json().get('id')
        except (ValueError, AttributeError):
            event_id = None

        if event_id:
            logger.success(f"Event created with ID: {event_id}")
        else:
            logger.warning("Event created but no ID returned in response")
        return True

    except Exception as e:
        logger.error(f"Error submitting event: {e}")
        return False
|
|
|
|
|
|
2025-09-26 11:57:54 +02:00
|
|
|
|
def main(max_events=1, offset=0):
    """
    Main function to fetch OSM Calendar events and add them to the OpenEventDatabase API.

    Args:
        max_events (int): Maximum number of events to insert (default: 1)
        offset (int): Number of events to skip from the beginning of the RSS feed (default: 0)

    The function will exit if the .env file doesn't exist, as it's required
    for environment variables. It returns without processing anything when
    ``max_events`` is smaller than 1, when ``offset`` is negative or beyond
    the end of the feed, or when the feed yields no events.
    """
    logger.info(f"Starting OSM Calendar extractor (max_events={max_events}, offset={offset})")

    # Load environment variables from .env file and check if it exists
    if not load_env_from_file():
        logger.error("Required .env file not found. Exiting.")
        sys.exit(1)

    logger.info("Environment variables loaded successfully from .env file")

    # Negative values would be read as "count from the end" by the slice
    # below and silently select the wrong events (or none), so reject them
    # before any network traffic happens.
    if max_events < 1:
        logger.warning(f"max_events must be at least 1 (got {max_events}), no events to process")
        return
    if offset < 0:
        logger.warning(f"offset must not be negative (got {offset}), no events to process")
        return

    # Fetch events from the OSM Calendar RSS feed
    items = fetch_osm_calendar_data()

    if not items:
        logger.warning("No events found, exiting")
        return

    # Apply offset and limit
    if offset >= len(items):
        logger.warning(f"Offset {offset} is greater than or equal to the number of events {len(items)}, no events to process")
        return

    # Slice the items list according to offset and max_events
    items_to_process = items[offset:offset + max_events]
    logger.info(f"Processing {len(items_to_process)} events (offset={offset}, max_events={max_events})")

    # Process each item
    success_count = 0
    for item in items_to_process:
        # Create an event from the item; items that cannot be converted are skipped
        event = create_event(item)
        if not event:
            continue

        # Submit the event to the API
        if submit_event(event):
            success_count += 1

    logger.success(f"Successfully added {success_count} out of {len(items_to_process)} events to the OpenEventDatabase")
|
2025-09-18 19:27:28 +02:00
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    import argparse

    # Command line interface: both options are optional integers that are
    # handed to main() unchanged.
    cli = argparse.ArgumentParser(description='OSM Calendar Extractor for the OpenEventDatabase')
    cli.add_argument(
        '--max-events',
        type=int,
        default=1,
        help='Maximum number of events to insert (default: 1)',
    )
    cli.add_argument(
        '--offset',
        type=int,
        default=0,
        help='Number of events to skip from the beginning of the RSS feed (default: 0)',
    )

    options = cli.parse_args()

    # Start the import with the values given on the command line
    main(max_events=options.max_events, offset=options.offset)
|