add live page
This commit is contained in:
parent
114bcca24e
commit
eb8c42d0c0
19 changed files with 2759 additions and 199 deletions
|
@ -3,18 +3,35 @@
|
|||
OSM Calendar Extractor for the OpenEventDatabase.
|
||||
|
||||
This script fetches events from the OpenStreetMap Calendar RSS feed
|
||||
and adds them to the OpenEventDatabase if they don't already exist.
|
||||
and adds them to the OpenEventDatabase via the API.
|
||||
|
||||
For events that don't have geographic coordinates in the RSS feed but have a link
|
||||
to an OSM Calendar event (https://osmcal.org/event/...), the script will fetch
|
||||
the iCal version of the event and extract the coordinates and location from there.
|
||||
|
||||
RSS Feed URL: https://osmcal.org/events.rss
|
||||
API Endpoint: https://api.openeventdatabase.org/event
|
||||
|
||||
Usage:
|
||||
python osm_cal.py [--max-events MAX_EVENTS] [--offset OFFSET]
|
||||
|
||||
Arguments:
|
||||
--max-events MAX_EVENTS Maximum number of events to insert (default: 1)
|
||||
--offset OFFSET Number of events to skip from the beginning of the RSS feed (default: 0)
|
||||
|
||||
Examples:
|
||||
# Insert the first event from the RSS feed
|
||||
python osm_cal.py
|
||||
|
||||
# Insert up to 5 events from the RSS feed
|
||||
python osm_cal.py --max-events 5
|
||||
|
||||
# Skip the first 3 events and insert the next 2
|
||||
python osm_cal.py --offset 3 --max-events 2
|
||||
|
||||
Environment Variables:
|
||||
DB_NAME: The name of the database (default: "oedb")
|
||||
DB_HOST: The hostname of the database server (default: "localhost")
|
||||
DB_USER: The username to connect to the database (default: "")
|
||||
POSTGRES_PASSWORD: The password to connect to the database (default: None)
|
||||
|
||||
These environment variables can be set in the system environment or in a .env file
|
||||
in the project root directory.
|
||||
These environment variables can be set in the system environment or in a .env file
|
||||
in the project root directory.
|
||||
"""
|
||||
|
||||
import json
|
||||
|
@ -34,6 +51,8 @@ from oedb.utils.logging import logger
|
|||
|
||||
# RSS Feed URL for OSM Calendar
|
||||
RSS_URL = "https://osmcal.org/events.rss"
|
||||
# Base URL for OSM Calendar events
|
||||
OSMCAL_EVENT_BASE_URL = "https://osmcal.org/event/"
|
||||
|
||||
def fetch_osm_calendar_data():
|
||||
"""
|
||||
|
@ -179,6 +198,73 @@ def parse_event_dates(description):
|
|||
now = datetime.now()
|
||||
return (now.isoformat(), (now + timedelta(days=1)).isoformat())
|
||||
|
||||
def fetch_ical_data(event_url):
    """
    Fetch and parse iCal data for an OSM Calendar event.

    The numeric event ID is extracted from ``event_url``, the matching
    ``<id>.ics`` document is downloaded from OSM Calendar, and its ``GEO``
    and ``LOCATION`` properties are read with regular expressions.

    Args:
        event_url (str): The URL of the OSM Calendar event.

    Returns:
        tuple: A tuple containing (location_name, coordinates).
            ``coordinates`` is a GeoJSON-ordered ``[longitude, latitude]``
            list. ``("Unknown Location", [0, 0])`` is returned when the URL
            is not an OSM Calendar event URL, the download fails, or any
            error occurs; each element also falls back individually when
            the corresponding iCal property is missing.
    """
    try:
        # Check if the URL is an OSM Calendar event URL
        if not event_url.startswith(OSMCAL_EVENT_BASE_URL):
            logger.warning(f"Not an OSM Calendar event URL: {event_url}")
            return ("Unknown Location", [0, 0])

        # Extract the event ID from the URL
        event_id_match = re.search(r'event/(\d+)', event_url)
        if not event_id_match:
            logger.warning(f"Could not extract event ID from URL: {event_url}")
            return ("Unknown Location", [0, 0])

        event_id = event_id_match.group(1)

        # Construct the iCal URL
        ical_url = f"{OSMCAL_EVENT_BASE_URL}{event_id}.ics"

        # Fetch the iCal content. A timeout keeps a stalled server from
        # blocking the whole extractor; a timeout error is handled by the
        # generic handler below like any other fetch failure.
        logger.info(f"Fetching iCal data from: {ical_url}")
        response = requests.get(ical_url, timeout=30)

        if not response.ok:
            logger.warning(f"Failed to fetch iCal data: {response.status_code}")
            return ("Unknown Location", [0, 0])

        # Parse the iCal content
        ical_content = response.text

        # Extract GEO information. The fractional part is optional so that
        # integer-valued coordinates (e.g. "GEO:48;2") are accepted too.
        geo_match = re.search(
            r'GEO:([-+]?\d+(?:\.\d+)?);([-+]?\d+(?:\.\d+)?)', ical_content
        )
        if geo_match:
            # GEO format is latitude;longitude
            latitude = float(geo_match.group(1))
            longitude = float(geo_match.group(2))
            coordinates = [longitude, latitude]  # GeoJSON uses [longitude, latitude]
            logger.info(f"Extracted coordinates from iCal: {coordinates}")
        else:
            logger.warning(f"No GEO information found in iCal data for event: {event_id}")
            coordinates = [0, 0]

        # Extract LOCATION information
        location_match = re.search(r'LOCATION:(.+?)(?:\r\n|\n|\r)', ical_content)
        if location_match:
            location_name = location_match.group(1).strip()
            # Unescape backslash-escaped characters (e.g., \, becomes ,)
            location_name = re.sub(r'\\(.)', r'\1', location_name)
            logger.info(f"Extracted location from iCal: {location_name}")
        else:
            logger.warning(f"No LOCATION information found in iCal data for event: {event_id}")
            location_name = "Unknown Location"

        return (location_name, coordinates)

    except Exception as e:
        # Best-effort lookup: any failure degrades to the "unknown" result
        # so that the caller can still create the event.
        logger.error(f"Error fetching or parsing iCal data: {e}")
        return ("Unknown Location", [0, 0])
|
||||
|
||||
def extract_location(description):
|
||||
"""
|
||||
Extract location information from the event description.
|
||||
|
@ -239,9 +325,25 @@ def create_event(item):
|
|||
# Parse dates from the description
|
||||
start_date, end_date = parse_event_dates(description)
|
||||
|
||||
# Extract location information
|
||||
# Extract location information from the description
|
||||
location_name, coordinates = extract_location(description)
|
||||
|
||||
# If we don't have coordinates and the link is to an OSM Calendar event,
|
||||
# try to get coordinates and location from the iCal file
|
||||
if coordinates == [0, 0] and link and link.startswith(OSMCAL_EVENT_BASE_URL):
|
||||
logger.info(f"No coordinates found in description, trying to get from iCal: {link}")
|
||||
ical_location_name, ical_coordinates = fetch_ical_data(link)
|
||||
|
||||
# Use iCal coordinates if available
|
||||
if ical_coordinates != [0, 0]:
|
||||
coordinates = ical_coordinates
|
||||
logger.info(f"Using coordinates from iCal: {coordinates}")
|
||||
|
||||
# Use iCal location name if available and better than what we have
|
||||
if ical_location_name != "Unknown Location":
|
||||
location_name = ical_location_name
|
||||
logger.info(f"Using location name from iCal: {location_name}")
|
||||
|
||||
# Create a descriptive label
|
||||
label = title
|
||||
|
||||
|
@ -325,7 +427,7 @@ def event_exists(db, properties):
|
|||
|
||||
def submit_event(event):
|
||||
"""
|
||||
Submit an event to the OpenEventDatabase.
|
||||
Submit an event to the OpenEventDatabase using the API.
|
||||
|
||||
Args:
|
||||
event: A GeoJSON Feature representing the event.
|
||||
|
@ -334,129 +436,53 @@ def submit_event(event):
|
|||
bool: True if the event was successfully submitted, False otherwise.
|
||||
"""
|
||||
try:
|
||||
# Connect to the database
|
||||
db = db_connect()
|
||||
|
||||
# Extract event properties
|
||||
# Extract event properties for logging
|
||||
properties = event['properties']
|
||||
|
||||
# Check if the event already exists
|
||||
if event_exists(db, properties):
|
||||
logger.info(f"Skipping event '{properties.get('label')}' as it already exists")
|
||||
db.close()
|
||||
return False
|
||||
|
||||
cur = db.cursor()
|
||||
geometry = json.dumps(event['geometry'])
|
||||
|
||||
print('event: ', event)
|
||||
# Insert the geometry into the geo table
|
||||
cur.execute("""
|
||||
INSERT INTO geo
|
||||
SELECT geom, md5(st_astext(geom)) as hash, st_centroid(geom) as geom_center FROM
|
||||
(SELECT st_setsrid(st_geomfromgeojson(%s),4326) as geom) as g
|
||||
WHERE ST_IsValid(geom)
|
||||
ON CONFLICT DO NOTHING RETURNING hash;
|
||||
""", (geometry,))
|
||||
|
||||
# Get the geometry hash
|
||||
hash_result = cur.fetchone()
|
||||
|
||||
if hash_result is None:
|
||||
# If the hash is None, check if the geometry already exists in the database
|
||||
cur.execute("""
|
||||
SELECT hash FROM geo
|
||||
WHERE hash = md5(st_astext(st_setsrid(st_geomfromgeojson(%s),4326)));
|
||||
""", (geometry,))
|
||||
existing_hash = cur.fetchone()
|
||||
|
||||
# API endpoint for OpenEventDatabase
|
||||
api_url = "https://api.openeventdatabase.org/event"
|
||||
|
||||
# Make the API request
|
||||
logger.info(f"Submitting event '{properties.get('label')}' to API")
|
||||
response = requests.post(
|
||||
api_url,
|
||||
headers={"Content-Type": "application/json"},
|
||||
data=json.dumps(event)
|
||||
)
|
||||
|
||||
# Check if the request was successful
|
||||
if response.status_code == 200 or response.status_code == 201:
|
||||
# Parse the response to get the event ID
|
||||
response_data = response.json()
|
||||
event_id = response_data.get('id')
|
||||
|
||||
if existing_hash:
|
||||
# Geometry already exists in the database, use its hash
|
||||
geo_hash = existing_hash[0]
|
||||
logger.info(f"Using existing geometry with hash: {geo_hash}")
|
||||
if event_id:
|
||||
logger.success(f"Event created with ID: {event_id}")
|
||||
return True
|
||||
else:
|
||||
# Geometry doesn't exist, try to insert it directly
|
||||
cur.execute("""
|
||||
SELECT md5(st_astext(geom)) as hash,
|
||||
ST_IsValid(geom),
|
||||
ST_IsValidReason(geom) from (SELECT st_setsrid(st_geomfromgeojson(%s),4326) as geom) as g;
|
||||
""", (geometry,))
|
||||
hash_result = cur.fetchone()
|
||||
|
||||
if hash_result is None or not hash_result[1]:
|
||||
logger.error(f"Invalid geometry for event: {properties.get('label')}")
|
||||
if hash_result and len(hash_result) > 2:
|
||||
logger.error(f"Reason: {hash_result[2]}")
|
||||
db.close()
|
||||
return False
|
||||
|
||||
geo_hash = hash_result[0]
|
||||
|
||||
# Now insert the geometry explicitly
|
||||
cur.execute("""
|
||||
INSERT INTO geo (geom, hash, geom_center)
|
||||
VALUES (
|
||||
st_setsrid(st_geomfromgeojson(%s),4326),
|
||||
%s,
|
||||
st_centroid(st_setsrid(st_geomfromgeojson(%s),4326))
|
||||
)
|
||||
ON CONFLICT (hash) DO NOTHING;
|
||||
""", (geometry, geo_hash, geometry))
|
||||
|
||||
# Verify the geometry was inserted
|
||||
cur.execute("SELECT 1 FROM geo WHERE hash = %s", (geo_hash,))
|
||||
if cur.fetchone() is None:
|
||||
logger.error(f"Failed to insert geometry with hash: {geo_hash}")
|
||||
db.close()
|
||||
return False
|
||||
|
||||
logger.info(f"Inserted new geometry with hash: {geo_hash}")
|
||||
logger.warning(f"Event created but no ID returned in response")
|
||||
return True
|
||||
else:
|
||||
geo_hash = hash_result[0]
|
||||
|
||||
# Determine the bounds for the time range
|
||||
bounds = '[]' if properties['start'] == properties['stop'] else '[)'
|
||||
|
||||
# Insert the event into the database
|
||||
cur.execute("""
|
||||
INSERT INTO events (events_type, events_what, events_when, events_tags, events_geo)
|
||||
VALUES (%s, %s, tstzrange(%s, %s, %s), %s, %s)
|
||||
ON CONFLICT DO NOTHING RETURNING events_id;
|
||||
""", (
|
||||
properties['type'],
|
||||
properties['what'],
|
||||
properties['start'],
|
||||
properties['stop'],
|
||||
bounds,
|
||||
json.dumps(properties),
|
||||
geo_hash
|
||||
))
|
||||
|
||||
# Get the event ID
|
||||
event_id = cur.fetchone()
|
||||
|
||||
if event_id:
|
||||
logger.success(f"Event created with ID: {event_id[0]}")
|
||||
db.commit()
|
||||
db.close()
|
||||
return True
|
||||
else:
|
||||
logger.warning(f"Failed to create event: {properties.get('label')}")
|
||||
db.close()
|
||||
logger.warning(f"Failed to create event: {properties.get('label')}. Status code: {response.status_code}")
|
||||
logger.warning(f"Response: {response.text}")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error submitting event: {e}")
|
||||
return False
|
||||
|
||||
def main():
|
||||
def main(max_events=1, offset=0):
|
||||
"""
|
||||
Main function to fetch OSM Calendar events and add them to the database.
|
||||
Main function to fetch OSM Calendar events and add them to the OpenEventDatabase API.
|
||||
|
||||
Args:
|
||||
max_events (int): Maximum number of events to insert (default: 1)
|
||||
offset (int): Number of events to skip from the beginning of the RSS feed (default: 0)
|
||||
|
||||
The function will exit if the .env file doesn't exist, as it's required
|
||||
for database connection parameters.
|
||||
for environment variables.
|
||||
"""
|
||||
logger.info("Starting OSM Calendar extractor")
|
||||
logger.info(f"Starting OSM Calendar extractor (max_events={max_events}, offset={offset})")
|
||||
|
||||
# Load environment variables from .env file and check if it exists
|
||||
if not load_env_from_file():
|
||||
|
@ -472,20 +498,42 @@ def main():
|
|||
logger.warning("No events found, exiting")
|
||||
return
|
||||
|
||||
# Apply offset and limit
|
||||
if offset >= len(items):
|
||||
logger.warning(f"Offset {offset} is greater than or equal to the number of events {len(items)}, no events to process")
|
||||
return
|
||||
|
||||
# Slice the items list according to offset and max_events
|
||||
items_to_process = items[offset:offset + max_events]
|
||||
logger.info(f"Processing {len(items_to_process)} events (offset={offset}, max_events={max_events})")
|
||||
|
||||
# Process each item
|
||||
success_count = 0
|
||||
for item in items:
|
||||
for item in items_to_process:
|
||||
# Create an event from the item
|
||||
event = create_event(item)
|
||||
|
||||
if not event:
|
||||
continue
|
||||
|
||||
# Submit the event to the database
|
||||
# Submit the event to the API
|
||||
if submit_event(event):
|
||||
success_count += 1
|
||||
|
||||
logger.success(f"Successfully added {success_count} out of {len(items)} events to the database")
|
||||
logger.success(f"Successfully added {success_count} out of {len(items_to_process)} events to the OpenEventDatabase")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
import argparse
|
||||
|
||||
# Set up command line argument parsing
|
||||
parser = argparse.ArgumentParser(description='OSM Calendar Extractor for the OpenEventDatabase')
|
||||
parser.add_argument('--max-events', type=int, default=1,
|
||||
help='Maximum number of events to insert (default: 1)')
|
||||
parser.add_argument('--offset', type=int, default=0,
|
||||
help='Number of events to skip from the beginning of the RSS feed (default: 0)')
|
||||
|
||||
# Parse arguments
|
||||
args = parser.parse_args()
|
||||
|
||||
# Run the main function with the provided arguments
|
||||
main(max_events=args.max_events, offset=args.offset)
|
Loading…
Add table
Add a link
Reference in a new issue