| 
									
										
										
										
											2025-09-16 00:46:09 +02:00
										 |  |  | #!/usr/bin/env python3 | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | SNCF Travaux Extractor for the OpenEventDatabase. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | This script fetches railway work schedules from the SNCF open data API | 
					
						
							|  |  |  | and adds them to the OpenEventDatabase. | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-18 22:18:25 +02:00
										 |  |  | API URL: https://data.sncf.com/api/explore/v2.1/catalog/datasets/interceptions-programmees-sur-ligne/records?limit=100 | 
					
						
							| 
									
										
										
										
											2025-09-16 00:46:09 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | Example data format: | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     "total_count":482, | 
					
						
							|  |  |  |     "results":[ | 
					
						
							|  |  |  |         { | 
					
						
							|  |  |  |             "lib_structdem":"Siège INFRAPOLE PACA", | 
					
						
							|  |  |  |             "cod_ligne":"830000", | 
					
						
							|  |  |  |             "lib_ligne":"Ligne de Paris-Lyon à Marseille-St-Charles", | 
					
						
							|  |  |  |             "pk_debm":"687000", | 
					
						
							|  |  |  |             "pk_finm":"862100", | 
					
						
							|  |  |  |             "familletravaux":"renouvellement de la signalisation", | 
					
						
							|  |  |  |             "nb_interventions":1, | 
					
						
							|  |  |  |             "num_semaine":"31", | 
					
						
							|  |  |  |             "annee":"2023" | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     ] | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | To get a start date, we combine the "annee" (year) and "num_semaine" (week number) fields. | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import json | 
					
						
							|  |  |  | import requests | 
					
						
							|  |  |  | import datetime | 
					
						
							|  |  |  | import sys | 
					
						
							|  |  |  | import os | 
					
						
							|  |  |  | from datetime import datetime, timedelta | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # Add the parent directory to the path so we can import from oedb | 
					
						
							|  |  |  | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from oedb.utils.db import db_connect | 
					
						
							|  |  |  | from oedb.utils.logging import logger | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # API URL for SNCF open data | 
					
						
							| 
									
										
										
										
											2025-09-18 22:18:25 +02:00
										 |  |  | API_URL = "https://data.sncf.com/api/explore/v2.1/catalog/datasets/interceptions-programmees-sur-ligne/records?limit=100" | 
					
						
							| 
									
										
										
										
											2025-09-16 00:46:09 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | def fetch_sncf_data(): | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     Fetch railway work planning data from the SNCF open data API. | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |     Returns: | 
					
						
							|  |  |  |         list: A list of railway work records. | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     logger.info("Fetching data from SNCF open data API") | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         response = requests.get(API_URL) | 
					
						
							|  |  |  |         response.raise_for_status()  # Raise an exception for HTTP errors | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         data = response.json() | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         if 'results' not in data: | 
					
						
							|  |  |  |             logger.error("No results found in API response") | 
					
						
							|  |  |  |             return [] | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         logger.success(f"Successfully fetched {len(data['results'])} records from SNCF API") | 
					
						
							|  |  |  |         return data['results'] | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |     except requests.exceptions.RequestException as e: | 
					
						
							|  |  |  |         logger.error(f"Error fetching data from SNCF API: {e}") | 
					
						
							|  |  |  |         return [] | 
					
						
							|  |  |  |     except json.JSONDecodeError as e: | 
					
						
							|  |  |  |         logger.error(f"Error decoding JSON response: {e}") | 
					
						
							|  |  |  |         return [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def week_to_date(year, week_number): | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     Convert a year and week number to a date. | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |     Args: | 
					
						
							|  |  |  |         year (str or int): The year. | 
					
						
							|  |  |  |         week_number (str or int): The week number (1-53). | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |     Returns: | 
					
						
							|  |  |  |         tuple: A tuple containing (start_date, end_date) as ISO format strings. | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         # Convert inputs to integers | 
					
						
							|  |  |  |         year = int(year) | 
					
						
							|  |  |  |         week_number = int(week_number) | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         # Validate inputs | 
					
						
							|  |  |  |         if week_number < 1 or week_number > 53: | 
					
						
							|  |  |  |             logger.warning(f"Invalid week number: {week_number}, using week 1 instead") | 
					
						
							|  |  |  |             week_number = 1 | 
					
						
							|  |  |  |              | 
					
						
							|  |  |  |         # Calculate the date of the first day of the week (Monday) | 
					
						
							|  |  |  |         # The %G and %V format codes are used for ISO week date | 
					
						
							|  |  |  |         # %G is the ISO year number and %V is the ISO week number | 
					
						
							|  |  |  |         start_date = datetime.strptime(f'{year}-{week_number}-1', '%Y-%W-%w') | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         # If the week number is 0, it means the first partial week of the year | 
					
						
							|  |  |  |         if week_number == 0: | 
					
						
							|  |  |  |             start_date = datetime(year, 1, 1) | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         # Calculate the end date (Sunday of the same week) | 
					
						
							|  |  |  |         end_date = start_date + timedelta(days=6) | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         # Format dates as ISO strings | 
					
						
							|  |  |  |         start_iso = start_date.isoformat() | 
					
						
							|  |  |  |         end_iso = end_date.isoformat() | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         return (start_iso, end_iso) | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |     except ValueError as e: | 
					
						
							|  |  |  |         logger.error(f"Error converting week to date: {e}") | 
					
						
							|  |  |  |         # Return default dates (first week of the year) | 
					
						
							|  |  |  |         default_start = datetime(int(year), 1, 1).isoformat() | 
					
						
							|  |  |  |         default_end = (datetime(int(year), 1, 1) + timedelta(days=6)).isoformat() | 
					
						
							|  |  |  |         return (default_start, default_end) | 
					
						
							|  |  |  |     except Exception as e: | 
					
						
							|  |  |  |         logger.error(f"Unexpected error converting week to date: {e}") | 
					
						
							|  |  |  |         # Return default dates (current date) | 
					
						
							|  |  |  |         now = datetime.now() | 
					
						
							|  |  |  |         return (now.isoformat(), (now + timedelta(days=7)).isoformat()) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def create_event(record): | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     Create an event object from a SNCF record. | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |     Args: | 
					
						
							|  |  |  |         record: A record from the SNCF API. | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |     Returns: | 
					
						
							|  |  |  |         dict: A GeoJSON Feature representing the event. | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         # Extract data from the record | 
					
						
							|  |  |  |         structure = record.get('lib_structdem', 'Unknown Structure') | 
					
						
							|  |  |  |         line_code = record.get('cod_ligne', 'Unknown Line Code') | 
					
						
							|  |  |  |         line_name = record.get('lib_ligne', 'Unknown Line') | 
					
						
							|  |  |  |         start_point = record.get('pk_debm', '') | 
					
						
							|  |  |  |         end_point = record.get('pk_finm', '') | 
					
						
							|  |  |  |         work_type = record.get('familletravaux', 'Unknown Work Type') | 
					
						
							|  |  |  |         interventions = record.get('nb_interventions', 1) | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         # Extract year and week number | 
					
						
							|  |  |  |         year = record.get('annee') | 
					
						
							|  |  |  |         week_number = record.get('num_semaine') | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         if not year or not week_number: | 
					
						
							|  |  |  |             logger.warning(f"Missing year or week number for {line_name}, skipping") | 
					
						
							|  |  |  |             return None | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         # Convert week number to start and end dates | 
					
						
							|  |  |  |         start_date, end_date = week_to_date(year, week_number) | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         # Create a descriptive label | 
					
						
							|  |  |  |         label = f"Railway Work: {line_name} - {work_type}" | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         # Create a description with more details | 
					
						
							|  |  |  |         description = ( | 
					
						
							|  |  |  |             f"Railway maintenance work on line {line_code} ({line_name}) " | 
					
						
							|  |  |  |             f"from kilometer point {start_point} to {end_point}. " | 
					
						
							|  |  |  |             f"Type of work: {work_type}. " | 
					
						
							|  |  |  |             f"Number of interventions: {interventions}. " | 
					
						
							|  |  |  |             f"Managed by: {structure}." | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         # Use a placeholder location in France | 
					
						
							|  |  |  |         # In a real implementation, you might want to geocode the line or use a predefined location | 
					
						
							|  |  |  |         coordinates = [2.2137, 46.2276]  # Center of France | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         # Create the event object | 
					
						
							|  |  |  |         event = { | 
					
						
							|  |  |  |             "type": "Feature", | 
					
						
							|  |  |  |             "geometry": { | 
					
						
							|  |  |  |                 "type": "Point", | 
					
						
							|  |  |  |                 "coordinates": coordinates | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |             "properties": { | 
					
						
							|  |  |  |                 "type": "scheduled", | 
					
						
							|  |  |  |                 "what": "transport.railway.maintenance", | 
					
						
							|  |  |  |                 "what:series": "SNCF Railway Maintenance", | 
					
						
							|  |  |  |                 "where": line_name, | 
					
						
							|  |  |  |                 "label": label, | 
					
						
							|  |  |  |                 "description": description, | 
					
						
							|  |  |  |                 "start": start_date, | 
					
						
							|  |  |  |                 "stop": end_date, | 
					
						
							|  |  |  |                 "line_code": line_code, | 
					
						
							|  |  |  |                 "work_type": work_type, | 
					
						
							|  |  |  |                 "interventions": interventions, | 
					
						
							|  |  |  |                 "start_point": start_point, | 
					
						
							|  |  |  |                 "end_point": end_point, | 
					
						
							|  |  |  |                 "structure": structure, | 
					
						
							|  |  |  |                 "source": "SNCF Open Data" | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         return event | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |     except Exception as e: | 
					
						
							|  |  |  |         logger.error(f"Error creating event from record: {e}") | 
					
						
							|  |  |  |         return None | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def submit_event(event): | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     Submit an event to the OpenEventDatabase. | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |     Args: | 
					
						
							|  |  |  |         event: A GeoJSON Feature representing the event. | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |     Returns: | 
					
						
							|  |  |  |         bool: True if the event was successfully submitted, False otherwise. | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         # Connect to the database | 
					
						
							|  |  |  |         db = db_connect() | 
					
						
							|  |  |  |         cur = db.cursor() | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         # Extract event properties | 
					
						
							|  |  |  |         properties = event['properties'] | 
					
						
							|  |  |  |         geometry = json.dumps(event['geometry']) | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         # Insert the geometry into the geo table | 
					
						
							|  |  |  |         cur.execute("""
 | 
					
						
							|  |  |  |             INSERT INTO geo | 
					
						
							|  |  |  |             SELECT geom, md5(st_astext(geom)) as hash, st_centroid(geom) as geom_center FROM | 
					
						
							|  |  |  |                 (SELECT st_setsrid(st_geomfromgeojson(%s),4326) as geom) as g | 
					
						
							|  |  |  |             WHERE ST_IsValid(geom) | 
					
						
							|  |  |  |             ON CONFLICT DO NOTHING RETURNING hash; | 
					
						
							|  |  |  |         """, (geometry,))
 | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         # Get the geometry hash | 
					
						
							|  |  |  |         hash_result = cur.fetchone() | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         if hash_result is None: | 
					
						
							|  |  |  |             # If the hash is None, get it from the database | 
					
						
							|  |  |  |             cur.execute("""
 | 
					
						
							|  |  |  |                 SELECT md5(st_asewkt(geom)), | 
					
						
							|  |  |  |                 ST_IsValid(geom), | 
					
						
							|  |  |  |                 ST_IsValidReason(geom) from (SELECT st_geomfromgeojson(%s) as geom) as g; | 
					
						
							|  |  |  |             """, (geometry,))
 | 
					
						
							|  |  |  |             hash_result = cur.fetchone() | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         if hash_result is None or (len(hash_result) > 1 and not hash_result[1]): | 
					
						
							|  |  |  |             logger.error(f"Invalid geometry for event: {properties.get('label')}") | 
					
						
							|  |  |  |             db.close() | 
					
						
							|  |  |  |             return False | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         geo_hash = hash_result[0] | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         # Determine the bounds for the time range | 
					
						
							|  |  |  |         bounds = '[]' if properties['start'] == properties['stop'] else '[)' | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         # Insert the event into the database | 
					
						
							|  |  |  |         cur.execute("""
 | 
					
						
							|  |  |  |             INSERT INTO events (events_type, events_what, events_when, events_tags, events_geo) | 
					
						
							|  |  |  |             VALUES (%s, %s, tstzrange(%s, %s, %s), %s, %s) | 
					
						
							|  |  |  |             ON CONFLICT DO NOTHING RETURNING events_id; | 
					
						
							|  |  |  |         """, (
 | 
					
						
							|  |  |  |             properties['type'], | 
					
						
							|  |  |  |             properties['what'], | 
					
						
							|  |  |  |             properties['start'], | 
					
						
							|  |  |  |             properties['stop'], | 
					
						
							|  |  |  |             bounds, | 
					
						
							|  |  |  |             json.dumps(properties), | 
					
						
							|  |  |  |             geo_hash | 
					
						
							|  |  |  |         )) | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         # Get the event ID | 
					
						
							|  |  |  |         event_id = cur.fetchone() | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         if event_id: | 
					
						
							|  |  |  |             logger.success(f"Event created with ID: {event_id[0]}") | 
					
						
							|  |  |  |             db.commit() | 
					
						
							|  |  |  |             db.close() | 
					
						
							|  |  |  |             return True | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             # Check if the event already exists | 
					
						
							|  |  |  |             cur.execute("""
 | 
					
						
							|  |  |  |                 SELECT events_id FROM events | 
					
						
							|  |  |  |                 WHERE events_what = %s | 
					
						
							|  |  |  |                 AND events_when = tstzrange(%s, %s, %s) | 
					
						
							|  |  |  |                 AND events_geo = %s; | 
					
						
							|  |  |  |             """, (
 | 
					
						
							|  |  |  |                 properties['what'], | 
					
						
							|  |  |  |                 properties['start'], | 
					
						
							|  |  |  |                 properties['stop'], | 
					
						
							|  |  |  |                 bounds, | 
					
						
							|  |  |  |                 geo_hash | 
					
						
							|  |  |  |             )) | 
					
						
							|  |  |  |              | 
					
						
							|  |  |  |             existing_id = cur.fetchone() | 
					
						
							|  |  |  |              | 
					
						
							|  |  |  |             if existing_id: | 
					
						
							|  |  |  |                 logger.info(f"Event already exists with ID: {existing_id[0]}") | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 logger.warning(f"Failed to create event: {properties.get('label')}") | 
					
						
							|  |  |  |              | 
					
						
							|  |  |  |             db.close() | 
					
						
							|  |  |  |             return False | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |     except Exception as e: | 
					
						
							|  |  |  |         logger.error(f"Error submitting event: {e}") | 
					
						
							|  |  |  |         return False | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def main(): | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     Main function to fetch SNCF data and add events to the database. | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     logger.info("Starting SNCF travaux extractor") | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |     # Fetch data from the SNCF API | 
					
						
							|  |  |  |     records = fetch_sncf_data() | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |     if not records: | 
					
						
							|  |  |  |         logger.warning("No records found, exiting") | 
					
						
							|  |  |  |         return | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |     # Process each record | 
					
						
							|  |  |  |     success_count = 0 | 
					
						
							|  |  |  |     for record in records: | 
					
						
							|  |  |  |         # Create an event from the record | 
					
						
							|  |  |  |         event = create_event(record) | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         if not event: | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         # Submit the event to the database | 
					
						
							|  |  |  |         if submit_event(event): | 
					
						
							|  |  |  |             success_count += 1 | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |     logger.success(f"Successfully added {success_count} out of {len(records)} events to the database") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | if __name__ == "__main__": | 
					
						
							|  |  |  |     main() |