add extractors, rate limit, demo submit form

This commit is contained in:
Tykayn 2025-09-16 00:46:09 +02:00 committed by tykayn
parent cc870323bf
commit 2157091778
12 changed files with 1612 additions and 14 deletions

123
CHANGES.md Normal file
View file

@ -0,0 +1,123 @@
# Changes Made to Meet Requirements
## 1. Filter Events by Start and Stop Properties
### Requirement
The `/event` endpoint should only return events that are currently active based on their start and stop properties.
### Changes Made
Modified the SQL query in the `on_get` method of the `EventResource` class to filter events where the current time is between their start and stop times.
```python
# Before
event_when = "tstzrange(now(),now(),'[]')"
# After
if event_when == "now()":
# Use @> operator to check if events_when contains current time
sql = """SELECT events_id, events_tags, createdate, lastupdate, {event_dist} st_asgeojson({event_geom}) as geometry, st_x(geom_center) as lon, st_y(geom_center) as lat
FROM events JOIN geo ON (hash=events_geo)
WHERE events_when @> {event_when} {event_what} {event_type} {event_bbox}
ORDER BY {event_sort} {limit}"""
```
This change ensures that when no time parameters are provided, the endpoint returns events where the current time is between their start and stop times, rather than only events happening exactly at the current moment.
## 2. Update Event Properties
### Requirement
Event features should expose a `what` property instead of `description`, and should also include `what:series` when it exists. Latitude and longitude should appear only in `geometry.coordinates`, not in `properties`.
### Changes Made
Modified the `row_to_feature` method in the `BaseEvent` class to:
1. Remove `lat` and `lon` from properties
2. Ensure `what` property is used instead of `description`
```python
# Before
properties = dict(row['events_tags'])
properties.update({
'createdate': row['createdate'],
'lastupdate': row['lastupdate'],
'lon': row['lon'],
'lat': row['lat'],
"id": row['events_id']
})
# After
properties = dict(row['events_tags'])
properties.update({
'createdate': row['createdate'],
'lastupdate': row['lastupdate'],
"id": row['events_id']
})
# Ensure what property is used instead of description
if 'description' in properties and 'what' not in properties:
properties['what'] = properties.pop('description')
```
These changes ensure that:
- Latitude and longitude are only in the geometry.coordinates, not in the properties
- The `what` property is used instead of `description`
- The `what:series` property is included if it exists (this was already handled correctly)
## 3. Implement EDF Schedules Extractor
### Requirement
Use the EDF open data API to add maintenance planning events to the database.
### Changes Made
Created a new file `extractors/edf_schedules.py` that:
1. Fetches data from the EDF open data API
2. Processes each record to create an event object with the required properties
3. Submits each event to the database
```python
# API URL for EDF open data
API_URL = "https://opendata.edf.fr/api/explore/v2.1/catalog/datasets/disponibilite-du-parc-nucleaire-d-edf-sa-present-passe-et-previsionnel/records?limit=200"
# Create event object with required properties
event = {
"type": "Feature",
"geometry": {
"type": "Point",
"coordinates": coordinates
},
"properties": {
"type": "scheduled",
"what": "energy.maintenance.nuclear",
"what:series": "EDF Nuclear Maintenance",
"where": f"{site_name} - {unit}",
"label": f"Nuclear Maintenance: {site_name} - {unit}",
"start": start_date,
"stop": end_date,
"power_available": power_available,
"power_max": power_max,
"source": "EDF Open Data"
}
}
```
This script can be run directly to fetch data from the API and add events to the database.
## How to Test
1. Run the server:
```bash
python3 backend.py
```
2. Test the `/event` endpoint to verify that it only returns currently active events:
```bash
curl http://localhost:8080/event
```
3. Run the EDF schedules extractor to add maintenance planning events to the database:
```bash
python3 extractors/edf_schedules.py
```
4. Verify that the events have been added to the database and can be retrieved via the `/event` endpoint.

View file

@ -15,6 +15,8 @@ from oedb.utils.db import check_db_connection
# Import middleware
from oedb.middleware.headers import HeaderMiddleware
from oedb.middleware.cache import CacheMiddleware
from oedb.middleware.rate_limit import RateLimitMiddleware
# Import resources
from oedb.resources.event import event
@ -22,6 +24,7 @@ from oedb.resources.stats import StatsResource
from oedb.resources.search import EventSearch
from oedb.resources.root import root
from oedb.resources.demo import demo
from oedb.resources.event_form import event_form
def create_app():
"""
@ -32,7 +35,11 @@ def create_app():
"""
# Create the Falcon application with middleware
logger.info("Initializing Falcon application")
app = falcon.App(middleware=[HeaderMiddleware()])
app = falcon.App(middleware=[
HeaderMiddleware(),
CacheMiddleware(),
RateLimitMiddleware()
])
# Check database connection before continuing
if not check_db_connection():
@ -51,6 +58,7 @@ def create_app():
app.add_route('/event', event) # Handle event collection requests
app.add_route('/stats', stats) # Handle stats requests
app.add_route('/demo', demo) # Handle demo page requests
app.add_route('/demo/add', event_form) # Handle event submission form
logger.success("Application initialized successfully")
return app

View file

@ -0,0 +1,130 @@
# SNCF Travaux Extractor Implementation
## Overview
This document describes the implementation of the SNCF Travaux Extractor for the OpenEventDatabase. The extractor fetches railway work schedules from the SNCF open data API and adds them to the database as events.
## Implementation Details
The extractor is implemented in the file `extractors/sncf_travaux.py`. It consists of the following components:
1. **API Integration**: The extractor connects to the SNCF open data API to fetch railway work schedules.
2. **Date Conversion**: The extractor converts week numbers to dates, as the SNCF data provides the year and week number rather than explicit start and end dates.
3. **Event Creation**: The extractor creates event objects from the SNCF data, including all required properties for the OpenEventDatabase.
4. **Database Integration**: The extractor submits events to the OpenEventDatabase.
### Key Functions
#### `fetch_sncf_data()`
This function fetches railway work planning data from the SNCF open data API. It handles HTTP errors, JSON decoding errors, and checks if the response contains a 'results' field.
```python
def fetch_sncf_data():
"""
Fetch railway work planning data from the SNCF open data API.
Returns:
list: A list of railway work records.
"""
# Implementation details...
```
#### `week_to_date(year, week_number)`
This function converts a year and week number to a date range (start date and end date). It handles various input formats and edge cases.
```python
def week_to_date(year, week_number):
"""
Convert a year and week number to a date.
Args:
year (str or int): The year.
week_number (str or int): The week number (1-53).
Returns:
tuple: A tuple containing (start_date, end_date) as ISO format strings.
"""
# Implementation details...
```
#### `create_event(record)`
This function creates an event object from a SNCF record. It extracts relevant data from the record, converts the year and week number to start and end dates, and creates a GeoJSON Feature object with all the necessary properties.
```python
def create_event(record):
"""
Create an event object from a SNCF record.
Args:
record: A record from the SNCF API.
Returns:
dict: A GeoJSON Feature representing the event.
"""
# Implementation details...
```
#### `submit_event(event)`
This function submits an event to the OpenEventDatabase. It connects to the database, inserts the geometry and event data, and handles various error cases.
```python
def submit_event(event):
"""
Submit an event to the OpenEventDatabase.
Args:
event: A GeoJSON Feature representing the event.
Returns:
bool: True if the event was successfully submitted, False otherwise.
"""
# Implementation details...
```
### Event Properties
The events created by the extractor include the following properties:
- `type`: "scheduled" (as these are planned railway works)
- `what`: "transport.railway.maintenance" (a descriptive category)
- `what:series`: "SNCF Railway Maintenance" (to group related events)
- `where`: The line name
- `label`: A descriptive label
- `description`: A detailed description of the work
- `start` and `stop`: The start and end dates derived from the year and week number
Additional properties specific to railway works:
- `line_code`: The line code
- `work_type`: The type of work
- `interventions`: The number of interventions
- `start_point` and `end_point`: The start and end points (kilometer points)
- `structure`: The managing structure
- `source`: "SNCF Open Data"
## Testing
A test script (`test_sncf_travaux.py`) is provided to test the functionality of the extractor without actually submitting events to the database. It tests the `week_to_date()`, `create_event()`, and `fetch_sncf_data()` functions with various inputs.
To run the test script:
```bash
python3 extractors/test_sncf_travaux.py
```
## Usage
To run the extractor and add SNCF railway work events to the database:
```bash
./extractors/sncf_travaux.py
```
## Notes
- The extractor uses a placeholder location (center of France) for the event geometry. In a real implementation, you might want to geocode the line or use a predefined location.
- The extractor assumes that the SNCF API returns data in the format shown in the example at the top of `extractors/sncf_travaux.py` (module docstring). If the API changes, the extractor may need to be updated.
- The extractor handles various error cases, such as missing required fields, invalid week numbers, and database connection errors.

247
extractors/edf_schedules.py Executable file
View file

@ -0,0 +1,247 @@
#!/usr/bin/env python3
"""
EDF Schedules Extractor for the OpenEventDatabase.
This script fetches nuclear power plant maintenance schedules from the EDF open data API
and adds them to the OpenEventDatabase.
API URL: https://opendata.edf.fr/api/explore/v2.1/catalog/datasets/disponibilite-du-parc-nucleaire-d-edf-sa-present-passe-et-previsionnel/records
"""
import json
import requests
import datetime
import logging
import sys
import os
# Add the parent directory to the path so we can import from oedb
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from oedb.utils.db import db_connect
from oedb.utils.logging import logger
# API URL for EDF open data
API_URL = "https://opendata.edf.fr/api/explore/v2.1/catalog/datasets/disponibilite-du-parc-nucleaire-d-edf-sa-present-passe-et-previsionnel/records"
def fetch_edf_data():
    """
    Fetch maintenance planning data from the EDF open data API.

    Sends one GET request to ``API_URL`` and returns the list stored under
    the ``results`` key of the JSON payload. Network errors, HTTP error
    statuses and undecodable JSON are logged and reported as an empty list.

    Returns:
        list: A list of maintenance events (empty on failure).
    """
    logger.info("Fetching data from EDF open data API")
    try:
        # Bound the request: without a timeout a stalled server would block
        # the extractor indefinitely.
        response = requests.get(API_URL, timeout=30)
        response.raise_for_status()  # Raise an exception for HTTP errors
        data = response.json()
        if 'results' not in data:
            logger.error("No results found in API response")
            return []
        logger.success(f"Successfully fetched {len(data['results'])} records from EDF API")
        return data['results']
    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching data from EDF API: {e}")
        return []
    except ValueError as e:
        # json.JSONDecodeError is a ValueError; catching the base class also
        # covers decode errors raised by older requests/simplejson versions.
        logger.error(f"Error decoding JSON response: {e}")
        return []
def create_event(record):
    """
    Create an event object from an EDF record.

    Args:
        record: A record from the EDF API (a mapping supporting ``.get``).

    Returns:
        dict: A GeoJSON Feature representing the event, or None when the
        record has no usable date or cannot be processed.
    """
    try:
        # Extract the nuclear power plant name and unit
        site_name = record.get('site', 'Unknown Site')
        unit = record.get('unite', 'Unknown Unit')

        # Extract start and end dates.
        start_date = record.get('date_et_heure_fuseau_horaire_europe_paris')
        # Previously `end_date` was never assigned, so every record failed
        # with a NameError. Use 'date_fin' when the record carries it and
        # otherwise treat the record as a point-in-time event (stop == start).
        # NOTE(review): confirm whether the dataset exposes a dedicated end
        # timestamp field.
        end_date = record.get('date_fin') or start_date
        if not start_date or not end_date:
            logger.warning(f"Missing start or end date for {site_name} {unit}, skipping")
            return None

        # Extract power values
        power_available = record.get('puissance_disponible')
        power_max = record.get('puissance_maximale_possible')

        # The record is not searched for coordinates here; a fixed placeholder
        # location is used until site names are geocoded.
        coordinates = [2.2137, 46.2276]  # Center of France

        # Create the event object
        event = {
            "type": "Feature",
            "geometry": {
                "type": "Point",
                "coordinates": coordinates
            },
            "properties": {
                "type": "scheduled",
                "what": "energy.maintenance.nuclear",
                "what:series": "EDF Nuclear Maintenance",
                "where": f"{site_name} - {unit}",
                "label": f"Nuclear Maintenance: {site_name} - {unit}",
                "start": start_date,
                "stop": end_date,
                "power_available": power_available,
                "power_max": power_max,
                "source": "EDF Open Data"
            }
        }
        return event
    except Exception as e:
        # Best effort: one malformed record must not abort the whole run.
        logger.error(f"Error creating event from record: {e}")
        return None
def submit_event(event):
    """
    Submit an event to the OpenEventDatabase.

    The geometry is inserted into the ``geo`` table (or its hash is looked up
    when the insert returns no row), then the event is inserted into
    ``events``. The transaction is committed only when a new event row is
    created. The connection is closed on every exit path, including when a
    statement raises.

    Args:
        event: A GeoJSON Feature representing the event.

    Returns:
        bool: True if the event was successfully submitted, False otherwise.
    """
    db = None
    try:
        # Connect to the database
        db = db_connect()
        cur = db.cursor()

        # Extract event properties
        properties = event['properties']
        geometry = json.dumps(event['geometry'])

        # Insert the geometry into the geo table
        cur.execute("""
            INSERT INTO geo
            SELECT geom, md5(st_astext(geom)) as hash, st_centroid(geom) as geom_center FROM
            (SELECT st_setsrid(st_geomfromgeojson(%s),4326) as geom) as g
            WHERE ST_IsValid(geom)
            ON CONFLICT DO NOTHING RETURNING hash;
        """, (geometry,))

        # Get the geometry hash
        hash_result = cur.fetchone()
        if hash_result is None:
            # No row returned (conflict or invalid geometry): compute the
            # hash and validity directly.
            cur.execute("""
                SELECT md5(st_asewkt(geom)),
                ST_IsValid(geom),
                ST_IsValidReason(geom) from (SELECT st_geomfromgeojson(%s) as geom) as g;
            """, (geometry,))
            hash_result = cur.fetchone()

        if hash_result is None or (len(hash_result) > 1 and not hash_result[1]):
            logger.error(f"Invalid geometry for event: {properties.get('label')}")
            return False
        geo_hash = hash_result[0]

        # Identical start and stop need an inclusive upper bound, otherwise
        # the range would be empty.
        bounds = '[]' if properties['start'] == properties['stop'] else '[)'

        # Insert the event into the database
        cur.execute("""
            INSERT INTO events (events_type, events_what, events_when, events_tags, events_geo)
            VALUES (%s, %s, tstzrange(%s, %s, %s), %s, %s)
            ON CONFLICT DO NOTHING RETURNING events_id;
        """, (
            properties['type'],
            properties['what'],
            properties['start'],
            properties['stop'],
            bounds,
            json.dumps(properties),
            geo_hash
        ))

        # Get the event ID
        event_id = cur.fetchone()
        if event_id:
            logger.success(f"Event created with ID: {event_id[0]}")
            db.commit()
            return True

        # Nothing inserted: check if the event already exists
        cur.execute("""
            SELECT events_id FROM events
            WHERE events_what = %s
            AND events_when = tstzrange(%s, %s, %s)
            AND events_geo = %s;
        """, (
            properties['what'],
            properties['start'],
            properties['stop'],
            bounds,
            geo_hash
        ))
        existing_id = cur.fetchone()
        if existing_id:
            logger.info(f"Event already exists with ID: {existing_id[0]}")
        else:
            logger.warning(f"Failed to create event: {properties.get('label')}")
        return False
    except Exception as e:
        logger.error(f"Error submitting event: {e}")
        return False
    finally:
        # Previously the connection leaked whenever a statement raised.
        if db is not None:
            db.close()
def main():
    """
    Run the EDF extractor end to end.

    Fetches the records, converts each one into an event, submits it to the
    database and logs how many submissions succeeded.
    """
    logger.info("Starting EDF schedules extractor")

    # Nothing to do when the API returned no records
    records = fetch_edf_data()
    if not records:
        logger.warning("No records found, exiting")
        return

    # Convert and submit record by record, counting successful submissions
    success_count = 0
    for raw in records:
        feature = create_event(raw)
        if feature and submit_event(feature):
            success_count += 1

    logger.success(f"Successfully added {success_count} out of {len(records)} events to the database")


if __name__ == "__main__":
    main()

337
extractors/sncf_travaux.py Executable file
View file

@ -0,0 +1,337 @@
#!/usr/bin/env python3
"""
SNCF Travaux Extractor for the OpenEventDatabase.
This script fetches railway work schedules from the SNCF open data API
and adds them to the OpenEventDatabase.
API URL: https://data.sncf.com/api/explore/v2.1/catalog/datasets/interceptions-programmees-sur-ligne/records?limit=200
Example data format:
{
"total_count":482,
"results":[
{
"lib_structdem":"Siège INFRAPOLE PACA",
"cod_ligne":"830000",
"lib_ligne":"Ligne de Paris-Lyon à Marseille-St-Charles",
"pk_debm":"687000",
"pk_finm":"862100",
"familletravaux":"renouvellement de la signalisation",
"nb_interventions":1,
"num_semaine":"31",
"annee":"2023"
}
]
}
To get a start date, we combine the "annee" (year) and "num_semaine" (week number) fields.
"""
import json
import requests
import datetime
import sys
import os
from datetime import datetime, timedelta
# Add the parent directory to the path so we can import from oedb
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from oedb.utils.db import db_connect
from oedb.utils.logging import logger
# API URL for SNCF open data
API_URL = "https://data.sncf.com/api/explore/v2.1/catalog/datasets/interceptions-programmees-sur-ligne/records?limit=200"
def fetch_sncf_data():
    """
    Fetch railway work planning data from the SNCF open data API.

    Sends one GET request to ``API_URL`` and returns the list stored under
    the ``results`` key of the JSON payload. Network errors, HTTP error
    statuses and undecodable JSON are logged and reported as an empty list.

    Returns:
        list: A list of railway work records (empty on failure).
    """
    logger.info("Fetching data from SNCF open data API")
    try:
        # Bound the request: without a timeout a stalled server would block
        # the extractor indefinitely.
        response = requests.get(API_URL, timeout=30)
        response.raise_for_status()  # Raise an exception for HTTP errors
        data = response.json()
        if 'results' not in data:
            logger.error("No results found in API response")
            return []
        logger.success(f"Successfully fetched {len(data['results'])} records from SNCF API")
        return data['results']
    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching data from SNCF API: {e}")
        return []
    except ValueError as e:
        # json.JSONDecodeError is a ValueError; catching the base class also
        # covers decode errors raised by older requests/simplejson versions.
        logger.error(f"Error decoding JSON response: {e}")
        return []
def week_to_date(year, week_number):
    """
    Convert a year and ISO week number to the date range of that week.

    Args:
        year (str or int): The ISO year.
        week_number (str or int): The ISO week number (1-53). Values outside
            that range are replaced by week 1.

    Returns:
        tuple: A tuple containing (start_date, end_date) as ISO format
        strings, where start_date is the Monday of the week at midnight and
        end_date is six days later. If the inputs cannot be converted, a
        default range is returned instead of raising.
    """
    try:
        # Convert inputs to integers
        year = int(year)
        week_number = int(week_number)

        # Validate inputs
        if week_number < 1 or week_number > 53:
            logger.warning(f"Invalid week number: {week_number}, using week 1 instead")
            week_number = 1

        # Parse as an ISO 8601 week date: %G is the ISO year, %V the ISO week
        # and %u the ISO weekday (1 = Monday). The former '%Y-%W-%w' pattern
        # counted weeks from the first Monday of the calendar year, which is
        # one week late whenever 1 January falls on Tuesday to Thursday.
        start_date = datetime.strptime(f'{year}-{week_number}-1', '%G-%V-%u')

        # Calculate the end date (Sunday of the same week)
        end_date = start_date + timedelta(days=6)

        # Format dates as ISO strings
        return (start_date.isoformat(), end_date.isoformat())
    except ValueError as e:
        logger.error(f"Error converting week to date: {e}")
        try:
            # Default to the first seven days of the requested year
            fallback_start = datetime(int(year), 1, 1)
        except (TypeError, ValueError):
            # The year itself is unusable, so anchor the default on today
            # rather than raising from inside the error handler.
            now = datetime.now()
            return (now.isoformat(), (now + timedelta(days=7)).isoformat())
        return (fallback_start.isoformat(), (fallback_start + timedelta(days=6)).isoformat())
    except Exception as e:
        logger.error(f"Unexpected error converting week to date: {e}")
        # Return default dates (current date)
        now = datetime.now()
        return (now.isoformat(), (now + timedelta(days=7)).isoformat())
def create_event(record):
    """
    Build a GeoJSON Feature describing one SNCF railway work record.

    Args:
        record: A record from the SNCF API (a mapping supporting ``.get``).

    Returns:
        dict: A GeoJSON Feature representing the event, or None when the
        year or week number is missing or the record cannot be processed.
    """
    try:
        # The scheduling fields are mandatory: without them no date range
        # can be derived.
        line_name = record.get('lib_ligne', 'Unknown Line')
        structure = record.get('lib_structdem', 'Unknown Structure')
        line_code = record.get('cod_ligne', 'Unknown Line Code')
        start_point = record.get('pk_debm', '')
        end_point = record.get('pk_finm', '')
        work_type = record.get('familletravaux', 'Unknown Work Type')
        interventions = record.get('nb_interventions', 1)

        year = record.get('annee')
        week_number = record.get('num_semaine')
        if not (year and week_number):
            logger.warning(f"Missing year or week number for {line_name}, skipping")
            return None

        # Derive the start and stop timestamps from the week reference
        start_date, end_date = week_to_date(year, week_number)

        # Human-readable summary of the work
        description = (
            f"Railway maintenance work on line {line_code} ({line_name}) "
            f"from kilometer point {start_point} to {end_point}. "
            f"Type of work: {work_type}. "
            f"Number of interventions: {interventions}. "
            f"Managed by: {structure}."
        )

        # Fixed placeholder location (center of France); the line itself is
        # not geocoded here.
        geometry = {
            "type": "Point",
            "coordinates": [2.2137, 46.2276],
        }

        properties = {
            "type": "scheduled",
            "what": "transport.railway.maintenance",
            "what:series": "SNCF Railway Maintenance",
            "where": line_name,
            "label": f"Railway Work: {line_name} - {work_type}",
            "description": description,
            "start": start_date,
            "stop": end_date,
            "line_code": line_code,
            "work_type": work_type,
            "interventions": interventions,
            "start_point": start_point,
            "end_point": end_point,
            "structure": structure,
            "source": "SNCF Open Data",
        }

        return {
            "type": "Feature",
            "geometry": geometry,
            "properties": properties,
        }
    except Exception as e:
        logger.error(f"Error creating event from record: {e}")
        return None
def submit_event(event):
    """
    Submit an event to the OpenEventDatabase.

    The geometry is inserted into the ``geo`` table (or its hash is looked up
    when the insert returns no row), then the event is inserted into
    ``events``. The transaction is committed only when a new event row is
    created. The connection is closed on every exit path, including when a
    statement raises.

    Args:
        event: A GeoJSON Feature representing the event.

    Returns:
        bool: True if the event was successfully submitted, False otherwise.
    """
    db = None
    try:
        # Connect to the database
        db = db_connect()
        cur = db.cursor()

        # Extract event properties
        properties = event['properties']
        geometry = json.dumps(event['geometry'])

        # Insert the geometry into the geo table
        cur.execute("""
            INSERT INTO geo
            SELECT geom, md5(st_astext(geom)) as hash, st_centroid(geom) as geom_center FROM
            (SELECT st_setsrid(st_geomfromgeojson(%s),4326) as geom) as g
            WHERE ST_IsValid(geom)
            ON CONFLICT DO NOTHING RETURNING hash;
        """, (geometry,))

        # Get the geometry hash
        hash_result = cur.fetchone()
        if hash_result is None:
            # No row returned (conflict or invalid geometry): compute the
            # hash and validity directly.
            cur.execute("""
                SELECT md5(st_asewkt(geom)),
                ST_IsValid(geom),
                ST_IsValidReason(geom) from (SELECT st_geomfromgeojson(%s) as geom) as g;
            """, (geometry,))
            hash_result = cur.fetchone()

        if hash_result is None or (len(hash_result) > 1 and not hash_result[1]):
            logger.error(f"Invalid geometry for event: {properties.get('label')}")
            return False
        geo_hash = hash_result[0]

        # Identical start and stop need an inclusive upper bound, otherwise
        # the range would be empty.
        bounds = '[]' if properties['start'] == properties['stop'] else '[)'

        # Insert the event into the database
        cur.execute("""
            INSERT INTO events (events_type, events_what, events_when, events_tags, events_geo)
            VALUES (%s, %s, tstzrange(%s, %s, %s), %s, %s)
            ON CONFLICT DO NOTHING RETURNING events_id;
        """, (
            properties['type'],
            properties['what'],
            properties['start'],
            properties['stop'],
            bounds,
            json.dumps(properties),
            geo_hash
        ))

        # Get the event ID
        event_id = cur.fetchone()
        if event_id:
            logger.success(f"Event created with ID: {event_id[0]}")
            db.commit()
            return True

        # Nothing inserted: check if the event already exists
        cur.execute("""
            SELECT events_id FROM events
            WHERE events_what = %s
            AND events_when = tstzrange(%s, %s, %s)
            AND events_geo = %s;
        """, (
            properties['what'],
            properties['start'],
            properties['stop'],
            bounds,
            geo_hash
        ))
        existing_id = cur.fetchone()
        if existing_id:
            logger.info(f"Event already exists with ID: {existing_id[0]}")
        else:
            logger.warning(f"Failed to create event: {properties.get('label')}")
        return False
    except Exception as e:
        logger.error(f"Error submitting event: {e}")
        return False
    finally:
        # Previously the connection leaked whenever a statement raised.
        if db is not None:
            db.close()
def main():
    """
    Run the SNCF extractor end to end.

    Fetches the records, converts each one into an event, submits it to the
    database and logs how many submissions succeeded.
    """
    logger.info("Starting SNCF travaux extractor")

    # Nothing to do when the API returned no records
    records = fetch_sncf_data()
    if not records:
        logger.warning("No records found, exiting")
        return

    # Convert and submit record by record, counting successful submissions
    success_count = 0
    for raw in records:
        feature = create_event(raw)
        if feature and submit_event(feature):
            success_count += 1

    logger.success(f"Successfully added {success_count} out of {len(records)} events to the database")


if __name__ == "__main__":
    main()

127
extractors/test_sncf_travaux.py Executable file
View file

@ -0,0 +1,127 @@
#!/usr/bin/env python3
"""
Test script for the SNCF travaux extractor.
This script tests the functionality of the SNCF travaux extractor without actually
submitting events to the database.
"""
import sys
import os
import json
from datetime import datetime
# Add the parent directory to the path so we can import from the extractor
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
# Import functions from the extractor
from extractors.sncf_travaux import fetch_sncf_data, week_to_date, create_event
def test_week_to_date():
    """Print what week_to_date returns for integer, string and out-of-range input."""
    print("Testing week_to_date function...")

    # Valid inputs, first as integers and then as strings
    for year, week in ((2023, 31), ("2023", "31")):
        start_date, end_date = week_to_date(year, week)
        print(f"Week {week} of {year} starts on {start_date} and ends on {end_date}")

    # Week number outside the 1-53 range
    year, week = 2023, 60
    start_date, end_date = week_to_date(year, week)
    print(f"Invalid week {week} of {year} returns {start_date} to {end_date}")

    print("week_to_date function test completed.\n")
def test_create_event():
    """Print the event built from a complete record and from one lacking dates."""
    print("Testing create_event function...")

    # Fields shared by the complete and the incomplete sample record
    base_fields = {
        "lib_structdem": "Siège INFRAPOLE PACA",
        "cod_ligne": "830000",
        "lib_ligne": "Ligne de Paris-Lyon à Marseille-St-Charles",
    }

    # Complete sample record
    record = dict(
        base_fields,
        pk_debm="687000",
        pk_finm="862100",
        familletravaux="renouvellement de la signalisation",
        nb_interventions=1,
        num_semaine="31",
        annee="2023",
    )
    event = create_event(record)
    if not event:
        print("Failed to create event from record.")
    else:
        props = event['properties']
        print("Event created successfully:")
        for title, key in (
            ("Label", "label"),
            ("Start date", "start"),
            ("End date", "stop"),
            ("Type", "type"),
            ("What", "what"),
            ("Where", "where"),
        ):
            print(f"{title}: {props[key]}")
        print(f"Description: {props['description'][:100]}...")

    # Record without year and week number: create_event should return None
    event_missing = create_event(dict(base_fields))
    if event_missing is None:
        print("Correctly handled record with missing required fields.")
    else:
        print("Failed to handle record with missing required fields.")

    print("create_event function test completed.\n")
def test_fetch_sncf_data():
    """Call fetch_sncf_data against the live API and print the first record."""
    print("Testing fetch_sncf_data function...")
    print("Note: This test will make an actual API request to the SNCF API.")
    print("If you're not connected to the internet, this test will fail.")

    # Live request; an empty result is reported as a failure
    records = fetch_sncf_data()
    if not records:
        print("Failed to fetch data from the SNCF API.")
    else:
        print(f"Successfully fetched {len(records)} records from the SNCF API.")
        print("First record:")
        print(json.dumps(records[0], indent=2))

    print("fetch_sncf_data function test completed.\n")
def main():
    """Run the offline checks of the SNCF travaux extractor."""
    print("Starting tests for SNCF travaux extractor...")

    # Offline checks, in order: date conversion, then event creation
    for check in (test_week_to_date, test_create_event):
        check()

    # The live API check is opt-in because it needs network access; call
    # test_fetch_sncf_data() here to include it.

    print("All tests completed.")


if __name__ == "__main__":
    main()

98
oedb/middleware/cache.py Normal file
View file

@ -0,0 +1,98 @@
"""
Caching middleware for the OpenEventDatabase.
"""
from oedb.utils.logging import logger
class CacheMiddleware:
    """
    Response middleware that sets HTTP caching headers.

    Successful responses whose request matches a caching rule (or the GET
    default) receive a public ``Cache-Control: max-age`` header; error
    responses and everything else receive headers that forbid caching.
    """

    def __init__(self, default_max_age=60):
        """
        Set up the caching rules.

        Args:
            default_max_age: max-age in seconds applied to GET requests that
                match no explicit rule.
        """
        # Each rule is (endpoint_prefix, method, max_age); rules are checked
        # in order and the first match wins.
        self.caching_rules = [
            ('/event', 'GET', 60),            # events: 60 seconds
            ('/stats', 'GET', 300),           # stats: 5 minutes
            ('/demo', 'GET', 3600),           # demo pages: 1 hour
            ('/event/search', 'POST', 0),     # search results: never cached
        ]
        self.default_max_age = default_max_age

    def process_response(self, req, resp, resource, params):
        """
        Attach caching or no-cache headers to the outgoing response.

        Args:
            req: The request object.
            resp: The response object.
            resource: The resource object (unused).
            params: Fourth argument passed by the framework (unused).
        """
        # Error responses must never be cached
        if resp.status_code >= 400:
            self._add_no_cache_headers(resp)
            return

        ttl = self._get_max_age(req)
        if ttl <= 0:
            self._add_no_cache_headers(resp)
            return

        logger.debug(f"Adding caching headers with max-age={ttl} to {req.method} {req.path}")
        resp.set_header('Cache-Control', f'public, max-age={ttl}')
        resp.set_header('Vary', 'Accept-Encoding')

    def _get_max_age(self, req):
        """
        Look up the max-age that applies to a request.

        Args:
            req: The request object.

        Returns:
            int: The max-age value in seconds, or 0 for no caching.
        """
        # Explicit rules take precedence over the method-based default
        for prefix, rule_method, ttl in self.caching_rules:
            if req.path.startswith(prefix) and req.method == rule_method:
                return ttl

        # Only GET falls back to the default lifetime; write methods and any
        # other method are not cached.
        if req.method == 'GET':
            return self.default_max_age
        return 0

    def _add_no_cache_headers(self, resp):
        """
        Set the headers that tell clients not to cache the response.

        Args:
            resp: The response object.
        """
        logger.debug("Adding no-cache headers to response")
        no_cache_headers = (
            ('Cache-Control', 'no-store, no-cache, must-revalidate, max-age=0'),
            ('Pragma', 'no-cache'),
            ('Expires', '0'),
        )
        for header_name, header_value in no_cache_headers:
            resp.set_header(header_name, header_value)

View file

@ -0,0 +1,167 @@
"""
Rate limiting middleware for the OpenEventDatabase.
"""
import time
import threading
import falcon
from collections import defaultdict
from oedb.utils.logging import logger
class RateLimitMiddleware:
    """
    Middleware that implements rate limiting to prevent API abuse.

    Timestamps of accepted requests are recorded per client IP. A request is
    rejected with HTTP 429 once the number of timestamps recorded for its IP
    inside the last ``window_size`` seconds has reached the limit that applies
    to the request's path and method.

    All requests from one IP share a single counter; the per-endpoint rules
    only select which threshold that counter is compared against.
    """

    # Direct connections from these addresses are exempt (local development).
    _LOCAL_ADDRESSES = frozenset(('127.0.0.1', 'localhost', '::1'))

    def __init__(self, window_size=60, max_requests=60):
        """
        Initialize the middleware with rate limiting settings.

        Args:
            window_size: Time window in seconds for rate limiting.
            max_requests: Maximum number of requests allowed per IP in the
                window when no endpoint-specific rule matches.
        """
        self.window_size = window_size
        self.max_requests = max_requests
        # Request timestamps (integer seconds) by client IP, oldest first.
        self.requests = defaultdict(list)
        # Guards every read/write of self.requests.
        self.lock = threading.Lock()
        # Endpoint-specific limits: (endpoint_prefix, method, max_requests).
        # When several prefixes match a path, the longest one wins.
        self.rate_limit_rules = [
            # Limit POST requests to /event to 10 per minute
            ('/event', 'POST', 10),
            # Limit POST requests to /event/search to 20 per minute
            ('/event/search', 'POST', 20),
            # Limit DELETE requests to /event to 5 per minute
            ('/event', 'DELETE', 5),
        ]
        logger.info(f"Rate limiting initialized: {max_requests} requests per {window_size} seconds")

    def process_request(self, req, resp):
        """
        Process the request and apply rate limiting.

        Args:
            req: The request object.
            resp: The response object (unused).

        Raises:
            falcon.HTTPTooManyRequests: If the rate limit is exceeded. The
                response carries a ``Retry-After`` header (at least 1 second).
        """
        # Skip rate limiting for local requests (for development)
        if self._is_local_request(req):
            return

        client_ip = self._get_client_ip(req)
        max_requests = self._get_max_requests(req)

        with self.lock:
            self._clean_old_requests(client_ip)
            now = int(time.time())
            # .get() avoids creating an empty entry for a rejected request.
            timestamps = self.requests.get(client_ip, ())
            recent_requests = len(timestamps)

            if recent_requests >= max_requests:
                logger.warning(f"Rate limit exceeded for IP {client_ip}: {recent_requests} requests in {self.window_size} seconds")
                # With a limit of 0 there may be no recorded request at all.
                oldest = timestamps[0] if timestamps else now
                # Time until the oldest recorded request leaves the window.
                retry_after = max(1, self.window_size - (now - oldest))
                self._log_rate_limit_exceeded(client_ip, req)
                raise falcon.HTTPTooManyRequests(
                    title="Rate limit exceeded",
                    description=f"You have exceeded the rate limit of {max_requests} requests per {self.window_size} seconds",
                    headers={'Retry-After': str(retry_after)}
                )

            # Only accepted requests are counted.
            self.requests[client_ip].append(now)

    def _is_local_request(self, req):
        """
        Tell whether the request is a direct connection from this machine.

        The decision is based on the socket address only. A request carrying
        an X-Forwarded-For header is never treated as local, because that
        header is supplied by the client and could otherwise be set to
        ``127.0.0.1`` to bypass rate limiting.

        Args:
            req: The request object.

        Returns:
            bool: True if the request should be exempt from rate limiting.
        """
        if req.get_header('X-Forwarded-For'):
            return False
        return req.remote_addr in self._LOCAL_ADDRESSES

    def _get_client_ip(self, req):
        """
        Get the client IP address from the request.

        Args:
            req: The request object.

        Returns:
            str: The first non-empty address of X-Forwarded-For if present,
            otherwise the remote address, otherwise ``'0.0.0.0'``.
        """
        # Try to get the real IP from X-Forwarded-For header (if behind a proxy)
        forwarded_for = req.get_header('X-Forwarded-For')
        if forwarded_for:
            # The client IP is the first address in the list
            first_hop = forwarded_for.split(',')[0].strip()
            if first_hop:
                return first_hop
        # Fall back to the remote_addr
        return req.remote_addr or '0.0.0.0'

    def _clean_old_requests(self, client_ip):
        """
        Remove request timestamps that are outside the current window.

        The IP's entry is deleted entirely once it has no timestamps left.
        Callers are expected to hold ``self.lock``.

        Args:
            client_ip: The client IP address.
        """
        if client_ip not in self.requests:
            return
        cutoff_time = int(time.time()) - self.window_size
        recent = [t for t in self.requests[client_ip] if t > cutoff_time]
        if recent:
            self.requests[client_ip] = recent
        else:
            del self.requests[client_ip]

    def _get_max_requests(self, req):
        """
        Determine the maximum requests allowed for the current endpoint.

        Among the rules whose method equals the request method and whose
        prefix starts the request path, the rule with the longest prefix is
        used, so ``/event/search`` is not shadowed by ``/event``. On equal
        prefix length the rule listed first wins.

        Args:
            req: The request object.

        Returns:
            int: The maximum number of requests allowed; the global
            ``max_requests`` when no rule matches.
        """
        limit = self.max_requests
        best_prefix_len = -1
        for endpoint, method, max_requests in self.rate_limit_rules:
            if req.method != method or not req.path.startswith(endpoint):
                continue
            if len(endpoint) > best_prefix_len:
                best_prefix_len = len(endpoint)
                limit = max_requests
        return limit

    def _log_rate_limit_exceeded(self, client_ip, req):
        """
        Log details when a rate limit is exceeded for analysis.

        Args:
            client_ip: The client IP address.
            req: The request object.
        """
        # `default` must be passed by keyword: the second positional
        # parameter of Falcon's get_header() is `required`.
        user_agent = req.get_header('User-Agent', default='Unknown')
        logger.warning(
            f"Rate limit exceeded: IP={client_ip}, "
            f"Method={req.method}, Path={req.path}, "
            f"User-Agent={user_agent}"
        )

View file

@ -36,10 +36,12 @@ class BaseEvent:
properties.update({
'createdate': row['createdate'],
'lastupdate': row['lastupdate'],
'lon': row['lon'],
'lat': row['lat'],
"id": row['events_id']
})
# Ensure what property is used instead of description
if 'description' in properties and 'what' not in properties:
properties['what'] = properties.pop('description')
if 'secret' in properties: # hide secret in results
del properties['secret']
if "distance" in row:

View file

@ -67,7 +67,8 @@ class DemoResource:
<li><a href="/event" target="_blank">/event - Get Events</a></li>
<li><a href="/stats" target="_blank">/stats - Database Statistics</a></li>
</ul>
<p><a href="https://source.cipherbliss.com/tykayn/oedb-backend" target="_blank">Source Code on GitHub</a></p>
<p><a href="/demo/add" class="add-event-btn" style="display: block; text-align: center; margin-top: 15px; padding: 8px; background-color: #0078ff; color: white; border-radius: 4px; font-weight: bold;">+ Add New Event</a></p>
<p><a href="https://source.cipherbliss.com/tykayn/oedb-backend" target="_blank">Source Code on Cipherbliss</a></p>
</div>
<script>
@ -89,11 +90,8 @@ class DemoResource:
// Function to fetch events from the API
function fetchEvents() {
// Get current date in YYYY-MM-DD format
const today = new Date().toISOString().split('T')[0];
// Fetch events from the API
fetch('/event?when=' + today)
// Fetch events from the API - using default behavior to get currently active events
fetch('/event')
.then(response => response.json())
.then(data => {
if (data.features && data.features.length > 0) {

View file

@ -179,7 +179,8 @@ class EventResource(BaseEvent):
unused, event_stop = self.relative_time(req.params['stop'], cur)
event_when = "tstzrange(now(),%s,'[]')" % event_stop
else:
event_when = "tstzrange(now(),now(),'[]')"
# Return events that are currently active (current time is between start and stop)
event_when = "now()"
if 'what' in req.params:
# Limit search based on "what"
@ -209,10 +210,18 @@ class EventResource(BaseEvent):
event_geom = cur.mogrify("ST_SnapToGrid(geom,%s)", (req.params['geom'],)).decode("utf-8")
# Search recent active events.
sql = """SELECT events_id, events_tags, createdate, lastupdate, {event_dist} st_asgeojson({event_geom}) as geometry, st_x(geom_center) as lon, st_y(geom_center) as lat
FROM events JOIN geo ON (hash=events_geo)
WHERE events_when && {event_when} {event_what} {event_type} {event_bbox}
ORDER BY {event_sort} {limit}"""
if event_when == "now()":
# Use @> operator to check if events_when contains current time
sql = """SELECT events_id, events_tags, createdate, lastupdate, {event_dist} st_asgeojson({event_geom}) as geometry, st_x(geom_center) as lon, st_y(geom_center) as lat
FROM events JOIN geo ON (hash=events_geo)
WHERE events_when @> {event_when} {event_what} {event_type} {event_bbox}
ORDER BY {event_sort} {limit}"""
else:
# Use && operator to check if events_when overlaps with event_when
sql = """SELECT events_id, events_tags, createdate, lastupdate, {event_dist} st_asgeojson({event_geom}) as geometry, st_x(geom_center) as lon, st_y(geom_center) as lat
FROM events JOIN geo ON (hash=events_geo)
WHERE events_when && {event_when} {event_what} {event_type} {event_bbox}
ORDER BY {event_sort} {limit}"""
# No user generated content here, so format is safe.
sql = sql.format(event_dist=event_dist, event_geom=event_geom,
event_bbox=event_bbox, event_what=event_what,

View file

@ -0,0 +1,352 @@
"""
Event form resource for the OpenEventDatabase.
"""
import falcon
from oedb.utils.logging import logger
class EventFormResource:
    """
    Resource for the event submission form.

    Handles the /demo/add endpoint: a GET returns a static HTML page with a
    form and a map. The page's script builds a GeoJSON Feature from the form
    values and the marker position and POSTs it as JSON to /event.
    """

    # Static page served by on_get. Kept as a plain (non-f) string so the
    # JavaScript template literals (${...}) are sent to the browser verbatim.
    FORM_HTML = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Add Event - OpenEventDatabase</title>
<script src="https://unpkg.com/maplibre-gl@3.0.0/dist/maplibre-gl.js"></script>
<link href="https://unpkg.com/maplibre-gl@3.0.0/dist/maplibre-gl.css" rel="stylesheet" />
<style>
body {
margin: 0;
padding: 20px;
font-family: Arial, sans-serif;
background-color: #f5f5f5;
}
.container {
max-width: 1000px;
margin: 0 auto;
background-color: white;
padding: 20px;
border-radius: 5px;
box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
}
h1 {
margin-top: 0;
color: #333;
}
.form-group {
margin-bottom: 15px;
}
label {
display: block;
margin-bottom: 5px;
font-weight: bold;
}
input[type="text"],
input[type="datetime-local"],
select,
textarea {
width: 100%;
padding: 8px;
border: 1px solid #ddd;
border-radius: 4px;
box-sizing: border-box;
font-size: 14px;
}
.required:after {
content: " *";
color: red;
}
.form-row {
display: flex;
gap: 15px;
}
.form-row .form-group {
flex: 1;
}
button {
background-color: #0078ff;
color: white;
border: none;
padding: 10px 15px;
border-radius: 4px;
cursor: pointer;
font-size: 16px;
}
button:hover {
background-color: #0056b3;
}
.note {
font-size: 12px;
color: #666;
margin-top: 5px;
}
#map {
width: 100%;
height: 300px;
margin-bottom: 15px;
border-radius: 4px;
}
#result {
margin-top: 20px;
padding: 10px;
border-radius: 4px;
display: none;
}
#result.success {
background-color: #d4edda;
border: 1px solid #c3e6cb;
color: #155724;
}
#result.error {
background-color: #f8d7da;
border: 1px solid #f5c6cb;
color: #721c24;
}
.nav-links {
margin-bottom: 20px;
}
.nav-links a {
color: #0078ff;
text-decoration: none;
margin-right: 15px;
}
.nav-links a:hover {
text-decoration: underline;
}
</style>
</head>
<body>
<div class="container">
<div class="nav-links">
<a href="/demo">&larr; Back to Map</a>
<a href="/">API Information</a>
<a href="/event">View Events</a>
</div>
<h1>Add New Event</h1>
<form id="eventForm">
<div class="form-group">
<label for="label" >Event Name</label>
<input type="text" id="label" name="label" >
</div>
<div class="form-row">
<div class="form-group">
<label for="type" >Event Type</label>
<select id="type" name="type" >
<option value="scheduled">Scheduled</option>
<option value="forecast">Forecast</option>
<option value="unscheduled">Unscheduled</option>
</select>
</div>
<div class="form-group">
<label for="what" >What</label>
<input type="text" id="what" name="what" placeholder="e.g., sport.match.football" >
<div class="note">Category of the event (e.g., sport.match.football, culture.festival)</div>
</div>
</div>
<div class="form-row">
<div class="form-group">
<label for="what_series">What: Series</label>
<input type="text" id="what_series" name="what_series" placeholder="e.g., Euro 2024">
<div class="note">Series or group the event belongs to (e.g., Euro 2024, Summer Festival 2023)</div>
</div>
<div class="form-group">
<label for="where">Where</label>
<input type="text" id="where" name="where" placeholder="e.g., Stadium Name">
<div class="note">Specific location name (e.g., Eiffel Tower, Wembley Stadium)</div>
</div>
</div>
<div class="form-row">
<div class="form-group">
<label for="start" >Start Time</label>
<input type="datetime-local" id="start" name="start" value="">
</div>
<div class="form-group">
<label for="stop" >End Time</label>
<input type="datetime-local" id="stop" name="stop" value="">
</div>
</div>
<div class="form-group">
<label >Location</label>
<div id="map"></div>
<div class="note">Click on the map to set the event location</div>
</div>
<button type="submit">Create Event</button>
</form>
<div id="result"></div>
</div>
<script>
// Set default date values (current day)
function setDefaultDates() {
const now = new Date();
// datetime-local inputs hold local wall-clock time, while toISOString() is UTC:
// shift by the timezone offset before formatting
const local = new Date(now.getTime() - now.getTimezoneOffset() * 60000);
const today = local.toISOString().slice(0, 16); // Format: YYYY-MM-DDThh:mm
// Set start time to current time
document.getElementById('start').value = today;
// Set end time to current time
document.getElementById('stop').value = today;
}
// Call function to set default dates
setDefaultDates();
// Initialize the map
const map = new maplibregl.Map({
container: 'map',
style: 'https://demotiles.maplibre.org/style.json',
center: [2.2137, 46.2276], // Default center (center of metropolitan France)
zoom: 5
});
// Add navigation controls
map.addControl(new maplibregl.NavigationControl());
// Add marker for event location
let marker = new maplibregl.Marker({
draggable: true
});
// Set default marker at the center of metropolitan France
marker.setLngLat([2.2137, 46.2276]).addTo(map);
// Track whether the marker is on the map: getLngLat() keeps returning the
// last position after remove(), so it cannot be used for this check
let markerPlaced = true;
// Add marker on map click
map.on('click', function(e) {
marker.setLngLat(e.lngLat).addTo(map);
markerPlaced = true;
});
// Handle form submission
document.getElementById('eventForm').addEventListener('submit', function(e) {
e.preventDefault();
// Get form values
const label = document.getElementById('label').value;
const type = document.getElementById('type').value;
const what = document.getElementById('what').value;
const what_series = document.getElementById('what_series').value;
const where = document.getElementById('where').value;
const start = document.getElementById('start').value;
const stop = document.getElementById('stop').value;
// Check if marker is set
if (!markerPlaced) {
showResult('Please set a location by clicking on the map', 'error');
return;
}
// Get marker coordinates
const lngLat = marker.getLngLat();
// Create event object
const event = {
type: 'Feature',
geometry: {
type: 'Point',
coordinates: [lngLat.lng, lngLat.lat]
},
properties: {
label: label,
type: type,
what: what,
start: start,
stop: stop
}
};
// Add optional properties if provided
if (what_series) {
event.properties['what:series'] = what_series;
}
if (where) {
event.properties.where = where;
}
// Submit event to API
fetch('/event', {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify(event)
})
.then(response => {
if (response.ok) {
return response.json();
} else {
return response.text().then(text => {
throw new Error(text || response.statusText);
});
}
})
.then(data => {
showResult(`Event created successfully with ID: ${data.id}`, 'success');
// Reset form
document.getElementById('eventForm').reset();
// reset() blanks the date fields, so restore their defaults
setDefaultDates();
// Remove marker
marker.remove();
markerPlaced = false;
})
.catch(error => {
showResult(`Error creating event: ${error.message}`, 'error');
});
});
// Show result message
function showResult(message, type) {
const resultElement = document.getElementById('result');
resultElement.textContent = message;
resultElement.className = type;
resultElement.style.display = 'block';
// Scroll to result
resultElement.scrollIntoView({ behavior: 'smooth' });
}
</script>
</body>
</html>
"""

    def on_get(self, req, resp):
        """
        Handle GET requests to the /demo/add endpoint.

        Returns an HTML page with a form for adding events. If anything in
        the handler raises, the response is a 500 whose body is the error
        message.

        Args:
            req: The request object (unused).
            resp: The response object; content type, body and status are set.
        """
        logger.info("Processing GET request to /demo/add")
        try:
            # Set content type to HTML
            resp.content_type = 'text/html'
            # Set the response body and status
            resp.text = self.FORM_HTML
            resp.status = falcon.HTTP_200
            logger.success("Successfully processed GET request to /demo/add")
        except Exception as e:
            # Best-effort boundary: report the failure instead of propagating.
            logger.error(f"Error processing GET request to /demo/add: {e}")
            resp.status = falcon.HTTP_500
            resp.text = f"Error: {str(e)}"
# Create a global instance of EventFormResource.
# The class defines no __init__ and stores nothing on the instance, so one
# module-level instance can be shared. Presumably this is the object the
# application registers for the /demo/add route — confirm against the app setup.
event_form = EventFormResource()