add extractors, rate limit, demo submit form

This commit is contained in:
Tykayn 2025-09-16 00:46:09 +02:00 committed by tykayn
parent cc870323bf
commit 2157091778
12 changed files with 1612 additions and 14 deletions

98
oedb/middleware/cache.py Normal file
View file

@@ -0,0 +1,98 @@
"""
Caching middleware for the OpenEventDatabase.
"""
from oedb.utils.logging import logger
class CacheMiddleware:
    """
    Falcon middleware that adds caching headers to responses.

    Responses whose (endpoint prefix, method) pair matches a caching rule
    get a ``Cache-Control: public, max-age=N`` header so clients and
    intermediaries can cache them; error responses and write operations
    get explicit no-cache headers so they are never stored.
    """

    def __init__(self, default_max_age=60):
        """
        Initialize the middleware with default caching settings.

        Args:
            default_max_age: Default max-age in seconds applied to GET
                requests that match no explicit caching rule.
        """
        self.default_max_age = default_max_age
        # Caching rules as (endpoint_prefix, method, max_age).
        # Matching is longest-prefix-first (see _get_max_age) so a more
        # specific path such as '/event/search' is never shadowed by a
        # shorter prefix such as '/event'.
        self.caching_rules = [
            # Cache GET requests to /event for 60 seconds.
            ('/event', 'GET', 60),
            # Cache GET requests to /stats for 300 seconds (5 minutes).
            ('/stats', 'GET', 300),
            # Cache GET requests to /demo for 3600 seconds (1 hour).
            ('/demo', 'GET', 3600),
            # Don't cache search results.
            ('/event/search', 'POST', 0),
        ]

    def process_response(self, req, resp, resource, params):
        """
        Falcon ``process_response`` hook: add caching headers.

        Args:
            req: The request object.
            resp: The response object.
            resource: The routed resource (unused).
            params: Fourth positional hook argument. NOTE(review): Falcon
                actually passes ``req_succeeded`` (a bool) to
                ``process_response``, not the route params — the name is
                kept for compatibility with existing registrations.
        """
        # Error responses must never be cached.
        if self._status_code(resp) >= 400:
            self._add_no_cache_headers(resp)
            return
        max_age = self._get_max_age(req)
        if max_age > 0:
            logger.debug(f"Adding caching headers with max-age={max_age} to {req.method} {req.path}")
            resp.set_header('Cache-Control', f'public, max-age={max_age}')
            # Compressed and uncompressed variants must be cached separately.
            resp.set_header('Vary', 'Accept-Encoding')
        else:
            self._add_no_cache_headers(resp)

    @staticmethod
    def _status_code(resp):
        """
        Return the numeric HTTP status code of *resp*.

        Falcon's canonical response attribute is ``status`` (a string such
        as ``'200 OK'``, or an int in recent versions); ``status_code`` is
        not guaranteed to exist, so reading it unconditionally can raise
        AttributeError. This helper accepts any of those representations.

        Returns:
            int: The status code; defaults to 200 when it cannot be parsed.
        """
        code = getattr(resp, 'status_code', None)
        if code is not None:
            return int(code)
        status = getattr(resp, 'status', '200 OK')
        if isinstance(status, int):
            return status
        try:
            # '200 OK' -> 200
            return int(str(status).split(' ', 1)[0])
        except ValueError:
            return 200

    def _get_max_age(self, req):
        """
        Determine the max-age value for the current request.

        Args:
            req: The request object.

        Returns:
            int: The max-age value in seconds, or 0 for no caching.
        """
        # Longest prefix first, so the most specific matching rule wins.
        rules = sorted(self.caching_rules, key=lambda rule: len(rule[0]), reverse=True)
        for endpoint, method, max_age in rules:
            if req.method == method and req.path.startswith(endpoint):
                return max_age
        # Default: cache reads, never cache writes or other methods.
        if req.method == 'GET':
            return self.default_max_age
        return 0

    def _add_no_cache_headers(self, resp):
        """
        Add headers to prevent caching.

        Args:
            resp: The response object.
        """
        logger.debug("Adding no-cache headers to response")
        resp.set_header('Cache-Control', 'no-store, no-cache, must-revalidate, max-age=0')
        # Pragma/Expires cover legacy HTTP/1.0 caches.
        resp.set_header('Pragma', 'no-cache')
        resp.set_header('Expires', '0')

View file

@@ -0,0 +1,167 @@
"""
Rate limiting middleware for the OpenEventDatabase.
"""
import time
import threading
import falcon
from collections import defaultdict
from oedb.utils.logging import logger
class RateLimitMiddleware:
    """
    Falcon middleware that implements rate limiting to prevent API abuse.

    Tracks request timestamps per client IP inside a sliding window and
    rejects requests exceeding the configured limit with HTTP 429,
    including a ``Retry-After`` header.
    """

    def __init__(self, window_size=60, max_requests=60):
        """
        Initialize the middleware with rate limiting settings.

        Args:
            window_size: Time window in seconds for rate limiting.
            max_requests: Maximum number of requests allowed per IP in the window.
        """
        self.window_size = window_size
        self.max_requests = max_requests
        # Request timestamps (epoch seconds) keyed by client IP.
        self.requests = defaultdict(list)
        # Protects self.requests against concurrent worker threads.
        self.lock = threading.Lock()
        # Per-endpoint limits as (endpoint_prefix, method, max_requests).
        # Matching is longest-prefix-first (see _get_max_requests) so that
        # '/event/search' is not shadowed by the shorter '/event' prefix.
        self.rate_limit_rules = [
            # Limit POST requests to /event to 10 per minute.
            ('/event', 'POST', 10),
            # Limit POST requests to /event/search to 20 per minute.
            ('/event/search', 'POST', 20),
            # Limit DELETE requests to /event to 5 per minute.
            ('/event', 'DELETE', 5),
        ]
        logger.info(f"Rate limiting initialized: {max_requests} requests per {window_size} seconds")

    def process_request(self, req, resp):
        """
        Process the request and apply rate limiting.

        Args:
            req: The request object.
            resp: The response object.

        Raises:
            falcon.HTTPTooManyRequests: If the rate limit is exceeded.
        """
        client_ip = self._get_client_ip(req)
        # Skip rate limiting for local requests (for development).
        if client_ip in ('127.0.0.1', 'localhost', '::1'):
            return
        max_requests = self._get_max_requests(req)
        with self.lock:
            # Drop timestamps older than the window before counting.
            self._clean_old_requests(client_ip)
            recent_requests = len(self.requests[client_ip])
            if recent_requests >= max_requests:
                logger.warning(f"Rate limit exceeded for IP {client_ip}: {recent_requests} requests in {self.window_size} seconds")
                # Time until the oldest request in the window expires.
                retry_after = self.window_size - (int(time.time()) - self.requests[client_ip][0])
                retry_after = max(1, retry_after)  # Ensure retry_after is at least 1 second
                # Record details for tracking abuse patterns.
                self._log_rate_limit_exceeded(client_ip, req)
                raise falcon.HTTPTooManyRequests(
                    title="Rate limit exceeded",
                    description=f"You have exceeded the rate limit of {max_requests} requests per {self.window_size} seconds",
                    headers={'Retry-After': str(retry_after)}
                )
            # Record the current request timestamp.
            self.requests[client_ip].append(int(time.time()))

    def _get_client_ip(self, req):
        """
        Get the client IP address from the request.

        Args:
            req: The request object.

        Returns:
            str: The client IP address.
        """
        # Prefer X-Forwarded-For when behind a proxy; the client is the
        # first address in the comma-separated list.
        # NOTE(review): header is spoofable unless a trusted proxy strips it.
        forwarded_for = req.get_header('X-Forwarded-For')
        if forwarded_for:
            return forwarded_for.split(',')[0].strip()
        return req.remote_addr or '0.0.0.0'

    def _clean_old_requests(self, client_ip):
        """
        Remove request timestamps that are outside the current window.

        Args:
            client_ip: The client IP address.
        """
        if client_ip not in self.requests:
            return
        cutoff_time = int(time.time()) - self.window_size
        self.requests[client_ip] = [t for t in self.requests[client_ip] if t > cutoff_time]
        # Drop empty entries so self.requests does not grow unboundedly.
        if not self.requests[client_ip]:
            del self.requests[client_ip]

    def _get_max_requests(self, req):
        """
        Determine the maximum requests allowed for the current endpoint.

        Args:
            req: The request object.

        Returns:
            int: The maximum number of requests allowed.
        """
        # Longest prefix first; otherwise ('/event', 'POST') would shadow
        # ('/event/search', 'POST') and the search limit could never apply.
        rules = sorted(self.rate_limit_rules, key=lambda rule: len(rule[0]), reverse=True)
        for endpoint, method, max_requests in rules:
            if req.method == method and req.path.startswith(endpoint):
                return max_requests
        # Default to the global max_requests.
        return self.max_requests

    def _log_rate_limit_exceeded(self, client_ip, req):
        """
        Log details when a rate limit is exceeded for analysis.

        Args:
            client_ip: The client IP address.
            req: The request object.
        """
        # 'or' fallback: passing 'Unknown' positionally would set Falcon's
        # `required` parameter and raise when the header is missing.
        user_agent = req.get_header('User-Agent') or 'Unknown'
        logger.warning(
            f"Rate limit exceeded: IP={client_ip}, "
            f"Method={req.method}, Path={req.path}, "
            f"User-Agent={user_agent}"
        )