add extractors, rate limit, demo submit form

This commit is contained in:
Tykayn 2025-09-16 00:46:09 +02:00 committed by tykayn
parent cc870323bf
commit 2157091778
12 changed files with 1612 additions and 14 deletions

98
oedb/middleware/cache.py Normal file
View file

@@ -0,0 +1,98 @@
"""
Caching middleware for the OpenEventDatabase.
"""
from oedb.utils.logging import logger
class CacheMiddleware:
    """
    Falcon middleware that adds caching headers to responses.

    Responses whose (endpoint prefix, method) pair matches a caching rule
    get a ``Cache-Control: public, max-age=N`` header so clients and
    intermediaries can cache them; error responses and write operations
    get explicit no-cache headers so they are never stored.
    """

    def __init__(self, default_max_age=60):
        """
        Initialize the middleware with default caching settings.

        Args:
            default_max_age: Default max-age in seconds applied to GET
                requests that match no explicit caching rule.
        """
        self.default_max_age = default_max_age
        # Caching rules as (endpoint_prefix, method, max_age).
        # Matching is longest-prefix-first (see _get_max_age) so a more
        # specific path such as '/event/search' is never shadowed by a
        # shorter prefix such as '/event'.
        self.caching_rules = [
            # Cache GET requests to /event for 60 seconds.
            ('/event', 'GET', 60),
            # Cache GET requests to /stats for 300 seconds (5 minutes).
            ('/stats', 'GET', 300),
            # Cache GET requests to /demo for 3600 seconds (1 hour).
            ('/demo', 'GET', 3600),
            # Don't cache search results.
            ('/event/search', 'POST', 0),
        ]

    def process_response(self, req, resp, resource, params):
        """
        Falcon ``process_response`` hook: add caching headers.

        Args:
            req: The request object.
            resp: The response object.
            resource: The routed resource (unused).
            params: Fourth positional hook argument. NOTE(review): Falcon
                actually passes ``req_succeeded`` (a bool) to
                ``process_response``, not the route params — the name is
                kept for compatibility with existing registrations.
        """
        # Error responses must never be cached.
        if self._status_code(resp) >= 400:
            self._add_no_cache_headers(resp)
            return
        max_age = self._get_max_age(req)
        if max_age > 0:
            logger.debug(f"Adding caching headers with max-age={max_age} to {req.method} {req.path}")
            resp.set_header('Cache-Control', f'public, max-age={max_age}')
            # Compressed and uncompressed variants must be cached separately.
            resp.set_header('Vary', 'Accept-Encoding')
        else:
            self._add_no_cache_headers(resp)

    @staticmethod
    def _status_code(resp):
        """
        Return the numeric HTTP status code of *resp*.

        Falcon's canonical response attribute is ``status`` (a string such
        as ``'200 OK'``, or an int in recent versions); ``status_code`` is
        not guaranteed to exist, so reading it unconditionally can raise
        AttributeError. This helper accepts any of those representations.

        Returns:
            int: The status code; defaults to 200 when it cannot be parsed.
        """
        code = getattr(resp, 'status_code', None)
        if code is not None:
            return int(code)
        status = getattr(resp, 'status', '200 OK')
        if isinstance(status, int):
            return status
        try:
            # '200 OK' -> 200
            return int(str(status).split(' ', 1)[0])
        except ValueError:
            return 200

    def _get_max_age(self, req):
        """
        Determine the max-age value for the current request.

        Args:
            req: The request object.

        Returns:
            int: The max-age value in seconds, or 0 for no caching.
        """
        # Longest prefix first, so the most specific matching rule wins.
        rules = sorted(self.caching_rules, key=lambda rule: len(rule[0]), reverse=True)
        for endpoint, method, max_age in rules:
            if req.method == method and req.path.startswith(endpoint):
                return max_age
        # Default: cache reads, never cache writes or other methods.
        if req.method == 'GET':
            return self.default_max_age
        return 0

    def _add_no_cache_headers(self, resp):
        """
        Add headers to prevent caching.

        Args:
            resp: The response object.
        """
        logger.debug("Adding no-cache headers to response")
        resp.set_header('Cache-Control', 'no-store, no-cache, must-revalidate, max-age=0')
        # Pragma/Expires cover legacy HTTP/1.0 caches.
        resp.set_header('Pragma', 'no-cache')
        resp.set_header('Expires', '0')

View file

@@ -0,0 +1,167 @@
"""
Rate limiting middleware for the OpenEventDatabase.
"""
import time
import threading
import falcon
from collections import defaultdict
from oedb.utils.logging import logger
class RateLimitMiddleware:
    """
    Falcon middleware that implements rate limiting to prevent API abuse.

    Tracks request timestamps per client IP inside a sliding window and
    rejects requests exceeding the configured limit with HTTP 429,
    including a ``Retry-After`` header.
    """

    def __init__(self, window_size=60, max_requests=60):
        """
        Initialize the middleware with rate limiting settings.

        Args:
            window_size: Time window in seconds for rate limiting.
            max_requests: Maximum number of requests allowed per IP in the window.
        """
        self.window_size = window_size
        self.max_requests = max_requests
        # Request timestamps (epoch seconds) keyed by client IP.
        self.requests = defaultdict(list)
        # Protects self.requests against concurrent worker threads.
        self.lock = threading.Lock()
        # Per-endpoint limits as (endpoint_prefix, method, max_requests).
        # Matching is longest-prefix-first (see _get_max_requests) so that
        # '/event/search' is not shadowed by the shorter '/event' prefix.
        self.rate_limit_rules = [
            # Limit POST requests to /event to 10 per minute.
            ('/event', 'POST', 10),
            # Limit POST requests to /event/search to 20 per minute.
            ('/event/search', 'POST', 20),
            # Limit DELETE requests to /event to 5 per minute.
            ('/event', 'DELETE', 5),
        ]
        logger.info(f"Rate limiting initialized: {max_requests} requests per {window_size} seconds")

    def process_request(self, req, resp):
        """
        Process the request and apply rate limiting.

        Args:
            req: The request object.
            resp: The response object.

        Raises:
            falcon.HTTPTooManyRequests: If the rate limit is exceeded.
        """
        client_ip = self._get_client_ip(req)
        # Skip rate limiting for local requests (for development).
        if client_ip in ('127.0.0.1', 'localhost', '::1'):
            return
        max_requests = self._get_max_requests(req)
        with self.lock:
            # Drop timestamps older than the window before counting.
            self._clean_old_requests(client_ip)
            recent_requests = len(self.requests[client_ip])
            if recent_requests >= max_requests:
                logger.warning(f"Rate limit exceeded for IP {client_ip}: {recent_requests} requests in {self.window_size} seconds")
                # Time until the oldest request in the window expires.
                retry_after = self.window_size - (int(time.time()) - self.requests[client_ip][0])
                retry_after = max(1, retry_after)  # Ensure retry_after is at least 1 second
                # Record details for tracking abuse patterns.
                self._log_rate_limit_exceeded(client_ip, req)
                raise falcon.HTTPTooManyRequests(
                    title="Rate limit exceeded",
                    description=f"You have exceeded the rate limit of {max_requests} requests per {self.window_size} seconds",
                    headers={'Retry-After': str(retry_after)}
                )
            # Record the current request timestamp.
            self.requests[client_ip].append(int(time.time()))

    def _get_client_ip(self, req):
        """
        Get the client IP address from the request.

        Args:
            req: The request object.

        Returns:
            str: The client IP address.
        """
        # Prefer X-Forwarded-For when behind a proxy; the client is the
        # first address in the comma-separated list.
        # NOTE(review): header is spoofable unless a trusted proxy strips it.
        forwarded_for = req.get_header('X-Forwarded-For')
        if forwarded_for:
            return forwarded_for.split(',')[0].strip()
        return req.remote_addr or '0.0.0.0'

    def _clean_old_requests(self, client_ip):
        """
        Remove request timestamps that are outside the current window.

        Args:
            client_ip: The client IP address.
        """
        if client_ip not in self.requests:
            return
        cutoff_time = int(time.time()) - self.window_size
        self.requests[client_ip] = [t for t in self.requests[client_ip] if t > cutoff_time]
        # Drop empty entries so self.requests does not grow unboundedly.
        if not self.requests[client_ip]:
            del self.requests[client_ip]

    def _get_max_requests(self, req):
        """
        Determine the maximum requests allowed for the current endpoint.

        Args:
            req: The request object.

        Returns:
            int: The maximum number of requests allowed.
        """
        # Longest prefix first; otherwise ('/event', 'POST') would shadow
        # ('/event/search', 'POST') and the search limit could never apply.
        rules = sorted(self.rate_limit_rules, key=lambda rule: len(rule[0]), reverse=True)
        for endpoint, method, max_requests in rules:
            if req.method == method and req.path.startswith(endpoint):
                return max_requests
        # Default to the global max_requests.
        return self.max_requests

    def _log_rate_limit_exceeded(self, client_ip, req):
        """
        Log details when a rate limit is exceeded for analysis.

        Args:
            client_ip: The client IP address.
            req: The request object.
        """
        # 'or' fallback: passing 'Unknown' positionally would set Falcon's
        # `required` parameter and raise when the header is missing.
        user_agent = req.get_header('User-Agent') or 'Unknown'
        logger.warning(
            f"Rate limit exceeded: IP={client_ip}, "
            f"Method={req.method}, Path={req.path}, "
            f"User-Agent={user_agent}"
        )