| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  | #!/usr/bin/env python3 | 
					
						
							|  |  |  |  | """
 | 
					
						
							|  |  |  |  | OSM Calendar Extractor for the OpenEventDatabase. | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  | This script fetches events from the OpenStreetMap Calendar RSS feed | 
					
						
							| 
									
										
										
										
											2025-09-26 11:57:54 +02:00
										 |  |  |  | and adds them to the OpenEventDatabase via the API. | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | For events that don't have geographic coordinates in the RSS feed but have a link | 
					
						
							|  |  |  |  | to an OSM Calendar event (https://osmcal.org/event/...), the script will fetch | 
					
						
							|  |  |  |  | the iCal version of the event and extract the coordinates and location from there. | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | RSS Feed URL: https://osmcal.org/events.rss | 
					
						
							| 
									
										
										
										
											2025-09-26 11:57:54 +02:00
										 |  |  |  | API Endpoint: https://api.openeventdatabase.org/event | 
					
						
							| 
									
										
										
										
											2025-09-18 22:18:25 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-26 11:57:54 +02:00
										 |  |  |  | Usage: | 
					
						
							|  |  |  |  |     python osm_cal.py [--max-events MAX_EVENTS] [--offset OFFSET] | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | Arguments: | 
					
						
							|  |  |  |  |     --max-events MAX_EVENTS  Maximum number of events to insert (default: 1) | 
					
						
							|  |  |  |  |     --offset OFFSET          Number of events to skip from the beginning of the RSS feed (default: 0) | 
					
						
							| 
									
										
										
										
											2025-09-18 22:18:25 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-26 11:57:54 +02:00
										 |  |  |  | Examples: | 
					
						
							|  |  |  |  |     # Insert the first event from the RSS feed | 
					
						
							|  |  |  |  |     python osm_cal.py | 
					
						
							|  |  |  |  |      | 
					
						
							|  |  |  |  |     # Insert up to 5 events from the RSS feed | 
					
						
							|  |  |  |  |     python osm_cal.py --max-events 5 | 
					
						
							|  |  |  |  |      | 
					
						
							|  |  |  |  |     # Skip the first 3 events and insert the next 2 | 
					
						
							|  |  |  |  |     python osm_cal.py --offset 3 --max-events 2 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | Environment Variables: | 
					
						
							|  |  |  |  |     These environment variables can be set in the system environment or in a .env file | 
					
						
							|  |  |  |  |     in the project root directory. | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  | """
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | import json | 
					
						
							|  |  |  |  | import requests | 
					
						
							|  |  |  |  | import sys | 
					
						
							|  |  |  |  | import os | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  | import xml.etree.ElementTree as ET | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  | import re | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  | import html | 
					
						
							|  |  |  |  | from datetime import datetime, timedelta | 
					
						
							| 
									
										
										
										
											2025-09-26 18:19:54 +02:00
										 |  |  |  | from bs4 import BeautifulSoup | 
					
						
							|  |  |  |  | import unicodedata | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | # Add the parent directory to the path so we can import from oedb | 
					
						
							|  |  |  |  | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-18 22:18:25 +02:00
										 |  |  |  | from oedb.utils.db import db_connect, load_env_from_file | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  | from oedb.utils.logging import logger | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
# RSS feed of OSM Calendar events (downloaded by fetch_osm_calendar_data)
RSS_URL = "https://osmcal.org/events.rss"
# URL prefix shared by individual OSM Calendar event pages
OSMCAL_EVENT_BASE_URL = "https://osmcal.org/event/"
# Main OSM Calendar page (scraped for event links)
OSMCAL_MAIN_URL = "https://osmcal.org"
# JSON cache of already processed events, stored next to this script
CACHE_FILE = os.path.join(os.path.dirname(__file__), 'osm_cal_cache.json')
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | def fix_encoding(text): | 
					
						
							|  |  |  |  |     """
 | 
					
						
							|  |  |  |  |     Corrige les problèmes d'encodage UTF-8 courants. | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     Args: | 
					
						
							|  |  |  |  |         text (str): Texte potentiellement mal encodé | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     Returns: | 
					
						
							|  |  |  |  |         str: Texte avec l'encodage corrigé | 
					
						
							|  |  |  |  |     """
 | 
					
						
							|  |  |  |  |     if not text: | 
					
						
							|  |  |  |  |         return text | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     try: | 
					
						
							|  |  |  |  |         # Essayer de détecter et corriger l'encodage double UTF-8 | 
					
						
							|  |  |  |  |         # (UTF-8 interprété comme Latin-1 puis réencodé en UTF-8) | 
					
						
							|  |  |  |  |         if 'Ã' in text: | 
					
						
							|  |  |  |  |             # Encoder en latin-1 puis décoder en UTF-8 | 
					
						
							|  |  |  |  |             corrected = text.encode('latin-1').decode('utf-8') | 
					
						
							|  |  |  |  |             logger.info(f"Encodage corrigé : '{text}' -> '{corrected}'") | 
					
						
							|  |  |  |  |             return corrected | 
					
						
							|  |  |  |  |     except (UnicodeEncodeError, UnicodeDecodeError): | 
					
						
							|  |  |  |  |         # Si la correction échoue, essayer d'autres méthodes | 
					
						
							|  |  |  |  |         try: | 
					
						
							|  |  |  |  |             # Normaliser les caractères Unicode | 
					
						
							|  |  |  |  |             normalized = unicodedata.normalize('NFKD', text) | 
					
						
							|  |  |  |  |             return normalized | 
					
						
							|  |  |  |  |         except: | 
					
						
							|  |  |  |  |             pass | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     # Si aucune correction ne fonctionne, retourner le texte original | 
					
						
							|  |  |  |  |     return text | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | def load_event_cache(): | 
					
						
							|  |  |  |  |     """
 | 
					
						
							|  |  |  |  |     Charge le cache des événements traités depuis le fichier JSON. | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     Returns: | 
					
						
							|  |  |  |  |         dict: Dictionnaire des événements avec leur statut de traitement | 
					
						
							|  |  |  |  |     """
 | 
					
						
							|  |  |  |  |     if os.path.exists(CACHE_FILE): | 
					
						
							|  |  |  |  |         try: | 
					
						
							|  |  |  |  |             with open(CACHE_FILE, 'r', encoding='utf-8') as f: | 
					
						
							|  |  |  |  |                 cache = json.load(f) | 
					
						
							|  |  |  |  |                 logger.info(f"Cache chargé : {len(cache)} événements en cache") | 
					
						
							|  |  |  |  |                 return cache | 
					
						
							|  |  |  |  |         except Exception as e: | 
					
						
							|  |  |  |  |             logger.error(f"Erreur lors du chargement du cache : {e}") | 
					
						
							|  |  |  |  |             return {} | 
					
						
							|  |  |  |  |     else: | 
					
						
							|  |  |  |  |         logger.info("Aucun cache trouvé, création d'un nouveau cache") | 
					
						
							|  |  |  |  |         return {} | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | def save_event_cache(cache): | 
					
						
							|  |  |  |  |     """
 | 
					
						
							|  |  |  |  |     Sauvegarde le cache des événements dans le fichier JSON. | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     Args: | 
					
						
							|  |  |  |  |         cache (dict): Dictionnaire des événements avec leur statut | 
					
						
							|  |  |  |  |     """
 | 
					
						
							|  |  |  |  |     try: | 
					
						
							|  |  |  |  |         with open(CACHE_FILE, 'w', encoding='utf-8') as f: | 
					
						
							|  |  |  |  |             json.dump(cache, f, indent=2, ensure_ascii=False) | 
					
						
							|  |  |  |  |         logger.info(f"Cache sauvegardé : {len(cache)} événements") | 
					
						
							|  |  |  |  |     except Exception as e: | 
					
						
							|  |  |  |  |         logger.error(f"Erreur lors de la sauvegarde du cache : {e}") | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | def scrape_osmcal_event_links(): | 
					
						
							|  |  |  |  |     """
 | 
					
						
							|  |  |  |  |     Scrape la page principale d'osmcal.org pour extraire tous les liens d'événements. | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     Returns: | 
					
						
							|  |  |  |  |         list: Liste des URLs d'événements trouvés | 
					
						
							|  |  |  |  |     """
 | 
					
						
							|  |  |  |  |     logger.info(f"Scraping de la page principale : {OSMCAL_MAIN_URL}") | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     try: | 
					
						
							|  |  |  |  |         headers = { | 
					
						
							|  |  |  |  |             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' | 
					
						
							|  |  |  |  |         } | 
					
						
							|  |  |  |  |         response = requests.get(OSMCAL_MAIN_URL, headers=headers) | 
					
						
							|  |  |  |  |         response.raise_for_status() | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         soup = BeautifulSoup(response.content, 'html.parser') | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         # Debugging : sauvegarder le HTML pour inspection | 
					
						
							|  |  |  |  |         debug_file = os.path.join(os.path.dirname(__file__), 'osmcal_debug.html') | 
					
						
							|  |  |  |  |         with open(debug_file, 'w', encoding='utf-8') as f: | 
					
						
							|  |  |  |  |             f.write(response.text) | 
					
						
							|  |  |  |  |         logger.info(f"HTML de débogage sauvegardé dans : {debug_file}") | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         event_links = [] | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         # Essayer différents sélecteurs basés sur la structure HTML fournie | 
					
						
							|  |  |  |  |         selectors_to_try = [ | 
					
						
							|  |  |  |  |             'a.event-list-entry-box',  # Sélecteur principal basé sur l'exemple HTML | 
					
						
							|  |  |  |  |             'li.event-list-entry a',   # Sélecteur alternatif basé sur la structure | 
					
						
							|  |  |  |  |             '.event-list-entry a',     # Variation sans spécifier le tag li | 
					
						
							|  |  |  |  |             'a[href*="/event/"]',      # Tous les liens contenant "/event/" | 
					
						
							|  |  |  |  |             '.event-list-entry-box'    # Au cas où ce serait juste la classe | 
					
						
							|  |  |  |  |         ] | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         for selector in selectors_to_try: | 
					
						
							|  |  |  |  |             logger.info(f"Essai du sélecteur : {selector}") | 
					
						
							|  |  |  |  |             elements = soup.select(selector) | 
					
						
							|  |  |  |  |             logger.info(f"Trouvé {len(elements)} éléments avec le sélecteur {selector}") | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |             if elements: | 
					
						
							|  |  |  |  |                 for element in elements: | 
					
						
							|  |  |  |  |                     href = None | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |                     # Si l'élément est déjà un lien | 
					
						
							|  |  |  |  |                     if element.name == 'a' and element.get('href'): | 
					
						
							|  |  |  |  |                         href = element.get('href') | 
					
						
							|  |  |  |  |                     # Si l'élément contient un lien | 
					
						
							|  |  |  |  |                     elif element.name != 'a': | 
					
						
							|  |  |  |  |                         link_element = element.find('a') | 
					
						
							|  |  |  |  |                         if link_element and link_element.get('href'): | 
					
						
							|  |  |  |  |                             href = link_element.get('href') | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |                     if href: | 
					
						
							|  |  |  |  |                         # Construire l'URL complète si c'est un lien relatif | 
					
						
							|  |  |  |  |                         if href.startswith('/'): | 
					
						
							|  |  |  |  |                             # Enlever les paramètres de requête de l'URL de base | 
					
						
							|  |  |  |  |                             base_url = OSMCAL_MAIN_URL.split('?')[0] | 
					
						
							|  |  |  |  |                             if base_url.endswith('/'): | 
					
						
							|  |  |  |  |                                 base_url = base_url[:-1] | 
					
						
							|  |  |  |  |                             full_url = base_url + href | 
					
						
							|  |  |  |  |                         else: | 
					
						
							|  |  |  |  |                             full_url = href | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |                         # Vérifier que c'est bien un lien vers un événement | 
					
						
							|  |  |  |  |                         if '/event/' in href and full_url not in event_links: | 
					
						
							|  |  |  |  |                             event_links.append(full_url) | 
					
						
							|  |  |  |  |                             logger.info(f"Lien d'événement trouvé : {full_url}") | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |                 # Si on a trouvé des liens avec ce sélecteur, on s'arrête | 
					
						
							|  |  |  |  |                 if event_links: | 
					
						
							|  |  |  |  |                     break | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         # Si aucun lien trouvé, essayer de lister tous les liens pour débugger | 
					
						
							|  |  |  |  |         if not event_links: | 
					
						
							|  |  |  |  |             logger.warning("Aucun lien d'événement trouvé. Listing de tous les liens pour débogage :") | 
					
						
							|  |  |  |  |             all_links = soup.find_all('a', href=True) | 
					
						
							|  |  |  |  |             logger.info(f"Total de liens trouvés sur la page : {len(all_links)}") | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |             # Afficher les 10 premiers liens pour débogage | 
					
						
							|  |  |  |  |             for i, link in enumerate(all_links[:10]): | 
					
						
							|  |  |  |  |                 logger.info(f"Lien {i+1}: {link.get('href')} (classes: {link.get('class', [])})") | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |             # Chercher spécifiquement les liens contenant "event" | 
					
						
							|  |  |  |  |             event_related_links = [link for link in all_links if 'event' in link.get('href', '').lower()] | 
					
						
							|  |  |  |  |             logger.info(f"Liens contenant 'event' : {len(event_related_links)}") | 
					
						
							|  |  |  |  |             for link in event_related_links[:5]: | 
					
						
							|  |  |  |  |                 logger.info(f"Lien event: {link.get('href')}") | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         logger.success(f"Trouvé {len(event_links)} liens d'événements uniques sur la page principale") | 
					
						
							|  |  |  |  |         return event_links | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     except requests.exceptions.RequestException as e: | 
					
						
							|  |  |  |  |         logger.error(f"Erreur lors du scraping de osmcal.org : {e}") | 
					
						
							|  |  |  |  |         return [] | 
					
						
							|  |  |  |  |     except Exception as e: | 
					
						
							|  |  |  |  |         logger.error(f"Erreur inattendue lors du scraping : {e}") | 
					
						
							|  |  |  |  |         import traceback | 
					
						
							|  |  |  |  |         logger.error(f"Traceback: {traceback.format_exc()}") | 
					
						
							|  |  |  |  |         return [] | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  | def fetch_osm_calendar_data(): | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |     Fetch events from the OSM Calendar RSS feed. | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |     Returns: | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |         list: A list of event items from the RSS feed. | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |     logger.info("Fetching data from OSM Calendar RSS feed") | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |     try: | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |         response = requests.get(RSS_URL) | 
					
						
							|  |  |  |  |         response.raise_for_status()  # Raise an exception for HTTP errors | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         # Parse the XML response | 
					
						
							|  |  |  |  |         root = ET.fromstring(response.content) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         # Find all item elements (events) | 
					
						
							|  |  |  |  |         channel = root.find('channel') | 
					
						
							|  |  |  |  |         if channel is None: | 
					
						
							|  |  |  |  |             logger.error("No channel element found in RSS feed") | 
					
						
							|  |  |  |  |             return [] | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         items = channel.findall('item') | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         if not items: | 
					
						
							|  |  |  |  |             logger.error("No items found in RSS feed") | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |             return [] | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  |         logger.success(f"Successfully fetched {len(items)} events from OSM Calendar RSS feed") | 
					
						
							|  |  |  |  |         return items | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     except requests.exceptions.RequestException as e: | 
					
						
							|  |  |  |  |         logger.error(f"Error fetching data from OSM Calendar RSS feed: {e}") | 
					
						
							|  |  |  |  |         return [] | 
					
						
							|  |  |  |  |     except ET.ParseError as e: | 
					
						
							|  |  |  |  |         logger.error(f"Error parsing XML response: {e}") | 
					
						
							|  |  |  |  |         return [] | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |     except Exception as e: | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |         logger.error(f"Unexpected error fetching OSM Calendar data: {e}") | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |         return [] | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  | def parse_event_dates(description): | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |     Parse event dates from the description. | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |     Args: | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |         description (str): The event description HTML. | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |     Returns: | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |         tuple: A tuple containing (start_date, end_date) as ISO format strings. | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |     try: | 
					
						
							|  |  |  |  |         # Extract the date information from the description | 
					
						
							|  |  |  |  |         date_pattern = r'(\d+)(?:st|nd|rd|th)\s+(\w+)(?:\s+(\d+):(\d+)(?:\s+–\s+(\d+):(\d+))?)?(?:\s+\(([^)]+)\))?(?:\s+–\s+(\d+)(?:st|nd|rd|th)\s+(\w+))?' | 
					
						
							|  |  |  |  |         date_match = re.search(date_pattern, description) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         if not date_match: | 
					
						
							|  |  |  |  |             # Try alternative pattern for single day with time range | 
					
						
							|  |  |  |  |             date_pattern = r'(\d+)(?:st|nd|rd|th)\s+(\w+)\s+(\d+):(\d+)\s+–\s+(\d+):(\d+)' | 
					
						
							|  |  |  |  |             date_match = re.search(date_pattern, description) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         if date_match: | 
					
						
							|  |  |  |  |             # Extract date components | 
					
						
							|  |  |  |  |             day = int(date_match.group(1)) | 
					
						
							|  |  |  |  |             month_name = date_match.group(2) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |             # Convert month name to month number | 
					
						
							|  |  |  |  |             month_map = { | 
					
						
							|  |  |  |  |                 'January': 1, 'February': 2, 'March': 3, 'April': 4, | 
					
						
							|  |  |  |  |                 'May': 5, 'June': 6, 'July': 7, 'August': 8, | 
					
						
							|  |  |  |  |                 'September': 9, 'October': 10, 'November': 11, 'December': 12 | 
					
						
							|  |  |  |  |             } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |             # Try to match the month name (case insensitive) | 
					
						
							|  |  |  |  |             month = None | 
					
						
							|  |  |  |  |             for name, num in month_map.items(): | 
					
						
							|  |  |  |  |                 if month_name.lower() == name.lower(): | 
					
						
							|  |  |  |  |                     month = num | 
					
						
							|  |  |  |  |                     break | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |             if month is None: | 
					
						
							|  |  |  |  |                 # If month name not found, use current month | 
					
						
							|  |  |  |  |                 month = datetime.now().month | 
					
						
							|  |  |  |  |                 logger.warning(f"Could not parse month name: {month_name}, using current month") | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |             # Get current year (assuming events are current or future) | 
					
						
							|  |  |  |  |             current_year = datetime.now().year | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |             # Create start date | 
					
						
							|  |  |  |  |             try: | 
					
						
							|  |  |  |  |                 start_date = datetime(current_year, month, day) | 
					
						
							|  |  |  |  |             except ValueError: | 
					
						
							|  |  |  |  |                 # Handle invalid dates (e.g., February 30) | 
					
						
							|  |  |  |  |                 logger.warning(f"Invalid date: {day} {month_name} {current_year}, using current date") | 
					
						
							|  |  |  |  |                 start_date = datetime.now() | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |             # Check if there's an end date | 
					
						
							|  |  |  |  |             if len(date_match.groups()) >= 8 and date_match.group(8): | 
					
						
							|  |  |  |  |                 end_day = int(date_match.group(8)) | 
					
						
							|  |  |  |  |                 end_month_name = date_match.group(9) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |                 # Convert end month name to month number | 
					
						
							|  |  |  |  |                 end_month = None | 
					
						
							|  |  |  |  |                 for name, num in month_map.items(): | 
					
						
							|  |  |  |  |                     if end_month_name.lower() == name.lower(): | 
					
						
							|  |  |  |  |                         end_month = num | 
					
						
							|  |  |  |  |                         break | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |                 if end_month is None: | 
					
						
							|  |  |  |  |                     # If end month name not found, use start month | 
					
						
							|  |  |  |  |                     end_month = month | 
					
						
							|  |  |  |  |                     logger.warning(f"Could not parse end month name: {end_month_name}, using start month") | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |                 try: | 
					
						
							|  |  |  |  |                     end_date = datetime(current_year, end_month, end_day) | 
					
						
							|  |  |  |  |                     # Add a day to include the full end day | 
					
						
							|  |  |  |  |                     end_date = end_date + timedelta(days=1) | 
					
						
							|  |  |  |  |                 except ValueError: | 
					
						
							|  |  |  |  |                     # Handle invalid dates | 
					
						
							|  |  |  |  |                     logger.warning(f"Invalid end date: {end_day} {end_month_name} {current_year}, using start date + 1 day") | 
					
						
							|  |  |  |  |                     end_date = start_date + timedelta(days=1) | 
					
						
							|  |  |  |  |             else: | 
					
						
							|  |  |  |  |                 # If no end date, use start date + 1 day as default | 
					
						
							|  |  |  |  |                 end_date = start_date + timedelta(days=1) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |             # Format dates as ISO strings | 
					
						
							|  |  |  |  |             start_iso = start_date.isoformat() | 
					
						
							|  |  |  |  |             end_iso = end_date.isoformat() | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |             return (start_iso, end_iso) | 
					
						
							|  |  |  |  |         else: | 
					
						
							|  |  |  |  |             # If no date pattern found, use current date as fallback | 
					
						
							|  |  |  |  |             now = datetime.now() | 
					
						
							|  |  |  |  |             start_iso = now.isoformat() | 
					
						
							|  |  |  |  |             end_iso = (now + timedelta(days=1)).isoformat() | 
					
						
							|  |  |  |  |             logger.warning(f"Could not parse date from description, using current date: {start_iso} to {end_iso}") | 
					
						
							|  |  |  |  |             return (start_iso, end_iso) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     except Exception as e: | 
					
						
							|  |  |  |  |         logger.error(f"Error parsing event dates: {e}") | 
					
						
							|  |  |  |  |         # Return default dates (current date) | 
					
						
							|  |  |  |  |         now = datetime.now() | 
					
						
							|  |  |  |  |         return (now.isoformat(), (now + timedelta(days=1)).isoformat()) | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-26 11:57:54 +02:00
										 |  |  |  | def fetch_ical_data(event_url): | 
					
						
							|  |  |  |  |     """
 | 
					
						
							|  |  |  |  |     Fetch and parse iCal data for an OSM Calendar event. | 
					
						
							|  |  |  |  |      | 
					
						
							|  |  |  |  |     Args: | 
					
						
							|  |  |  |  |         event_url (str): The URL of the OSM Calendar event. | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |     Returns: | 
					
						
							|  |  |  |  |         tuple: A tuple containing (location_name, coordinates). | 
					
						
							|  |  |  |  |     """
 | 
					
						
							|  |  |  |  |     try: | 
					
						
							|  |  |  |  |         # Check if the URL is an OSM Calendar event URL | 
					
						
							|  |  |  |  |         if not event_url.startswith(OSMCAL_EVENT_BASE_URL): | 
					
						
							|  |  |  |  |             logger.warning(f"Not an OSM Calendar event URL: {event_url}") | 
					
						
							|  |  |  |  |             return ("Unknown Location", [0, 0]) | 
					
						
							|  |  |  |  |              | 
					
						
							|  |  |  |  |         # Extract the event ID from the URL | 
					
						
							|  |  |  |  |         event_id_match = re.search(r'event/(\d+)', event_url) | 
					
						
							|  |  |  |  |         if not event_id_match: | 
					
						
							|  |  |  |  |             logger.warning(f"Could not extract event ID from URL: {event_url}") | 
					
						
							|  |  |  |  |             return ("Unknown Location", [0, 0]) | 
					
						
							|  |  |  |  |              | 
					
						
							|  |  |  |  |         event_id = event_id_match.group(1) | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         # Construct the iCal URL | 
					
						
							|  |  |  |  |         ical_url = f"{OSMCAL_EVENT_BASE_URL}{event_id}.ics" | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         # Fetch the iCal content | 
					
						
							|  |  |  |  |         logger.info(f"Fetching iCal data from: {ical_url}") | 
					
						
							|  |  |  |  |         response = requests.get(ical_url) | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         if not response.ok: | 
					
						
							|  |  |  |  |             logger.warning(f"Failed to fetch iCal data: {response.status_code}") | 
					
						
							|  |  |  |  |             return ("Unknown Location", [0, 0]) | 
					
						
							|  |  |  |  |              | 
					
						
							| 
									
										
										
										
											2025-09-26 18:19:54 +02:00
										 |  |  |  |         # Parse the iCal content avec l'encodage correct | 
					
						
							|  |  |  |  |         response.encoding = response.apparent_encoding or 'utf-8' | 
					
						
							| 
									
										
										
										
											2025-09-26 11:57:54 +02:00
										 |  |  |  |         ical_content = response.text | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         # Extract GEO information | 
					
						
							|  |  |  |  |         geo_match = re.search(r'GEO:([-+]?\d+\.\d+);([-+]?\d+\.\d+)', ical_content) | 
					
						
							|  |  |  |  |         if geo_match: | 
					
						
							|  |  |  |  |             # GEO format is latitude;longitude | 
					
						
							|  |  |  |  |             latitude = float(geo_match.group(2)) | 
					
						
							|  |  |  |  |             longitude = float(geo_match.group(1)) | 
					
						
							|  |  |  |  |             coordinates = [longitude, latitude]  # GeoJSON uses [longitude, latitude] | 
					
						
							|  |  |  |  |             logger.info(f"Extracted coordinates from iCal: {coordinates}") | 
					
						
							|  |  |  |  |         else: | 
					
						
							|  |  |  |  |             logger.warning(f"No GEO information found in iCal data for event: {event_id}") | 
					
						
							|  |  |  |  |             coordinates = [0, 0] | 
					
						
							|  |  |  |  |              | 
					
						
							|  |  |  |  |         # Extract LOCATION information | 
					
						
							|  |  |  |  |         location_match = re.search(r'LOCATION:(.+?)(?:\r\n|\n|\r)', ical_content) | 
					
						
							|  |  |  |  |         if location_match: | 
					
						
							|  |  |  |  |             location_name = location_match.group(1).strip() | 
					
						
							|  |  |  |  |             # Unescape backslash-escaped characters (e.g., \, becomes ,) | 
					
						
							|  |  |  |  |             location_name = re.sub(r'\\(.)', r'\1', location_name) | 
					
						
							| 
									
										
										
										
											2025-09-26 18:19:54 +02:00
										 |  |  |  |             # Corriger l'encodage | 
					
						
							|  |  |  |  |             location_name = fix_encoding(location_name) | 
					
						
							| 
									
										
										
										
											2025-09-26 11:57:54 +02:00
										 |  |  |  |             logger.info(f"Extracted location from iCal: {location_name}") | 
					
						
							|  |  |  |  |         else: | 
					
						
							|  |  |  |  |             logger.warning(f"No LOCATION information found in iCal data for event: {event_id}") | 
					
						
							|  |  |  |  |             location_name = "Unknown Location" | 
					
						
							|  |  |  |  |              | 
					
						
							|  |  |  |  |         return (location_name, coordinates) | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |     except Exception as e: | 
					
						
							|  |  |  |  |         logger.error(f"Error fetching or parsing iCal data: {e}") | 
					
						
							|  |  |  |  |         return ("Unknown Location", [0, 0]) | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  | def extract_location(description): | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |     Extract location information from the event description. | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |     Args: | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |         description (str): The event description HTML. | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |     Returns: | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |         tuple: A tuple containing (location_name, coordinates). | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |     """
 | 
					
						
							|  |  |  |  |     try: | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |         # Default coordinates (center of the world) | 
					
						
							|  |  |  |  |         coordinates = [0, 0] | 
					
						
							|  |  |  |  |         location_name = "Unknown Location" | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         # Try to find location in the description | 
					
						
							|  |  |  |  |         location_pattern = r'<p>([^<]+)</p>' | 
					
						
							|  |  |  |  |         location_matches = re.findall(location_pattern, description) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         if location_matches and len(location_matches) > 1: | 
					
						
							|  |  |  |  |             # The second paragraph often contains the location | 
					
						
							|  |  |  |  |             location_candidate = location_matches[1].strip() | 
					
						
							|  |  |  |  |             if location_candidate and "," in location_candidate and not location_candidate.startswith('<'): | 
					
						
							| 
									
										
										
										
											2025-09-26 18:19:54 +02:00
										 |  |  |  |                 location_name = fix_encoding(location_candidate) | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  |                 # For now, we don't have exact coordinates, so we'll use a placeholder | 
					
						
							|  |  |  |  |                 # In a real implementation, you might want to geocode the location | 
					
						
							|  |  |  |  |                 coordinates = [0, 0] | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         return (location_name, coordinates) | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |     except Exception as e: | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |         logger.error(f"Error extracting location: {e}") | 
					
						
							|  |  |  |  |         return ("Unknown Location", [0, 0]) | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  | def create_event(item): | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |     Create an event object from an RSS item. | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |     Args: | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |         item: An item element from the RSS feed. | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |     Returns: | 
					
						
							|  |  |  |  |         dict: A GeoJSON Feature representing the event. | 
					
						
							|  |  |  |  |     """
 | 
					
						
							|  |  |  |  |     try: | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |         # Extract data from the item | 
					
						
							|  |  |  |  |         title = item.find('title').text | 
					
						
							|  |  |  |  |         link = item.find('link').text | 
					
						
							|  |  |  |  |         description = item.find('description').text | 
					
						
							|  |  |  |  |         guid = item.find('guid').text | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         # Clean up the description (remove HTML tags for text extraction) | 
					
						
							|  |  |  |  |         clean_description = re.sub(r'<[^>]+>', ' ', description) | 
					
						
							|  |  |  |  |         clean_description = html.unescape(clean_description) | 
					
						
							|  |  |  |  |         clean_description = re.sub(r'\s+', ' ', clean_description).strip() | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-26 18:19:54 +02:00
										 |  |  |  |         # Corriger l'encodage du titre et de la description | 
					
						
							|  |  |  |  |         title = fix_encoding(title) | 
					
						
							|  |  |  |  |         clean_description = fix_encoding(clean_description) | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |         # Parse dates from the description | 
					
						
							|  |  |  |  |         start_date, end_date = parse_event_dates(description) | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-26 11:57:54 +02:00
										 |  |  |  |         # Extract location information from the description | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |         location_name, coordinates = extract_location(description) | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-26 11:57:54 +02:00
										 |  |  |  |         # If we don't have coordinates and the link is to an OSM Calendar event, | 
					
						
							|  |  |  |  |         # try to get coordinates and location from the iCal file | 
					
						
							|  |  |  |  |         if coordinates == [0, 0] and link and link.startswith(OSMCAL_EVENT_BASE_URL): | 
					
						
							|  |  |  |  |             logger.info(f"No coordinates found in description, trying to get from iCal: {link}") | 
					
						
							|  |  |  |  |             ical_location_name, ical_coordinates = fetch_ical_data(link) | 
					
						
							|  |  |  |  |              | 
					
						
							|  |  |  |  |             # Use iCal coordinates if available | 
					
						
							|  |  |  |  |             if ical_coordinates != [0, 0]: | 
					
						
							|  |  |  |  |                 coordinates = ical_coordinates | 
					
						
							|  |  |  |  |                 logger.info(f"Using coordinates from iCal: {coordinates}") | 
					
						
							|  |  |  |  |                  | 
					
						
							|  |  |  |  |             # Use iCal location name if available and better than what we have | 
					
						
							|  |  |  |  |             if ical_location_name != "Unknown Location": | 
					
						
							|  |  |  |  |                 location_name = ical_location_name | 
					
						
							|  |  |  |  |                 logger.info(f"Using location name from iCal: {location_name}") | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |         # Create a descriptive label | 
					
						
							|  |  |  |  |         label = title | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |         # Create the event object | 
					
						
							|  |  |  |  |         event = { | 
					
						
							|  |  |  |  |             "type": "Feature", | 
					
						
							|  |  |  |  |             "geometry": { | 
					
						
							|  |  |  |  |                 "type": "Point", | 
					
						
							|  |  |  |  |                 "coordinates": coordinates | 
					
						
							|  |  |  |  |             }, | 
					
						
							|  |  |  |  |             "properties": { | 
					
						
							|  |  |  |  |                 "type": "scheduled", | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |                 "what": "community.osm.event", | 
					
						
							|  |  |  |  |                 "what:series": "OpenStreetMap Calendar", | 
					
						
							|  |  |  |  |                 "where": location_name, | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |                 "label": label, | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |                 "description": clean_description, | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |                 "start": start_date, | 
					
						
							|  |  |  |  |                 "stop": end_date, | 
					
						
							|  |  |  |  |                 "url": link, | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |                 "external_id": guid, | 
					
						
							|  |  |  |  |                 "source": "OSM Calendar" | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |             } | 
					
						
							|  |  |  |  |         } | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |         return event | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |     except Exception as e: | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |         logger.error(f"Error creating event from item: {e}") | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |         return None | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  | def event_exists(db, properties): | 
					
						
							|  |  |  |  |     """
 | 
					
						
							|  |  |  |  |     Check if an event with the same properties already exists in the database. | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     Args: | 
					
						
							|  |  |  |  |         db: Database connection. | 
					
						
							|  |  |  |  |         properties: Event properties. | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     Returns: | 
					
						
							|  |  |  |  |         bool: True if the event exists, False otherwise. | 
					
						
							|  |  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2025-09-18 23:43:06 +02:00
										 |  |  |  |     print('event: ', properties) | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |     try: | 
					
						
							|  |  |  |  |         cur = db.cursor() | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         # Check if an event with the same external_id exists | 
					
						
							|  |  |  |  |         if 'external_id' in properties: | 
					
						
							|  |  |  |  |             cur.execute("""
 | 
					
						
							|  |  |  |  |                 SELECT events_id FROM events | 
					
						
							|  |  |  |  |                 WHERE events_tags->>'external_id' = %s; | 
					
						
							|  |  |  |  |             """, (properties['external_id'],))
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |             result = cur.fetchone() | 
					
						
							|  |  |  |  |             if result: | 
					
						
							|  |  |  |  |                 logger.info(f"Event with external_id {properties['external_id']} already exists") | 
					
						
							|  |  |  |  |                 return True | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         # Check if an event with the same label, start, and stop exists | 
					
						
							|  |  |  |  |         cur.execute("""
 | 
					
						
							|  |  |  |  |             SELECT events_id FROM events | 
					
						
							|  |  |  |  |             WHERE events_tags->>'label' = %s | 
					
						
							|  |  |  |  |             AND events_tags->>'start' = %s | 
					
						
							|  |  |  |  |             AND events_tags->>'stop' = %s; | 
					
						
							|  |  |  |  |         """, (
 | 
					
						
							|  |  |  |  |             properties.get('label', ''), | 
					
						
							|  |  |  |  |             properties.get('start', ''), | 
					
						
							|  |  |  |  |             properties.get('stop', '') | 
					
						
							|  |  |  |  |         )) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         result = cur.fetchone() | 
					
						
							|  |  |  |  |         if result: | 
					
						
							|  |  |  |  |             logger.info(f"Event with label '{properties.get('label')}' and same dates already exists") | 
					
						
							|  |  |  |  |             return True | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         return False | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     except Exception as e: | 
					
						
							|  |  |  |  |         logger.error(f"Error checking if event exists: {e}") | 
					
						
							|  |  |  |  |         return False | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  | def submit_event(event): | 
					
						
							|  |  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2025-09-26 11:57:54 +02:00
										 |  |  |  |     Submit an event to the OpenEventDatabase using the API. | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |     Args: | 
					
						
							|  |  |  |  |         event: A GeoJSON Feature representing the event. | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |     Returns: | 
					
						
							| 
									
										
										
										
											2025-09-27 00:39:18 +02:00
										 |  |  |  |         tuple: A tuple containing (success: bool, event_id: str or None). | 
					
						
							|  |  |  |  |                success is True if the event was successfully submitted, False otherwise. | 
					
						
							|  |  |  |  |                event_id is the OEDB event ID if available, None otherwise. | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |     """
 | 
					
						
							|  |  |  |  |     try: | 
					
						
							| 
									
										
										
										
											2025-09-26 11:57:54 +02:00
										 |  |  |  |         # Extract event properties for logging | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |         properties = event['properties'] | 
					
						
							| 
									
										
										
										
											2025-09-27 00:39:18 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-26 11:57:54 +02:00
										 |  |  |  |         # API endpoint for OpenEventDatabase | 
					
						
							|  |  |  |  |         api_url = "https://api.openeventdatabase.org/event" | 
					
						
							| 
									
										
										
										
											2025-09-27 00:39:18 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-26 11:57:54 +02:00
										 |  |  |  |         # Make the API request | 
					
						
							|  |  |  |  |         logger.info(f"Submitting event '{properties.get('label')}' to API") | 
					
						
							|  |  |  |  |         response = requests.post( | 
					
						
							|  |  |  |  |             api_url, | 
					
						
							|  |  |  |  |             headers={"Content-Type": "application/json"}, | 
					
						
							|  |  |  |  |             data=json.dumps(event) | 
					
						
							|  |  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2025-09-27 00:39:18 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-26 11:57:54 +02:00
										 |  |  |  |         # Check if the request was successful | 
					
						
							|  |  |  |  |         if response.status_code == 200 or response.status_code == 201: | 
					
						
							|  |  |  |  |             # Parse the response to get the event ID | 
					
						
							|  |  |  |  |             response_data = response.json() | 
					
						
							|  |  |  |  |             event_id = response_data.get('id') | 
					
						
							| 
									
										
										
										
											2025-09-27 00:39:18 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-26 11:57:54 +02:00
										 |  |  |  |             if event_id: | 
					
						
							|  |  |  |  |                 logger.success(f"Event created with ID: {event_id}") | 
					
						
							| 
									
										
										
										
											2025-09-27 00:18:03 +02:00
										 |  |  |  |                 logger.info(f" https://api.openeventdatabase.org/event/{event_id}") | 
					
						
							| 
									
										
										
										
											2025-09-27 00:39:18 +02:00
										 |  |  |  |                 return (True, event_id) | 
					
						
							| 
									
										
										
										
											2025-09-18 23:43:06 +02:00
										 |  |  |  |             else: | 
					
						
							| 
									
										
										
										
											2025-09-26 11:57:54 +02:00
										 |  |  |  |                 logger.warning(f"Event created but no ID returned in response") | 
					
						
							| 
									
										
										
										
											2025-09-27 00:39:18 +02:00
										 |  |  |  |                 return (True, None) | 
					
						
							| 
									
										
										
										
											2025-09-27 00:18:03 +02:00
										 |  |  |  |         elif response.status_code == 409: | 
					
						
							|  |  |  |  |             # 409 Conflict - L'événement existe déjà, considéré comme un succès | 
					
						
							|  |  |  |  |             logger.success(f"Event already exists in database: {properties.get('label')} (HTTP 409)") | 
					
						
							| 
									
										
										
										
											2025-09-27 00:39:18 +02:00
										 |  |  |  |             # Essayer d'extraire l'ID de l'événement existant depuis la réponse | 
					
						
							|  |  |  |  |             try: | 
					
						
							|  |  |  |  |                 response_data = response.json() | 
					
						
							|  |  |  |  |                 existing_event_id = response_data.get('id') | 
					
						
							|  |  |  |  |                 return (True, existing_event_id) | 
					
						
							|  |  |  |  |             except: | 
					
						
							|  |  |  |  |                 return (True, None) | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |         else: | 
					
						
							| 
									
										
										
										
											2025-09-26 11:57:54 +02:00
										 |  |  |  |             logger.warning(f"Failed to create event: {properties.get('label')}. Status code: {response.status_code}") | 
					
						
							|  |  |  |  |             logger.warning(f"Response: {response.text}") | 
					
						
							| 
									
										
										
										
											2025-09-27 00:39:18 +02:00
										 |  |  |  |             return (False, None) | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |     except Exception as e: | 
					
						
							|  |  |  |  |         logger.error(f"Error submitting event: {e}") | 
					
						
							| 
									
										
										
										
											2025-09-27 00:39:18 +02:00
										 |  |  |  |         return (False, None) | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-26 11:57:54 +02:00
										 |  |  |  | def main(max_events=1, offset=0): | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2025-09-26 11:57:54 +02:00
										 |  |  |  |     Main function to fetch OSM Calendar events and add them to the OpenEventDatabase API. | 
					
						
							|  |  |  |  |      | 
					
						
							|  |  |  |  |     Args: | 
					
						
							|  |  |  |  |         max_events (int): Maximum number of events to insert (default: 1) | 
					
						
							|  |  |  |  |         offset (int): Number of events to skip from the beginning of the RSS feed (default: 0) | 
					
						
							| 
									
										
										
										
											2025-09-18 22:30:25 +02:00
										 |  |  |  |      | 
					
						
							|  |  |  |  |     The function will exit if the .env file doesn't exist, as it's required | 
					
						
							| 
									
										
										
										
											2025-09-26 11:57:54 +02:00
										 |  |  |  |     for environment variables. | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2025-09-26 11:57:54 +02:00
										 |  |  |  |     logger.info(f"Starting OSM Calendar extractor (max_events={max_events}, offset={offset})") | 
					
						
							| 
									
										
										
										
											2025-09-18 22:18:25 +02:00
										 |  |  |  |      | 
					
						
							| 
									
										
										
										
											2025-09-18 22:30:25 +02:00
										 |  |  |  |     # Load environment variables from .env file and check if it exists | 
					
						
							|  |  |  |  |     if not load_env_from_file(): | 
					
						
							|  |  |  |  |         logger.error("Required .env file not found. Exiting.") | 
					
						
							|  |  |  |  |         sys.exit(1) | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |     logger.info("Environment variables loaded successfully from .env file") | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-26 18:19:54 +02:00
										 |  |  |  |     # Charger le cache des événements traités | 
					
						
							|  |  |  |  |     event_cache = load_event_cache() | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     # Scraper la page principale pour obtenir tous les liens d'événements | 
					
						
							|  |  |  |  |     event_links = scrape_osmcal_event_links() | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     if not event_links: | 
					
						
							|  |  |  |  |         logger.warning("Aucun lien d'événement trouvé sur la page principale") | 
					
						
							|  |  |  |  |         return | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     # Identifier les nouveaux événements (non présents dans le cache ou non traités avec succès) | 
					
						
							|  |  |  |  |     new_events = [] | 
					
						
							|  |  |  |  |     success_events = [] | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-26 18:19:54 +02:00
										 |  |  |  |     for link in event_links: | 
					
						
							|  |  |  |  |         # Vérifier si l'événement existe dans le cache et a le statut 'success' | 
					
						
							|  |  |  |  |         if link in event_cache and event_cache[link].get('status') == 'success': | 
					
						
							|  |  |  |  |             success_events.append(link) | 
					
						
							| 
									
										
										
										
											2025-09-27 00:39:18 +02:00
										 |  |  |  |             oedb_id = event_cache[link].get('oedb_event_id', 'ID non disponible') | 
					
						
							|  |  |  |  |             logger.info(f"Événement déjà traité avec succès (ID OEDB: {oedb_id}), ignoré : {link}") | 
					
						
							| 
									
										
										
										
											2025-09-26 18:19:54 +02:00
										 |  |  |  |         else: | 
					
						
							|  |  |  |  |             new_events.append(link) | 
					
						
							|  |  |  |  |             # Initialiser l'événement dans le cache s'il n'existe pas | 
					
						
							|  |  |  |  |             if link not in event_cache: | 
					
						
							|  |  |  |  |                 event_cache[link] = { | 
					
						
							|  |  |  |  |                     'discovered_at': datetime.now().isoformat(), | 
					
						
							|  |  |  |  |                     'status': 'pending', | 
					
						
							|  |  |  |  |                     'attempts': 0 | 
					
						
							|  |  |  |  |                 } | 
					
						
							|  |  |  |  |             else: | 
					
						
							|  |  |  |  |                 # Log du statut actuel pour les événements déjà en cache | 
					
						
							|  |  |  |  |                 current_status = event_cache[link].get('status', 'unknown') | 
					
						
							|  |  |  |  |                 attempts = event_cache[link].get('attempts', 0) | 
					
						
							| 
									
										
										
										
											2025-09-27 00:39:18 +02:00
										 |  |  |  |                 oedb_id = event_cache[link].get('oedb_event_id', 'non disponible') | 
					
						
							|  |  |  |  |                 logger.info(f"Événement à retraiter (statut: {current_status}, tentatives: {attempts}, ID OEDB: {oedb_id}) : {link}") | 
					
						
							| 
									
										
										
										
											2025-09-26 18:19:54 +02:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  |     logger.info(f"Liens d'événements trouvés : {len(event_links)}") | 
					
						
							|  |  |  |  |     logger.info(f"Événements déjà traités avec succès : {len(success_events)}") | 
					
						
							|  |  |  |  |     logger.info(f"Nouveaux événements à traiter : {len(new_events)}") | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     if len(new_events) == 0: | 
					
						
							|  |  |  |  |         logger.success("Aucun nouvel événement à traiter. Tous les événements ont déjà été insérés avec succès.") | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |         return | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-26 18:19:54 +02:00
										 |  |  |  |     # Appliquer l'offset et la limite aux nouveaux événements | 
					
						
							|  |  |  |  |     if offset >= len(new_events): | 
					
						
							|  |  |  |  |         logger.warning(f"Offset {offset} est supérieur ou égal au nombre de nouveaux événements {len(new_events)}") | 
					
						
							| 
									
										
										
										
											2025-09-26 11:57:54 +02:00
										 |  |  |  |         return | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-26 18:19:54 +02:00
										 |  |  |  |     events_to_process = new_events[offset:offset + max_events] | 
					
						
							|  |  |  |  |     logger.info(f"Traitement de {len(events_to_process)} nouveaux événements") | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     # Fetch events from the OSM Calendar RSS feed pour obtenir les détails | 
					
						
							|  |  |  |  |     rss_items = fetch_osm_calendar_data() | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     if not rss_items: | 
					
						
							|  |  |  |  |         logger.warning("Aucun événement trouvé dans le flux RSS, mais continuons avec les liens scrapés") | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     # Créer un mapping des liens RSS vers les items pour un accès rapide | 
					
						
							|  |  |  |  |     rss_link_to_item = {} | 
					
						
							|  |  |  |  |     for item in rss_items: | 
					
						
							|  |  |  |  |         link_element = item.find('link') | 
					
						
							|  |  |  |  |         if link_element is not None: | 
					
						
							|  |  |  |  |             rss_link_to_item[link_element.text] = item | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     # Process each new event | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  |     success_count = 0 | 
					
						
							| 
									
										
										
										
											2025-09-26 18:19:54 +02:00
										 |  |  |  |     for event_link in events_to_process: | 
					
						
							|  |  |  |  |         try: | 
					
						
							| 
									
										
										
										
											2025-09-27 00:18:03 +02:00
										 |  |  |  |             # Vérifier si l'événement est déjà en succès (sécurité supplémentaire) | 
					
						
							|  |  |  |  |             if event_cache.get(event_link, {}).get('status') == 'success': | 
					
						
							|  |  |  |  |                 logger.info(f"Événement déjà en succès, passage au suivant : {event_link}") | 
					
						
							|  |  |  |  |                 success_count += 1  # Compter comme succès puisqu'il est déjà traité | 
					
						
							|  |  |  |  |                 continue | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-26 18:19:54 +02:00
										 |  |  |  |             event_cache[event_link]['attempts'] += 1 | 
					
						
							|  |  |  |  |             event_cache[event_link]['last_attempt'] = datetime.now().isoformat() | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |             # Chercher l'item correspondant dans le flux RSS | 
					
						
							|  |  |  |  |             rss_item = rss_link_to_item.get(event_link) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |             if rss_item is not None: | 
					
						
							|  |  |  |  |                 # Créer l'événement depuis l'item RSS | 
					
						
							|  |  |  |  |                 event = create_event(rss_item) | 
					
						
							|  |  |  |  |             else: | 
					
						
							|  |  |  |  |                 # Si pas trouvé dans le flux RSS, essayer de créer un événement minimal depuis le lien | 
					
						
							|  |  |  |  |                 logger.warning(f"Événement {event_link} non trouvé dans le flux RSS, tentative de création depuis le lien") | 
					
						
							|  |  |  |  |                 event = create_event_from_link(event_link) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |             if event: | 
					
						
							|  |  |  |  |                 # Tenter de soumettre l'événement à l'API | 
					
						
							| 
									
										
										
										
											2025-09-27 00:39:18 +02:00
										 |  |  |  |                 submit_success, oedb_event_id = submit_event(event) | 
					
						
							|  |  |  |  |                 if submit_success: | 
					
						
							| 
									
										
										
										
											2025-09-26 18:19:54 +02:00
										 |  |  |  |                     success_count += 1 | 
					
						
							|  |  |  |  |                     event_cache[event_link]['status'] = 'success' | 
					
						
							|  |  |  |  |                     event_cache[event_link]['inserted_at'] = datetime.now().isoformat() | 
					
						
							| 
									
										
										
										
											2025-09-27 00:39:18 +02:00
										 |  |  |  |                     # Sauvegarder l'ID de l'événement OEDB dans le cache | 
					
						
							|  |  |  |  |                     if oedb_event_id: | 
					
						
							|  |  |  |  |                         event_cache[event_link]['oedb_event_id'] = oedb_event_id | 
					
						
							|  |  |  |  |                         logger.success(f"Événement inséré avec succès (ID OEDB: {oedb_event_id}) : {event_link}") | 
					
						
							|  |  |  |  |                     else: | 
					
						
							|  |  |  |  |                         logger.success(f"Événement inséré avec succès (ID OEDB non disponible) : {event_link}") | 
					
						
							| 
									
										
										
										
											2025-09-26 18:19:54 +02:00
										 |  |  |  |                 else: | 
					
						
							|  |  |  |  |                     event_cache[event_link]['status'] = 'failed' | 
					
						
							|  |  |  |  |                     logger.warning(f"Échec de l'insertion de l'événement : {event_link}") | 
					
						
							|  |  |  |  |             else: | 
					
						
							|  |  |  |  |                 event_cache[event_link]['status'] = 'failed' | 
					
						
							|  |  |  |  |                 logger.error(f"Impossible de créer l'événement depuis : {event_link}") | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         except Exception as e: | 
					
						
							|  |  |  |  |             logger.error(f"Erreur lors du traitement de l'événement {event_link} : {e}") | 
					
						
							|  |  |  |  |             event_cache[event_link]['status'] = 'error' | 
					
						
							|  |  |  |  |             event_cache[event_link]['error'] = str(e) | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-26 18:19:54 +02:00
										 |  |  |  |     # Sauvegarder le cache mis à jour | 
					
						
							|  |  |  |  |     save_event_cache(event_cache) | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-27 00:18:03 +02:00
										 |  |  |  |     # Calculer les statistiques finales du cache | 
					
						
							|  |  |  |  |     cache_stats = { | 
					
						
							|  |  |  |  |         'success': 0, | 
					
						
							|  |  |  |  |         'pending': 0, | 
					
						
							|  |  |  |  |         'failed': 0, | 
					
						
							|  |  |  |  |         'error': 0, | 
					
						
							|  |  |  |  |         'total': len(event_cache) | 
					
						
							|  |  |  |  |     } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     for link, data in event_cache.items(): | 
					
						
							|  |  |  |  |         status = data.get('status', 'pending') | 
					
						
							|  |  |  |  |         if status in cache_stats: | 
					
						
							|  |  |  |  |             cache_stats[status] += 1 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     # Événements en attente d'insertion (tous sauf success) | 
					
						
							|  |  |  |  |     events_awaiting_insertion = cache_stats['pending'] + cache_stats['failed'] + cache_stats['error'] | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-26 18:19:54 +02:00
										 |  |  |  |     logger.success(f"Traitement terminé : {success_count} événements insérés avec succès sur {len(events_to_process)} traités") | 
					
						
							| 
									
										
										
										
											2025-09-27 00:18:03 +02:00
										 |  |  |  |     logger.info("=== STATISTIQUES GLOBALES DU CACHE ===") | 
					
						
							|  |  |  |  |     logger.info(f"Total d'événements dans le cache : {cache_stats['total']}") | 
					
						
							|  |  |  |  |     logger.info(f"Événements traités avec succès : {cache_stats['success']}") | 
					
						
							|  |  |  |  |     logger.info(f"Événements en attente d'insertion : {events_awaiting_insertion}") | 
					
						
							|  |  |  |  |     logger.info(f"  - Statut 'pending' : {cache_stats['pending']}") | 
					
						
							|  |  |  |  |     logger.info(f"  - Statut 'failed' : {cache_stats['failed']}") | 
					
						
							|  |  |  |  |     logger.info(f"  - Statut 'error' : {cache_stats['error']}") | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     if events_awaiting_insertion > 0: | 
					
						
							|  |  |  |  |         logger.info(f"🔄 Il reste {events_awaiting_insertion} événements à traiter lors de la prochaine exécution") | 
					
						
							|  |  |  |  |     else: | 
					
						
							|  |  |  |  |         logger.success("✅ Tous les événements découverts ont été traités avec succès") | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-26 18:19:54 +02:00
										 |  |  |  | def create_event_from_link(event_link): | 
					
						
							|  |  |  |  |     """
 | 
					
						
							|  |  |  |  |     Créer un événement minimal depuis un lien osmcal.org quand il n'est pas disponible dans le flux RSS. | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     Args: | 
					
						
							|  |  |  |  |         event_link (str): URL de l'événement osmcal.org | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     Returns: | 
					
						
							|  |  |  |  |         dict: Un objet GeoJSON Feature représentant l'événement, ou None en cas d'échec | 
					
						
							|  |  |  |  |     """
 | 
					
						
							|  |  |  |  |     try: | 
					
						
							|  |  |  |  |         logger.info(f"Tentative de création d'événement depuis le lien : {event_link}") | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         # Si c'est un lien vers un événement OSM Calendar, essayer d'obtenir les données iCal | 
					
						
							|  |  |  |  |         if event_link.startswith(OSMCAL_EVENT_BASE_URL): | 
					
						
							|  |  |  |  |             location_name, coordinates = fetch_ical_data(event_link) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |             # Extraire l'ID de l'événement pour créer un GUID | 
					
						
							|  |  |  |  |             event_id_match = re.search(r'event/(\d+)', event_link) | 
					
						
							|  |  |  |  |             if event_id_match: | 
					
						
							|  |  |  |  |                 event_id = event_id_match.group(1) | 
					
						
							|  |  |  |  |                 external_id = f"osmcal_{event_id}" | 
					
						
							|  |  |  |  |             else: | 
					
						
							|  |  |  |  |                 external_id = event_link | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |             # Créer un événement avec les informations minimales disponibles | 
					
						
							|  |  |  |  |             now = datetime.now() | 
					
						
							|  |  |  |  |             event = { | 
					
						
							|  |  |  |  |                 "type": "Feature", | 
					
						
							|  |  |  |  |                 "geometry": { | 
					
						
							|  |  |  |  |                     "type": "Point", | 
					
						
							|  |  |  |  |                     "coordinates": coordinates | 
					
						
							|  |  |  |  |                 }, | 
					
						
							|  |  |  |  |                 "properties": { | 
					
						
							|  |  |  |  |                     "type": "scheduled", | 
					
						
							|  |  |  |  |                     "what": "community.osm.event", | 
					
						
							|  |  |  |  |                     "what:series": "OpenStreetMap Calendar", | 
					
						
							|  |  |  |  |                     "where": location_name, | 
					
						
							|  |  |  |  |                     "label": f"Événement OSM Calendar {event_id if 'event_id' in locals() else 'inconnu'}", | 
					
						
							|  |  |  |  |                     "description": f"Événement trouvé sur osmcal.org : {event_link}", | 
					
						
							|  |  |  |  |                     "start": now.isoformat(), | 
					
						
							|  |  |  |  |                     "stop": (now + timedelta(days=1)).isoformat(), | 
					
						
							|  |  |  |  |                     "url": event_link, | 
					
						
							|  |  |  |  |                     "external_id": external_id, | 
					
						
							|  |  |  |  |                     "source": "OSM Calendar (scraped)" | 
					
						
							|  |  |  |  |                 } | 
					
						
							|  |  |  |  |             } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |             return event | 
					
						
							|  |  |  |  |         else: | 
					
						
							|  |  |  |  |             logger.warning(f"Lien non reconnu comme un événement OSM Calendar : {event_link}") | 
					
						
							|  |  |  |  |             return None | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     except Exception as e: | 
					
						
							|  |  |  |  |         logger.error(f"Erreur lors de la création d'événement depuis le lien {event_link} : {e}") | 
					
						
							|  |  |  |  |         return None | 
					
						
							| 
									
										
										
										
											2025-09-18 19:27:28 +02:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | if __name__ == "__main__": | 
					
						
							| 
									
										
										
										
											2025-09-26 11:57:54 +02:00
										 |  |  |  |     import argparse | 
					
						
							|  |  |  |  |      | 
					
						
							|  |  |  |  |     # Set up command line argument parsing | 
					
						
							|  |  |  |  |     parser = argparse.ArgumentParser(description='OSM Calendar Extractor for the OpenEventDatabase') | 
					
						
							|  |  |  |  |     parser.add_argument('--max-events', type=int, default=1,  | 
					
						
							|  |  |  |  |                         help='Maximum number of events to insert (default: 1)') | 
					
						
							|  |  |  |  |     parser.add_argument('--offset', type=int, default=0,  | 
					
						
							|  |  |  |  |                         help='Number of events to skip from the beginning of the RSS feed (default: 0)') | 
					
						
							|  |  |  |  |      | 
					
						
							|  |  |  |  |     # Parse arguments | 
					
						
							|  |  |  |  |     args = parser.parse_args() | 
					
						
							|  |  |  |  |      | 
					
						
							|  |  |  |  |     # Run the main function with the provided arguments | 
					
						
							|  |  |  |  |     main(max_events=args.max_events, offset=args.offset) |