| 
									
										
										
										
											2025-10-09 22:55:50 +02:00
										 |  |  |  | #!/usr/bin/env python3 | 
					
						
							|  |  |  |  | """
 | 
					
						
							|  |  |  |  | Démonstration des améliorations du scraper agenda du libre | 
					
						
							|  |  |  |  | Simule les fonctionnalités sans dépendances externes | 
					
						
							|  |  |  |  | """
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | import json | 
					
						
							|  |  |  |  | import os | 
					
						
							|  |  |  |  | import sys | 
					
						
							|  |  |  |  | import re | 
					
						
							|  |  |  |  | import time | 
					
						
							|  |  |  |  | from datetime import datetime | 
					
						
							|  |  |  |  | import hashlib | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | class DemoAgendaDuLibreScraper: | 
					
						
							| 
									
										
										
										
											2025-10-09 23:35:12 +02:00
										 |  |  |  |     def __init__(self, max_events=None, dry_run=True, parallel=False, max_workers=4): | 
					
						
							| 
									
										
										
										
											2025-10-09 22:55:50 +02:00
										 |  |  |  |         self.max_events = max_events | 
					
						
							|  |  |  |  |         self.dry_run = dry_run | 
					
						
							| 
									
										
										
										
											2025-10-09 23:35:12 +02:00
										 |  |  |  |         self.parallel = parallel | 
					
						
							|  |  |  |  |         self.max_workers = max_workers | 
					
						
							| 
									
										
										
										
											2025-10-09 22:55:50 +02:00
										 |  |  |  |         self.cache_file = "demo_agendadulibre_cache.json" | 
					
						
							|  |  |  |  |         self.events_file = "demo_agendadulibre_events.json" | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         # Charger les données existantes | 
					
						
							|  |  |  |  |         self.cache_data = self.load_cache_data() | 
					
						
							|  |  |  |  |         self.events_data = self.load_events_data() | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |     def load_cache_data(self): | 
					
						
							|  |  |  |  |         """Charge les données de cache""" | 
					
						
							|  |  |  |  |         if os.path.exists(self.cache_file): | 
					
						
							|  |  |  |  |             try: | 
					
						
							|  |  |  |  |                 with open(self.cache_file, 'r', encoding='utf-8') as f: | 
					
						
							|  |  |  |  |                     return json.load(f) | 
					
						
							|  |  |  |  |             except Exception as e: | 
					
						
							|  |  |  |  |                 print(f"Erreur lors du chargement du cache: {e}") | 
					
						
							|  |  |  |  |         return {"processed_events": {}, "last_fetch": None, "content_hash": None} | 
					
						
							|  |  |  |  |      | 
					
						
							|  |  |  |  |     def load_events_data(self): | 
					
						
							|  |  |  |  |         """Charge les données d'événements""" | 
					
						
							|  |  |  |  |         if os.path.exists(self.events_file): | 
					
						
							|  |  |  |  |             try: | 
					
						
							|  |  |  |  |                 with open(self.events_file, 'r', encoding='utf-8') as f: | 
					
						
							|  |  |  |  |                     return json.load(f) | 
					
						
							|  |  |  |  |             except Exception as e: | 
					
						
							|  |  |  |  |                 print(f"Erreur lors du chargement des événements: {e}") | 
					
						
							|  |  |  |  |         return {"events": {}, "last_update": None} | 
					
						
							|  |  |  |  |      | 
					
						
							|  |  |  |  |     def save_cache_data(self): | 
					
						
							|  |  |  |  |         """Sauvegarde le cache""" | 
					
						
							|  |  |  |  |         try: | 
					
						
							|  |  |  |  |             with open(self.cache_file, 'w', encoding='utf-8') as f: | 
					
						
							|  |  |  |  |                 json.dump(self.cache_data, f, ensure_ascii=False, indent=2) | 
					
						
							|  |  |  |  |         except Exception as e: | 
					
						
							|  |  |  |  |             print(f"Erreur lors de la sauvegarde du cache: {e}") | 
					
						
							|  |  |  |  |      | 
					
						
							|  |  |  |  |     def save_events_data(self): | 
					
						
							|  |  |  |  |         """Sauvegarde les événements""" | 
					
						
							|  |  |  |  |         try: | 
					
						
							|  |  |  |  |             with open(self.events_file, 'w', encoding='utf-8') as f: | 
					
						
							|  |  |  |  |                 json.dump(self.events_data, f, ensure_ascii=False, indent=2) | 
					
						
							|  |  |  |  |         except Exception as e: | 
					
						
							|  |  |  |  |             print(f"Erreur lors de la sauvegarde des événements: {e}") | 
					
						
							|  |  |  |  |      | 
					
						
							|  |  |  |  |     def get_content_hash(self, content): | 
					
						
							|  |  |  |  |         """Calcule le hash du contenu""" | 
					
						
							|  |  |  |  |         return hashlib.md5(content.encode('utf-8')).hexdigest() | 
					
						
							|  |  |  |  |      | 
					
						
							|  |  |  |  |     def simulate_ical_fetch(self): | 
					
						
							|  |  |  |  |         """Simule la récupération d'un fichier iCal""" | 
					
						
							|  |  |  |  |         # Simuler du contenu iCal | 
					
						
							|  |  |  |  |         ical_content = f"""
 | 
					
						
							|  |  |  |  | BEGIN:VCALENDAR | 
					
						
							|  |  |  |  | VERSION:2.0 | 
					
						
							|  |  |  |  | PRODID:-//Demo//Agenda du Libre//EN | 
					
						
							|  |  |  |  | BEGIN:VEVENT | 
					
						
							|  |  |  |  | UID:event1@demo.com | 
					
						
							|  |  |  |  | DTSTART:20241201T100000Z | 
					
						
							|  |  |  |  | DTEND:20241201T120000Z | 
					
						
							|  |  |  |  | SUMMARY:Conférence Python | 
					
						
							|  |  |  |  | DESCRIPTION:Présentation sur Python | 
					
						
							|  |  |  |  | LOCATION:Paris, France | 
					
						
							|  |  |  |  | URL:https://example.com/event1 | 
					
						
							|  |  |  |  | END:VEVENT | 
					
						
							|  |  |  |  | BEGIN:VEVENT | 
					
						
							|  |  |  |  | UID:event2@demo.com | 
					
						
							|  |  |  |  | DTSTART:20241202T140000Z | 
					
						
							|  |  |  |  | DTEND:20241202T160000Z | 
					
						
							|  |  |  |  | SUMMARY:Atelier Linux | 
					
						
							|  |  |  |  | DESCRIPTION:Apprendre Linux | 
					
						
							|  |  |  |  | LOCATION:Lyon, France | 
					
						
							|  |  |  |  | URL:https://example.com/event2 | 
					
						
							|  |  |  |  | END:VEVENT | 
					
						
							|  |  |  |  | BEGIN:VEVENT | 
					
						
							|  |  |  |  | UID:event3@demo.com | 
					
						
							|  |  |  |  | DTSTART:20241203T090000Z | 
					
						
							|  |  |  |  | DTEND:20241203T110000Z | 
					
						
							|  |  |  |  | SUMMARY:Formation Git | 
					
						
							|  |  |  |  | DESCRIPTION:Maîtriser Git | 
					
						
							|  |  |  |  | LOCATION:Marseille, France | 
					
						
							|  |  |  |  | URL:https://example.com/event3 | 
					
						
							|  |  |  |  | END:VEVENT | 
					
						
							|  |  |  |  | BEGIN:VEVENT | 
					
						
							|  |  |  |  | UID:event4@demo.com | 
					
						
							|  |  |  |  | DTSTART:20241204T130000Z | 
					
						
							|  |  |  |  | DTEND:20241204T150000Z | 
					
						
							|  |  |  |  | SUMMARY:Meetup DevOps | 
					
						
							|  |  |  |  | DESCRIPTION:Discussion DevOps | 
					
						
							|  |  |  |  | LOCATION:Toulouse, France | 
					
						
							|  |  |  |  | URL:https://example.com/event4 | 
					
						
							|  |  |  |  | END:VEVENT | 
					
						
							|  |  |  |  | BEGIN:VEVENT | 
					
						
							|  |  |  |  | UID:event5@demo.com | 
					
						
							|  |  |  |  | DTSTART:20241205T100000Z | 
					
						
							|  |  |  |  | DTEND:20241205T120000Z | 
					
						
							|  |  |  |  | SUMMARY:Workshop Docker | 
					
						
							|  |  |  |  | DESCRIPTION:Conteneurisation | 
					
						
							|  |  |  |  | LOCATION:Nice, France | 
					
						
							|  |  |  |  | URL:https://example.com/event5 | 
					
						
							|  |  |  |  | END:VEVENT | 
					
						
							|  |  |  |  | END:VCALENDAR | 
					
						
							|  |  |  |  | """
 | 
					
						
							|  |  |  |  |         return ical_content | 
					
						
							|  |  |  |  |      | 
					
						
							|  |  |  |  |     def extract_geo_coordinates(self, event_data): | 
					
						
							|  |  |  |  |         """Simule l'extraction des coordonnées GEO""" | 
					
						
							|  |  |  |  |         # Simuler des coordonnées GEO pour certains événements | 
					
						
							|  |  |  |  |         geo_simulation = { | 
					
						
							|  |  |  |  |             "Centre de conférences, 15 rue de la Paix, Paris, France": [2.3522, 48.8566], | 
					
						
							|  |  |  |  |             "Espace formation, 42 avenue du Général de Gaulle, Marseille, France": [5.3698, 43.2965] | 
					
						
							|  |  |  |  |         } | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         location = event_data["location"] | 
					
						
							|  |  |  |  |         if location in geo_simulation: | 
					
						
							|  |  |  |  |             coords = geo_simulation[location] | 
					
						
							|  |  |  |  |             print(f"📍 Coordonnées GEO trouvées: {coords[1]}, {coords[0]}") | 
					
						
							|  |  |  |  |             return coords | 
					
						
							|  |  |  |  |         else: | 
					
						
							|  |  |  |  |             print("Aucun champ GEO trouvé") | 
					
						
							|  |  |  |  |             return None | 
					
						
							|  |  |  |  |      | 
					
						
							|  |  |  |  |     def extract_categories(self, event_data): | 
					
						
							|  |  |  |  |         """Simule l'extraction des catégories""" | 
					
						
							|  |  |  |  |         # Simuler des catégories pour certains événements | 
					
						
							|  |  |  |  |         categories_simulation = { | 
					
						
							|  |  |  |  |             "Centre de conférences, 15 rue de la Paix, Paris, France": ["python", "programmation", "conférence"], | 
					
						
							|  |  |  |  |             "Espace formation, 42 avenue du Général de Gaulle, Marseille, France": ["git", "formation", "développement"], | 
					
						
							|  |  |  |  |             "Lyon, France": ["linux", "atelier", "entraide"], | 
					
						
							|  |  |  |  |             "Toulouse, France": ["devops", "meetup", "discussion"], | 
					
						
							|  |  |  |  |             "Nice, France": ["docker", "workshop", "conteneurisation"] | 
					
						
							|  |  |  |  |         } | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         location = event_data["location"] | 
					
						
							|  |  |  |  |         if location in categories_simulation: | 
					
						
							|  |  |  |  |             categories = categories_simulation[location] | 
					
						
							|  |  |  |  |             print(f"🏷️ Catégories trouvées: {', '.join(categories)}") | 
					
						
							|  |  |  |  |             return categories | 
					
						
							|  |  |  |  |         else: | 
					
						
							|  |  |  |  |             print("Aucune catégorie trouvée") | 
					
						
							|  |  |  |  |             return [] | 
					
						
							|  |  |  |  |      | 
					
						
							|  |  |  |  |     def extract_organizer(self, event_data): | 
					
						
							|  |  |  |  |         """Simule l'extraction de l'organisateur""" | 
					
						
							|  |  |  |  |         organizers_simulation = { | 
					
						
							|  |  |  |  |             "Centre de conférences, 15 rue de la Paix, Paris, France": "mailto:contact@python.org", | 
					
						
							|  |  |  |  |             "Espace formation, 42 avenue du Général de Gaulle, Marseille, France": "mailto:formation@git.org", | 
					
						
							|  |  |  |  |             "Lyon, France": "mailto:contact@aldil.org", | 
					
						
							|  |  |  |  |             "Toulouse, France": "mailto:devops@toulouse.org", | 
					
						
							|  |  |  |  |             "Nice, France": "mailto:docker@nice.org" | 
					
						
							|  |  |  |  |         } | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         location = event_data["location"] | 
					
						
							|  |  |  |  |         if location in organizers_simulation: | 
					
						
							|  |  |  |  |             organizer = organizers_simulation[location] | 
					
						
							|  |  |  |  |             print(f"👤 Organisateur trouvé: {organizer}") | 
					
						
							|  |  |  |  |             return organizer | 
					
						
							|  |  |  |  |         else: | 
					
						
							|  |  |  |  |             print("Aucun organisateur trouvé") | 
					
						
							|  |  |  |  |             return None | 
					
						
							|  |  |  |  |      | 
					
						
							|  |  |  |  |     def extract_alt_description(self, event_data): | 
					
						
							|  |  |  |  |         """Simule l'extraction de la description alternative HTML""" | 
					
						
							|  |  |  |  |         # Simuler une description HTML pour certains événements | 
					
						
							|  |  |  |  |         if "Centre de conférences" in event_data["location"]: | 
					
						
							|  |  |  |  |             alt_desc = "<p>Conférence sur <strong>Python</strong> avec présentation des nouveautés</p>" | 
					
						
							|  |  |  |  |             print(f"📄 Description alternative HTML trouvée: {len(alt_desc)} caractères") | 
					
						
							|  |  |  |  |             return alt_desc | 
					
						
							|  |  |  |  |         return None | 
					
						
							|  |  |  |  |      | 
					
						
							|  |  |  |  |     def extract_short_description(self, event_data): | 
					
						
							|  |  |  |  |         """Simule l'extraction de la description courte""" | 
					
						
							|  |  |  |  |         summary = event_data["summary"] | 
					
						
							|  |  |  |  |         print(f"📝 Description courte trouvée: {summary}") | 
					
						
							|  |  |  |  |         return summary | 
					
						
							|  |  |  |  |      | 
					
						
							|  |  |  |  |     def extract_sequence(self, event_data): | 
					
						
							|  |  |  |  |         """Simule l'extraction de la séquence""" | 
					
						
							|  |  |  |  |         # Simuler des numéros de séquence | 
					
						
							|  |  |  |  |         sequences = [1, 2, 3, 4, 5] | 
					
						
							|  |  |  |  |         seq_num = sequences[len(event_data["summary"]) % len(sequences)] | 
					
						
							|  |  |  |  |         print(f"🔢 Séquence trouvée: {seq_num}") | 
					
						
							|  |  |  |  |         return seq_num | 
					
						
							|  |  |  |  |      | 
					
						
							|  |  |  |  |     def extract_repeat_rules(self, event_data): | 
					
						
							|  |  |  |  |         """Simule l'extraction des règles de répétition""" | 
					
						
							|  |  |  |  |         # Simuler des règles de répétition pour certains événements | 
					
						
							|  |  |  |  |         if "Atelier" in event_data["summary"]: | 
					
						
							|  |  |  |  |             rrule = "FREQ=WEEKLY;BYDAY=TU" | 
					
						
							|  |  |  |  |             print(f"🔄 Règles de répétition trouvées: {rrule}") | 
					
						
							|  |  |  |  |             return rrule | 
					
						
							|  |  |  |  |         elif "Workshop" in event_data["summary"]: | 
					
						
							|  |  |  |  |             rrule = "FREQ=MONTHLY;BYDAY=1SA" | 
					
						
							|  |  |  |  |             print(f"🔄 Règles de répétition trouvées: {rrule}") | 
					
						
							|  |  |  |  |             return rrule | 
					
						
							|  |  |  |  |         return None | 
					
						
							|  |  |  |  |      | 
					
						
							|  |  |  |  |     def parse_event(self, event_data): | 
					
						
							|  |  |  |  |         """Parse un événement simulé""" | 
					
						
							|  |  |  |  |         # Extraire les coordonnées GEO si disponibles | 
					
						
							|  |  |  |  |         geo_coords = self.extract_geo_coordinates(event_data) | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         # Extraire les catégories si disponibles | 
					
						
							|  |  |  |  |         categories = self.extract_categories(event_data) | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         # Extraire les propriétés supplémentaires | 
					
						
							|  |  |  |  |         organizer = self.extract_organizer(event_data) | 
					
						
							|  |  |  |  |         alt_description = self.extract_alt_description(event_data) | 
					
						
							|  |  |  |  |         short_description = self.extract_short_description(event_data) | 
					
						
							|  |  |  |  |         sequence = self.extract_sequence(event_data) | 
					
						
							|  |  |  |  |         repeat_rules = self.extract_repeat_rules(event_data) | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         return { | 
					
						
							|  |  |  |  |             "id": hashlib.md5(event_data["summary"].encode('utf-8')).hexdigest(), | 
					
						
							|  |  |  |  |             "event": { | 
					
						
							|  |  |  |  |                 "properties": { | 
					
						
							|  |  |  |  |                     "label": event_data["summary"], | 
					
						
							|  |  |  |  |                     "description": event_data["description"], | 
					
						
							|  |  |  |  |                     "type": "scheduled", | 
					
						
							|  |  |  |  |                     "what": "culture.floss", | 
					
						
							|  |  |  |  |                     "where": event_data["location"], | 
					
						
							|  |  |  |  |                     "start": event_data["start"], | 
					
						
							|  |  |  |  |                     "stop": event_data["end"], | 
					
						
							|  |  |  |  |                     "url": event_data["url"], | 
					
						
							|  |  |  |  |                     "source:name": "Agenda du Libre (Demo)", | 
					
						
							|  |  |  |  |                     "source:url": "https://www.agendadulibre.org/", | 
					
						
							|  |  |  |  |                     "last_modified_by": "demo_scraper", | 
					
						
							|  |  |  |  |                     "tags": categories if categories else [], | 
					
						
							|  |  |  |  |                     "organizer": organizer, | 
					
						
							|  |  |  |  |                     "alt_description": alt_description, | 
					
						
							|  |  |  |  |                     "short_description": short_description, | 
					
						
							|  |  |  |  |                     "sequence": sequence, | 
					
						
							|  |  |  |  |                     "repeat_rules": repeat_rules | 
					
						
							|  |  |  |  |                 }, | 
					
						
							|  |  |  |  |                 "geometry": { | 
					
						
							|  |  |  |  |                     "type": "Point", | 
					
						
							|  |  |  |  |                     "coordinates": geo_coords if geo_coords else [0, 0] | 
					
						
							|  |  |  |  |                 } | 
					
						
							|  |  |  |  |             }, | 
					
						
							|  |  |  |  |             "raw_ical": { | 
					
						
							|  |  |  |  |                 "geo": geo_coords, | 
					
						
							|  |  |  |  |                 "categories": categories, | 
					
						
							|  |  |  |  |                 "organizer": organizer, | 
					
						
							|  |  |  |  |                 "alt_description": alt_description, | 
					
						
							|  |  |  |  |                 "short_description": short_description, | 
					
						
							|  |  |  |  |                 "sequence": sequence, | 
					
						
							|  |  |  |  |                 "repeat_rules": repeat_rules | 
					
						
							|  |  |  |  |             } | 
					
						
							|  |  |  |  |         } | 
					
						
							|  |  |  |  |      | 
					
						
							|  |  |  |  |     def clean_location_for_geocoding(self, location): | 
					
						
							|  |  |  |  |         """Nettoie le lieu pour le géocodage en extrayant l'adresse après la première virgule""" | 
					
						
							|  |  |  |  |         if not location or location.strip() == "": | 
					
						
							|  |  |  |  |             return None | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         # Diviser par la première virgule | 
					
						
							|  |  |  |  |         parts = location.split(',', 1) | 
					
						
							|  |  |  |  |         if len(parts) > 1: | 
					
						
							|  |  |  |  |             # Prendre la partie après la première virgule | 
					
						
							|  |  |  |  |             address_part = parts[1].strip() | 
					
						
							|  |  |  |  |              | 
					
						
							|  |  |  |  |             # Vérifier si on a un numéro et une adresse | 
					
						
							|  |  |  |  |             # Pattern pour détecter un numéro suivi d'une adresse | 
					
						
							|  |  |  |  |             address_pattern = r'^\s*\d+.*' | 
					
						
							|  |  |  |  |             if re.match(address_pattern, address_part): | 
					
						
							|  |  |  |  |                 print(f"📍 Adresse potentielle trouvée: {address_part}") | 
					
						
							|  |  |  |  |                 return address_part | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         # Si pas de virgule ou pas d'adresse valide, essayer le lieu complet | 
					
						
							|  |  |  |  |         print(f"📍 Tentative de géocodage avec le lieu complet: {location}") | 
					
						
							|  |  |  |  |         return location.strip() | 
					
						
							|  |  |  |  |      | 
					
						
							|  |  |  |  |     def simulate_geocoding(self, location): | 
					
						
							|  |  |  |  |         """Simule le géocodage avec des coordonnées fictives""" | 
					
						
							|  |  |  |  |         if not location: | 
					
						
							|  |  |  |  |             return None | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         # Simulation de coordonnées basées sur le lieu | 
					
						
							|  |  |  |  |         fake_coords = { | 
					
						
							|  |  |  |  |             "Paris": [2.3522, 48.8566], | 
					
						
							|  |  |  |  |             "Lyon": [4.8357, 45.7640], | 
					
						
							|  |  |  |  |             "Marseille": [5.3698, 43.2965], | 
					
						
							|  |  |  |  |             "Toulouse": [1.4442, 43.6047], | 
					
						
							|  |  |  |  |             "Nice": [7.2619, 43.7102], | 
					
						
							|  |  |  |  |             "Nantes": [-1.5536, 47.2184], | 
					
						
							|  |  |  |  |             "Strasbourg": [7.7521, 48.5734], | 
					
						
							|  |  |  |  |             "Montpellier": [3.8767, 43.6110], | 
					
						
							|  |  |  |  |             "Bordeaux": [-0.5792, 44.8378], | 
					
						
							|  |  |  |  |             "Lille": [3.0573, 50.6292] | 
					
						
							|  |  |  |  |         } | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         # Chercher une correspondance dans les villes connues | 
					
						
							|  |  |  |  |         for city, coords in fake_coords.items(): | 
					
						
							|  |  |  |  |             if city.lower() in location.lower(): | 
					
						
							|  |  |  |  |                 print(f"🌍 Géocodage simulé: {location} -> {coords}") | 
					
						
							|  |  |  |  |                 return coords | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         # Coordonnées par défaut si pas de correspondance | 
					
						
							|  |  |  |  |         default_coords = [2.3522, 48.8566]  # Paris par défaut | 
					
						
							|  |  |  |  |         print(f"🌍 Géocodage simulé (défaut): {location} -> {default_coords}") | 
					
						
							|  |  |  |  |         return default_coords | 
					
						
							|  |  |  |  |      | 
					
						
							|  |  |  |  |     def improve_event_coordinates(self, event_data): | 
					
						
							|  |  |  |  |         """Améliore les coordonnées de l'événement si nécessaire""" | 
					
						
							|  |  |  |  |         coords = event_data["event"]["geometry"]["coordinates"] | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         # Vérifier si les coordonnées sont par défaut (0, 0) | 
					
						
							|  |  |  |  |         if coords == [0, 0]: | 
					
						
							|  |  |  |  |             location = event_data["event"]["properties"].get("where", "") | 
					
						
							|  |  |  |  |              | 
					
						
							|  |  |  |  |             if location: | 
					
						
							|  |  |  |  |                 # Nettoyer le lieu pour le géocodage | 
					
						
							|  |  |  |  |                 clean_location = self.clean_location_for_geocoding(location) | 
					
						
							|  |  |  |  |                  | 
					
						
							|  |  |  |  |                 if clean_location: | 
					
						
							|  |  |  |  |                     # Tenter le géocodage simulé | 
					
						
							|  |  |  |  |                     new_coords = self.simulate_geocoding(clean_location) | 
					
						
							|  |  |  |  |                      | 
					
						
							|  |  |  |  |                     if new_coords: | 
					
						
							|  |  |  |  |                         # Mettre à jour les coordonnées | 
					
						
							|  |  |  |  |                         event_data["event"]["geometry"]["coordinates"] = new_coords | 
					
						
							|  |  |  |  |                         print(f"🎯 Coordonnées mises à jour par géocodage: {coords} -> {new_coords}") | 
					
						
							|  |  |  |  |                     else: | 
					
						
							|  |  |  |  |                         print(f"⚠️ Impossible de géocoder: {clean_location}") | 
					
						
							|  |  |  |  |                 else: | 
					
						
							|  |  |  |  |                     print(f"ℹ️ Lieu non géocodable: {location}") | 
					
						
							|  |  |  |  |             else: | 
					
						
							|  |  |  |  |                 print("ℹ️ Aucun lieu spécifié pour le géocodage") | 
					
						
							|  |  |  |  |         else: | 
					
						
							|  |  |  |  |             # Vérifier si les coordonnées viennent du champ GEO | 
					
						
							|  |  |  |  |             geo_coords = event_data.get("raw_ical", {}).get("geo") | 
					
						
							|  |  |  |  |             if geo_coords: | 
					
						
							|  |  |  |  |                 print(f"✅ Coordonnées utilisées depuis le champ GEO: {coords}") | 
					
						
							|  |  |  |  |             else: | 
					
						
							|  |  |  |  |                 print(f"ℹ️ Coordonnées déjà définies: {coords}") | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         return event_data | 
					
						
							|  |  |  |  |      | 
					
						
							|  |  |  |  |     def log_event_details(self, event_data): | 
					
						
							|  |  |  |  |         """Log détaillé de l'événement avant envoi""" | 
					
						
							|  |  |  |  |         props = event_data["event"]["properties"] | 
					
						
							|  |  |  |  |         geom = event_data["event"]["geometry"] | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         print("📝 Détails de l'événement à insérer:") | 
					
						
							|  |  |  |  |         print(f"   ID: {event_data['id']}") | 
					
						
							|  |  |  |  |         print(f"   Titre: {props.get('label', 'N/A')}") | 
					
						
							|  |  |  |  |         print(f"   Description: {props.get('description', 'N/A')[:100]}{'...' if len(props.get('description', '')) > 100 else ''}") | 
					
						
							|  |  |  |  |         print(f"   Type: {props.get('type', 'N/A')}") | 
					
						
							|  |  |  |  |         print(f"   Catégorie: {props.get('what', 'N/A')}") | 
					
						
							|  |  |  |  |         print(f"   Lieu: {props.get('where', 'N/A')}") | 
					
						
							|  |  |  |  |         print(f"   Début: {props.get('start', 'N/A')}") | 
					
						
							|  |  |  |  |         print(f"   Fin: {props.get('stop', 'N/A')}") | 
					
						
							|  |  |  |  |         print(f"   URL: {props.get('url', 'N/A')}") | 
					
						
							|  |  |  |  |         print(f"   Source: {props.get('source:name', 'N/A')}") | 
					
						
							|  |  |  |  |         print(f"   Coordonnées: {geom.get('coordinates', 'N/A')}") | 
					
						
							|  |  |  |  |         print(f"   Tags: {', '.join(props.get('tags', [])) if props.get('tags') else 'N/A'}") | 
					
						
							|  |  |  |  |         print(f"   Organisateur: {props.get('organizer', 'N/A')}") | 
					
						
							|  |  |  |  |         print(f"   Description courte: {props.get('short_description', 'N/A')}") | 
					
						
							|  |  |  |  |         print(f"   Séquence: {props.get('sequence', 'N/A')}") | 
					
						
							|  |  |  |  |         print(f"   Règles de répétition: {props.get('repeat_rules', 'N/A')}") | 
					
						
							|  |  |  |  |         print(f"   Description HTML: {'Oui' if props.get('alt_description') else 'N/A'}") | 
					
						
							|  |  |  |  |         print(f"   Modifié par: {props.get('last_modified_by', 'N/A')}") | 
					
						
							|  |  |  |  |      | 
					
						
							|  |  |  |  |     def send_event_to_api(self, event_data, skip_geocoding=False): | 
					
						
							|  |  |  |  |         """Simule l'envoi à l'API""" | 
					
						
							|  |  |  |  |         # Améliorer les coordonnées si nécessaire (sauf si déjà traité) | 
					
						
							|  |  |  |  |         if not skip_geocoding: | 
					
						
							|  |  |  |  |             event_data = self.improve_event_coordinates(event_data) | 
					
						
							|  |  |  |  |         else: | 
					
						
							|  |  |  |  |             print("ℹ️ Géocodage ignoré - événement déjà traité") | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         # Log détaillé de l'événement | 
					
						
							|  |  |  |  |         self.log_event_details(event_data) | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         if self.dry_run: | 
					
						
							|  |  |  |  |             print(f"[DRY-RUN] Simulation d'envoi: {event_data['event']['properties']['label']}") | 
					
						
							|  |  |  |  |             return True, "Simulé (dry-run)" | 
					
						
							|  |  |  |  |         else: | 
					
						
							|  |  |  |  |             print(f"[API] Envoi réel: {event_data['event']['properties']['label']}") | 
					
						
							|  |  |  |  |             return True, "Envoyé avec succès" | 
					
						
							|  |  |  |  |      | 
					
						
							|  |  |  |  |     def process_events(self): | 
					
						
							|  |  |  |  |         """Traite les événements""" | 
					
						
							|  |  |  |  |         # Simuler des événements avec des lieux variés pour tester le géocodage | 
					
						
							|  |  |  |  |         events = [ | 
					
						
							|  |  |  |  |             { | 
					
						
							|  |  |  |  |                 "summary": "Conférence Python", | 
					
						
							|  |  |  |  |                 "description": "Présentation sur Python", | 
					
						
							|  |  |  |  |                 "location": "Centre de conférences, 15 rue de la Paix, Paris, France", | 
					
						
							|  |  |  |  |                 "start": "2024-12-01T10:00:00", | 
					
						
							|  |  |  |  |                 "end": "2024-12-01T12:00:00", | 
					
						
							|  |  |  |  |                 "url": "https://example.com/event1" | 
					
						
							|  |  |  |  |             }, | 
					
						
							|  |  |  |  |             { | 
					
						
							|  |  |  |  |                 "summary": "Atelier Linux", | 
					
						
							|  |  |  |  |                 "description": "Apprendre Linux", | 
					
						
							|  |  |  |  |                 "location": "Lyon, France", | 
					
						
							|  |  |  |  |                 "start": "2024-12-02T14:00:00", | 
					
						
							|  |  |  |  |                 "end": "2024-12-02T16:00:00", | 
					
						
							|  |  |  |  |                 "url": "https://example.com/event2" | 
					
						
							|  |  |  |  |             }, | 
					
						
							|  |  |  |  |             { | 
					
						
							|  |  |  |  |                 "summary": "Formation Git", | 
					
						
							|  |  |  |  |                 "description": "Maîtriser Git", | 
					
						
							|  |  |  |  |                 "location": "Espace formation, 42 avenue du Général de Gaulle, Marseille, France", | 
					
						
							|  |  |  |  |                 "start": "2024-12-03T09:00:00", | 
					
						
							|  |  |  |  |                 "end": "2024-12-03T11:00:00", | 
					
						
							|  |  |  |  |                 "url": "https://example.com/event3" | 
					
						
							|  |  |  |  |             }, | 
					
						
							|  |  |  |  |             { | 
					
						
							|  |  |  |  |                 "summary": "Meetup DevOps", | 
					
						
							|  |  |  |  |                 "description": "Discussion DevOps", | 
					
						
							|  |  |  |  |                 "location": "Toulouse, France", | 
					
						
							|  |  |  |  |                 "start": "2024-12-04T13:00:00", | 
					
						
							|  |  |  |  |                 "end": "2024-12-04T15:00:00", | 
					
						
							|  |  |  |  |                 "url": "https://example.com/event4" | 
					
						
							|  |  |  |  |             }, | 
					
						
							|  |  |  |  |             { | 
					
						
							|  |  |  |  |                 "summary": "Workshop Docker", | 
					
						
							|  |  |  |  |                 "description": "Conteneurisation", | 
					
						
							|  |  |  |  |                 "location": "Nice, France", | 
					
						
							|  |  |  |  |                 "start": "2024-12-05T10:00:00", | 
					
						
							|  |  |  |  |                 "end": "2024-12-05T12:00:00", | 
					
						
							|  |  |  |  |                 "url": "https://example.com/event5" | 
					
						
							|  |  |  |  |             } | 
					
						
							|  |  |  |  |         ] | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         stats = { | 
					
						
							|  |  |  |  |             "total_events": len(events), | 
					
						
							|  |  |  |  |             "new_events": 0, | 
					
						
							|  |  |  |  |             "already_saved": 0, | 
					
						
							|  |  |  |  |             "api_errors": 0, | 
					
						
							|  |  |  |  |             "parse_errors": 0, | 
					
						
							|  |  |  |  |             "sent_this_run": 0, | 
					
						
							|  |  |  |  |             "skipped_due_to_limit": 0 | 
					
						
							|  |  |  |  |         } | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         processed_count = 0 | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         print(f"Traitement de {len(events)} événements") | 
					
						
							|  |  |  |  |         if self.max_events: | 
					
						
							|  |  |  |  |             print(f"Limite d'événements: {self.max_events}") | 
					
						
							|  |  |  |  |         if self.dry_run: | 
					
						
							|  |  |  |  |             print("Mode DRY-RUN activé - aucun événement ne sera envoyé à l'API") | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         for event_data in events: | 
					
						
							|  |  |  |  |             # Vérifier la limite | 
					
						
							|  |  |  |  |             if self.max_events and processed_count >= self.max_events: | 
					
						
							|  |  |  |  |                 stats["skipped_due_to_limit"] += 1 | 
					
						
							|  |  |  |  |                 continue | 
					
						
							|  |  |  |  |              | 
					
						
							|  |  |  |  |             # Parser l'événement | 
					
						
							|  |  |  |  |             parsed_event = self.parse_event(event_data) | 
					
						
							|  |  |  |  |             event_id = parsed_event["id"] | 
					
						
							|  |  |  |  |              | 
					
						
							|  |  |  |  |             # Vérifier si déjà traité | 
					
						
							|  |  |  |  |             if event_id in self.cache_data["processed_events"]: | 
					
						
							|  |  |  |  |                 stats["already_saved"] += 1 | 
					
						
							|  |  |  |  |                 print(f"Événement déjà traité: {parsed_event['event']['properties']['label']}") | 
					
						
							|  |  |  |  |                 continue | 
					
						
							|  |  |  |  |              | 
					
						
							|  |  |  |  |             # Vérifier si l'événement a déjà été traité avec succès | 
					
						
							|  |  |  |  |             skip_geocoding = False | 
					
						
							|  |  |  |  |             if event_id in self.events_data["events"]: | 
					
						
							|  |  |  |  |                 event_status = self.events_data["events"][event_id].get("status", "unknown") | 
					
						
							|  |  |  |  |                 if event_status in ["saved", "already_exists"]: | 
					
						
							|  |  |  |  |                     skip_geocoding = True | 
					
						
							|  |  |  |  |                     print(f"ℹ️ Géocodage ignoré pour {parsed_event['event']['properties']['label']} - déjà traité") | 
					
						
							|  |  |  |  |              | 
					
						
							|  |  |  |  |             # Envoyer à l'API | 
					
						
							|  |  |  |  |             success, message = self.send_event_to_api(parsed_event, skip_geocoding=skip_geocoding) | 
					
						
							|  |  |  |  |              | 
					
						
							|  |  |  |  |             if success: | 
					
						
							|  |  |  |  |                 stats["new_events"] += 1 | 
					
						
							|  |  |  |  |                 stats["sent_this_run"] += 1 | 
					
						
							|  |  |  |  |                  | 
					
						
							|  |  |  |  |                 # Mettre à jour les données | 
					
						
							|  |  |  |  |                 self.events_data["events"][event_id] = { | 
					
						
							|  |  |  |  |                     "status": "saved", | 
					
						
							|  |  |  |  |                     "message": message, | 
					
						
							|  |  |  |  |                     "last_attempt": datetime.now().isoformat(), | 
					
						
							|  |  |  |  |                     "event": parsed_event["event"] | 
					
						
							|  |  |  |  |                 } | 
					
						
							|  |  |  |  |                  | 
					
						
							|  |  |  |  |                 self.cache_data["processed_events"][event_id] = { | 
					
						
							|  |  |  |  |                     "processed_at": datetime.now().isoformat(), | 
					
						
							|  |  |  |  |                     "status": "saved", | 
					
						
							|  |  |  |  |                     "event_label": parsed_event["event"]["properties"]["label"] | 
					
						
							|  |  |  |  |                 } | 
					
						
							|  |  |  |  |                  | 
					
						
							|  |  |  |  |                 print(f"✅ {parsed_event['event']['properties']['label']} - {message}") | 
					
						
							|  |  |  |  |             else: | 
					
						
							|  |  |  |  |                 stats["api_errors"] += 1 | 
					
						
							|  |  |  |  |                 print(f"❌ {parsed_event['event']['properties']['label']} - Erreur") | 
					
						
							|  |  |  |  |              | 
					
						
							|  |  |  |  |             processed_count += 1 | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         # Mettre à jour les timestamps | 
					
						
							|  |  |  |  |         self.events_data["last_update"] = datetime.now().isoformat() | 
					
						
							|  |  |  |  |         self.cache_data["last_fetch"] = datetime.now().isoformat() | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         # Sauvegarder | 
					
						
							|  |  |  |  |         self.save_events_data() | 
					
						
							|  |  |  |  |         self.save_cache_data() | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         return stats | 
					
						
							|  |  |  |  |      | 
					
						
							|  |  |  |  |     def run(self): | 
					
						
							|  |  |  |  |         """Exécute la démonstration""" | 
					
						
							|  |  |  |  |         print("🚀 Démonstration du scraper agenda du libre amélioré") | 
					
						
							|  |  |  |  |         print(f"Configuration: max_events={self.max_events}, dry_run={self.dry_run}") | 
					
						
							|  |  |  |  |         print("=" * 60) | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         # Simuler la récupération iCal | 
					
						
							|  |  |  |  |         ical_content = self.simulate_ical_fetch() | 
					
						
							|  |  |  |  |         content_hash = self.get_content_hash(ical_content) | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         # Vérifier si le contenu a changé | 
					
						
							|  |  |  |  |         if self.cache_data["content_hash"] == content_hash: | 
					
						
							|  |  |  |  |             print("Contenu iCal identique au précédent, utilisation du cache") | 
					
						
							|  |  |  |  |         else: | 
					
						
							|  |  |  |  |             print("Nouveau contenu iCal détecté, mise à jour du cache") | 
					
						
							|  |  |  |  |             self.cache_data["content_hash"] = content_hash | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         # Traiter les événements | 
					
						
							|  |  |  |  |         stats = self.process_events() | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         # Afficher les statistiques | 
					
						
							|  |  |  |  |         print("\n📊 Statistiques finales:") | 
					
						
							|  |  |  |  |         print(f"   Total d'événements trouvés: {stats['total_events']}") | 
					
						
							|  |  |  |  |         print(f"   Nouveaux événements envoyés: {stats['new_events']}") | 
					
						
							|  |  |  |  |         print(f"   Événements déjà existants: {stats['already_saved']}") | 
					
						
							|  |  |  |  |         print(f"   Erreurs d'API: {stats['api_errors']}") | 
					
						
							|  |  |  |  |         print(f"   Erreurs de parsing: {stats['parse_errors']}") | 
					
						
							|  |  |  |  |         print(f"   Événements envoyés cette fois: {stats['sent_this_run']}") | 
					
						
							|  |  |  |  |         if stats['skipped_due_to_limit'] > 0: | 
					
						
							|  |  |  |  |             print(f"   Événements ignorés (limite atteinte): {stats['skipped_due_to_limit']}") | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         print("\n✅ Démonstration terminée avec succès") | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         # Afficher les fichiers générés | 
					
						
							|  |  |  |  |         print(f"\n📁 Fichiers générés:") | 
					
						
							|  |  |  |  |         if os.path.exists(self.cache_file): | 
					
						
							|  |  |  |  |             print(f"   Cache: {self.cache_file}") | 
					
						
							|  |  |  |  |         if os.path.exists(self.events_file): | 
					
						
							|  |  |  |  |             print(f"   Événements: {self.events_file}") | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | def main(): | 
					
						
							|  |  |  |  |     """Fonction principale de démonstration""" | 
					
						
							|  |  |  |  |     print("🧪 Démonstration des améliorations du scraper agenda du libre") | 
					
						
							|  |  |  |  |     print("=" * 60) | 
					
						
							|  |  |  |  |      | 
					
						
							|  |  |  |  |     # Test 1: Mode dry-run avec limite | 
					
						
							|  |  |  |  |     print("\n1️⃣ Test 1: Mode dry-run avec limite de 3 événements") | 
					
						
							|  |  |  |  |     scraper1 = DemoAgendaDuLibreScraper(max_events=3, dry_run=True) | 
					
						
							|  |  |  |  |     scraper1.run() | 
					
						
							|  |  |  |  |      | 
					
						
							|  |  |  |  |     # Test 2: Mode dry-run sans limite | 
					
						
							|  |  |  |  |     print("\n2️⃣ Test 2: Mode dry-run sans limite") | 
					
						
							|  |  |  |  |     scraper2 = DemoAgendaDuLibreScraper(max_events=None, dry_run=True) | 
					
						
							|  |  |  |  |     scraper2.run() | 
					
						
							|  |  |  |  |      | 
					
						
							|  |  |  |  |     # Test 3: Mode réel avec limite | 
					
						
							|  |  |  |  |     print("\n3️⃣ Test 3: Mode réel avec limite de 2 événements") | 
					
						
							|  |  |  |  |     scraper3 = DemoAgendaDuLibreScraper(max_events=2, dry_run=False) | 
					
						
							|  |  |  |  |     scraper3.run() | 
					
						
							|  |  |  |  |      | 
					
						
							| 
									
										
										
										
											2025-10-09 23:35:12 +02:00
										 |  |  |  |     # Test 4: Mode parallèle | 
					
						
							|  |  |  |  |     print("\n4️⃣ Test 4: Mode parallèle avec 15 événements") | 
					
						
							|  |  |  |  |     scraper4 = DemoAgendaDuLibreScraper(max_events=15, dry_run=True, parallel=True, max_workers=3) | 
					
						
							|  |  |  |  |     scraper4.run() | 
					
						
							|  |  |  |  |      | 
					
						
							| 
									
										
										
										
											2025-10-09 22:55:50 +02:00
										 |  |  |  |     print("\n🎉 Toutes les démonstrations sont terminées !") | 
					
						
							|  |  |  |  |     print("\nFonctionnalités démontrées:") | 
					
						
							|  |  |  |  |     print("✅ Cache JSON intelligent") | 
					
						
							|  |  |  |  |     print("✅ Limitation du nombre d'événements") | 
					
						
							|  |  |  |  |     print("✅ Mode dry-run par défaut") | 
					
						
							|  |  |  |  |     print("✅ Détection de changements de contenu") | 
					
						
							|  |  |  |  |     print("✅ Suivi des événements traités") | 
					
						
							| 
									
										
										
										
											2025-10-09 23:35:12 +02:00
										 |  |  |  |     print("✅ Traitement parallèle") | 
					
						
							| 
									
										
										
										
											2025-10-09 22:55:50 +02:00
										 |  |  |  | 
 | 
					
						
# Run the demonstration only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()