84 lines
		
	
	
	
		
			3.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
		
		
			
		
	
	
			84 lines
		
	
	
	
		
			3.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
#!/usr/bin/env python3
"""
Debug script for analysing the HTML structure of the CCPL agenda page.
"""
|  | 
 | ||
|  | import requests | ||
|  | from bs4 import BeautifulSoup | ||
|  | import re | ||
|  | 
 | ||
def debug_html_structure():
    """Fetch the CCPL agenda page and print a summary of its HTML structure.

    The report is written to stdout and covers, in order:

    * the size of the downloaded HTML;
    * text nodes that look like French dates ("12 jan", "1er mars",
      "3 février", ...), with the tag name and classes of their parent;
    * how many elements carry a class matching a few common names;
    * div/article/li/p/span elements whose text contains a date;
    * the first links found on the page.

    The raw HTML is also written to ``ccpl_debug.html`` (relative to the
    current working directory) for manual inspection.

    Returns:
        None. ``requests.RequestException`` raised while fetching and
        ``OSError`` raised while writing the file are reported on stdout
        instead of being propagated; any other exception propagates.
    """
    url = "https://www.cc-paysdelimours.fr/agenda"

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    # Day number followed by a French month, abbreviated or in full.
    # The previous pattern listed only three-letter abbreviations followed
    # by ``\b``, so full names ("12 janvier", "3 mars", "15 juin") could
    # never match. This pattern accepts everything the old one did, plus
    # full names, "1er", and unaccented spellings (e.g. "FEVRIER", "aout").
    date_pattern = re.compile(
        r'\b\d{1,2}(?:er)?\s+('
        r'jan(?:v(?:ier)?)?|f[ée]v(?:r(?:ier)?)?|mars?|avr(?:il)?|mai|'
        r'juin|jun|juil(?:let)?|jul|ao[ûu]t?|sep(?:t(?:embre)?)?|'
        r'oct(?:obre)?|nov(?:embre)?|d[ée]c(?:embre)?'
        r')\b',
        re.I,
    )

    # Only the network call sits in this try block, so that unrelated
    # errors further down are not mistaken for a download failure.
    try:
        print(f"🌐 Récupération de: {url}")
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"❌ Erreur: {e}")
        return

    # Read the decoded body once and reuse it below.
    html = response.text
    soup = BeautifulSoup(html, 'html.parser')

    print(f"📄 Taille du HTML: {len(html)} caractères")

    # Text nodes whose content matches the date pattern.
    date_elements = soup.find_all(string=date_pattern)

    print(f"📅 Éléments avec dates trouvés: {len(date_elements)}")

    # Show the first few date strings together with their parent tag.
    for i, elem in enumerate(date_elements[:5]):
        parent = elem.parent
        print(f"  {i+1}. {elem.strip()}")
        print(f"     Parent: {parent.name if parent else 'None'}")
        print(f"     Classes: {parent.get('class', []) if parent else 'None'}")
        print()

    print("🔍 Recherche de patterns spécifiques:")

    # Count elements whose class attribute contains a commonly used name.
    common_classes = ('event', 'agenda', 'manifestation', 'item', 'card', 'content')
    for class_name in common_classes:
        elements = soup.find_all(class_=re.compile(class_name, re.I))
        print(f"  Classe '{class_name}': {len(elements)} éléments")

    # Container elements whose text contains a date. The text of an element
    # includes that of its descendants, so an ancestor and its child can
    # both be listed.
    elements_with_dates = []
    for elem in soup.find_all(['div', 'article', 'li', 'p', 'span']):
        text = elem.get_text()
        if len(text) > 10 and date_pattern.search(text):
            elements_with_dates.append((elem, text[:100]))

    print(f"📋 Éléments avec dates et texte significatif: {len(elements_with_dates)}")

    # Show the first few matching containers.
    for i, (elem, text) in enumerate(elements_with_dates[:3]):
        print(f"  {i+1}. Tag: {elem.name}, Classes: {elem.get('class', [])}")
        print(f"     Texte: {text}...")
        print()

    # Anchors that carry an href attribute.
    links = soup.find_all('a', href=True)
    print(f"🔗 Liens trouvés: {len(links)}")

    # Show a few links with the start of their text.
    for i, link in enumerate(links[:5]):
        print(f"  {i+1}. {link.get('href')} - {link.get_text()[:50]}...")

    # Save the HTML for manual inspection; a write failure is reported
    # rather than raised, like a download failure.
    try:
        with open('ccpl_debug.html', 'w', encoding='utf-8') as f:
            f.write(html)
    except OSError as e:
        print(f"❌ Erreur: {e}")
        return
    print("💾 HTML sauvegardé dans ccpl_debug.html")
|  | 
 | ||
# Script entry point: run the analysis only on direct execution, not on import.
if __name__ == '__main__':
    debug_html_structure()