add extractor viparis
This commit is contained in:
parent
fd2d51b662
commit
ee48a3c665
9 changed files with 2221 additions and 4443 deletions
105
extractors/test_viparis_structure.py
Normal file
105
extractors/test_viparis_structure.py
Normal file
|
@ -0,0 +1,105 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Script de test pour analyser la structure des données Viparis
|
||||
"""
|
||||
|
||||
import requests
|
||||
import json
|
||||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
def analyze_viparis_data():
    """Download the Viparis events page and print the shape of its embedded data.

    The page is fetched with a browser-like User-Agent, every ``<script>`` tag
    is scanned for a ``window.__NUXT__ = {...}`` assignment, and the first
    payload that parses as JSON is described on stdout: top-level keys, the
    keys of ``state``, likely event containers, known event names and
    date-looking substrings.

    Returns:
        None. All results are printed.

    Raises:
        Nothing: any failure (network, HTTP status, parsing) is reported on
        stdout, matching the original best-effort behaviour of this script.
    """
    url = "https://www.viparis.com/actualites-evenements/evenements"

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    try:
        print(f"🔍 Analyse de la structure des données Viparis: {url}")
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # Look for inline scripts that carry the JSON payload.
        for position, script in enumerate(soup.find_all('script'), start=1):
            script_content = script.string
            if not script_content or 'window.__NUXT__' not in script_content:
                continue

            print(f"\n📜 Script {position} trouvé avec window.__NUXT__")

            try:
                nuxt_data = _extract_nuxt_payload(script_content)
            except json.JSONDecodeError as e:
                print(f"❌ Erreur JSON: {e}")
                continue

            # NOTE(review): the captured source lost its indentation; this
            # message is assumed to belong to the "no assignment matched"
            # branch rather than to a for/else on the loop — confirm.
            if nuxt_data is None:
                print("❌ Pattern window.__NUXT__ non trouvé")
                continue

            print("✅ JSON parsé avec succès")

            print(f"\n🔍 Structure de niveau 1:")
            _print_key_types(nuxt_data)

            # Guard against a non-dict ``state`` so it cannot abort the
            # remaining analysis with an AttributeError.
            state = nuxt_data.get('state')
            if isinstance(state, dict):
                print(f"\n🔍 Structure de state:")
                _print_key_types(state)

                # Probe the keys under which events could plausibly live.
                for key in ('events', 'event', 'data', 'items', 'results'):
                    if key in state:
                        _describe_event_container(key, state[key])

            _search_event_patterns(nuxt_data)

            # One successfully parsed payload is enough.
            break

        print("\n✅ Analyse terminée")

    except Exception as e:
        # Top-level boundary of a diagnostic script: report and stop.
        print(f"❌ Erreur: {e}")


def _extract_nuxt_payload(script_content):
    """Return the object assigned to ``window.__NUXT__``, or None if absent.

    The JSON document is decoded starting at the opening brace, so a ``};``
    inside a string value or a missing trailing semicolon cannot truncate or
    hide the payload. Raises ``json.JSONDecodeError`` when the text after the
    assignment is not valid JSON.
    """
    assignment = re.search(r'window\.__NUXT__\s*=\s*\{', script_content)
    if assignment is None:
        return None
    # The match ends just after the "{"; decoding must start on it.
    payload, _end = json.JSONDecoder().raw_decode(script_content, assignment.end() - 1)
    return payload


def _print_key_types(mapping):
    """Print each key of *mapping* together with the type of its value."""
    for key, value in mapping.items():
        print(f" - {key}: {type(value)}")


def _describe_event_container(key, events_data):
    """Print type, size and a 500-character preview of a candidate event container."""
    print(f"\n📅 Données d'événements trouvées dans '{key}':")
    print(f" Type: {type(events_data)}")

    if isinstance(events_data, list):
        print(f" Nombre d'éléments: {len(events_data)}")
        if events_data:
            print(f" Premier élément: {json.dumps(events_data[0], indent=2)[:500]}...")
    elif isinstance(events_data, dict):
        print(f" Clés: {list(events_data.keys())}")
        data = events_data.get('data')
        if isinstance(data, list):
            print(f" Nombre d'événements dans data: {len(data)}")
            if data:
                print(f" Premier événement: {json.dumps(data[0], indent=2)[:500]}...")


def _search_event_patterns(nuxt_data):
    """Report known event names and date-like substrings found anywhere in the payload."""
    print(f"\n🔍 Recherche de patterns d'événements...")

    # ensure_ascii=False keeps accented characters literal; with the default
    # escaping a name such as 'RÉÉDUCA' could never be found in the dump.
    json_str = json.dumps(nuxt_data, ensure_ascii=False)

    for name in ('BattleKart', 'Virtual Room', 'PRODURABLE', 'RÉÉDUCA', 'SALON'):
        if name in json_str:
            print(f" ✅ Trouvé '{name}' dans les données")

    # ISO-style and slash-separated dates.
    for pattern in (r'\d{4}-\d{2}-\d{2}', r'\d{1,2}/\d{1,2}/\d{4}'):
        matches = re.findall(pattern, json_str)
        if matches:
            print(f" 📅 Dates trouvées ({pattern}): {matches[:5]}")
|
||||
|
||||
if __name__ == "__main__":
    # Run the analysis only when executed as a script, not when imported.
    analyze_viparis_data()
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue