266 lines
9.2 KiB
Python
266 lines
9.2 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
|
||
"""
|
||
Extractor for French school holidays (zones A/B/C and Corsica) and public holidays.

Sources:
- School holidays (ICS): https://www.data.gouv.fr/api/1/datasets/r/e5f40fbc-7a84-4c4a-94e4-55ac4299b222
- Public holidays: https://calendrier.api.gouv.fr/jours-feries/metropole/<year>.json (current year)

Features:
- JSON cache to limit the number of requests
- CLI parameters (period, optional zone, dry-run, OEDB base_url, cache TTL)
- Conversion to the OEDB Feature format (one event per zone and per holiday period)
- No GPS coordinates (point [0,0])
- Success/failure report once sending is done
|
||
"""
|
||
|
||
import argparse
|
||
import datetime as dt
|
||
import sys
|
||
from typing import Any, Dict, List, Tuple
|
||
|
||
from utils_extractor_common import (
|
||
CacheConfig,
|
||
load_cache,
|
||
save_cache,
|
||
oedb_feature,
|
||
post_oedb_features,
|
||
http_get_json,
|
||
)
|
||
|
||
|
||
# Default path of the JSON cache file (overridable with --cache).
DEFAULT_CACHE = "extractors_cache/fr_holidays_cache.json"

# Default OEDB API base URL (overridable with --base-url).
OEDB_DEFAULT = "https://api.openeventdatabase.org"

# data.gouv.fr resource that is downloaded and parsed as the school-holiday
# ICS calendar.
ICS_URL = (
    "https://www.data.gouv.fr/api/1/datasets/r/"
    "e5f40fbc-7a84-4c4a-94e4-55ac4299b222"
)
|
||
|
||
|
||
def build_args() -> argparse.Namespace:
    """Parse the extractor's command-line options.

    Returns:
        The ``argparse`` namespace with the attributes ``start``, ``end``,
        ``academie``, ``base_url``, ``cache``, ``cache_ttl``, ``limit`` and
        ``dry_run``.

    Note:
        ``--academie`` is compared against zone labels by
        ``convert_to_oedb``, so it effectively selects a holiday zone.
    """
    parser = argparse.ArgumentParser(description="Extracteur vacances scolaires FR -> OEDB")
    option = parser.add_argument

    # Time window and zone filters.
    option("--start", default=None, help="Date de début YYYY-MM-DD")
    option("--end", default=None, help="Date de fin YYYY-MM-DD")
    option("--academie", default=None, help="Filtrer par académie (optionnel)")

    # Target API and caching.
    option("--base-url", default=OEDB_DEFAULT, help="Base URL OEDB")
    option("--cache", default=DEFAULT_CACHE, help="Fichier de cache JSON")
    option("--cache-ttl", type=int, default=24 * 3600, help="Durée de vie du cache (sec)")

    # Run control.
    option("--limit", type=int, default=None, help="Limiter le nombre d'événements à traiter")
    option("--dry-run", action="store_true", help="N'envoie pas à l'API OEDB")

    return parser.parse_args()
|
||
|
||
|
||
def date_in_range(d: str, start: str, end: str) -> bool:
    """Tell whether the ISO date ``d`` lies inside the inclusive window.

    Args:
        d: Date to test, in ``YYYY-MM-DD`` form.
        start: Lower bound (inclusive); a falsy value (``None`` or ``""``)
            means no lower bound.
        end: Upper bound (inclusive); a falsy value means no upper bound.

    Returns:
        ``True`` when no bound is given or when ``d`` respects every bound
        that is given, ``False`` otherwise.

    Raises:
        ValueError: From ``date.fromisoformat`` when ``d`` or a supplied
            bound is not a valid ISO date. ``d`` is only parsed when at
            least one bound is set.
    """
    if not (start or end):
        return True

    day = dt.date.fromisoformat(d)
    if start and day < dt.date.fromisoformat(start):
        return False
    return not (end and day > dt.date.fromisoformat(end))
|
||
|
||
|
||
def _zones_from_text(summary: str, location: str) -> List[str]:
|
||
s = f"{summary} {location}".lower()
|
||
zones: List[str] = []
|
||
if "corse" in s:
|
||
zones.append("Corse")
|
||
# Chercher motifs Zones A/B/C, B/C, A/B, Zone A, etc.
|
||
# Simpliste mais robuste pour notre source
|
||
if "zones a/b/c" in s or "zones a / b / c" in s:
|
||
zones.extend(["A", "B", "C"])
|
||
else:
|
||
if "zones a/b" in s or "zones a / b" in s:
|
||
zones.extend(["A", "B"])
|
||
if "zones b/c" in s or "zones b / c" in s:
|
||
zones.extend(["B", "C"])
|
||
if "zones a/c" in s or "zones a / c" in s:
|
||
zones.extend(["A", "C"])
|
||
if "zone a" in s:
|
||
zones.append("A")
|
||
if "zone b" in s:
|
||
zones.append("B")
|
||
if "zone c" in s:
|
||
zones.append("C")
|
||
# Dédupliquer en conservant l'ordre
|
||
seen = set()
|
||
out: List[str] = []
|
||
for z in zones:
|
||
if z not in seen:
|
||
seen.add(z)
|
||
out.append(z)
|
||
return out or ["A", "B", "C"] # fallback si non indiqué
|
||
|
||
|
||
def _parse_ics_events(ics_text: str) -> List[Dict[str, Any]]:
|
||
events: List[Dict[str, Any]] = []
|
||
current: Dict[str, str] = {}
|
||
in_event = False
|
||
for raw in ics_text.splitlines():
|
||
line = raw.strip()
|
||
if line == "BEGIN:VEVENT":
|
||
in_event = True
|
||
current = {}
|
||
continue
|
||
if line == "END:VEVENT":
|
||
if current:
|
||
events.append(current)
|
||
in_event = False
|
||
current = {}
|
||
continue
|
||
if not in_event:
|
||
continue
|
||
if line.startswith("DTSTART"):
|
||
# DTSTART;VALUE=DATE:YYYYMMDD
|
||
val = line.split(":", 1)[-1]
|
||
current["DTSTART"] = val
|
||
elif line.startswith("DTEND"):
|
||
val = line.split(":", 1)[-1]
|
||
current["DTEND"] = val
|
||
elif line.startswith("SUMMARY:"):
|
||
current["SUMMARY"] = line[len("SUMMARY:"):].strip()
|
||
elif line.startswith("LOCATION:"):
|
||
current["LOCATION"] = line[len("LOCATION:"):].strip()
|
||
return events
|
||
|
||
|
||
def _yymmdd_to_iso(d: str) -> str:
|
||
# d: YYYYMMDD
|
||
return f"{d[0:4]}-{d[4:6]}-{d[6:8]}"
|
||
|
||
|
||
def fetch_sources(cache_cfg: CacheConfig) -> Dict[str, Any]:
    """Return the raw holiday data, from the cache when possible.

    When ``load_cache`` yields a truthy value it is returned unchanged.
    Otherwise the public holidays of the current year and the school-holiday
    ICS calendar are downloaded, the result is stored with ``save_cache``
    and returned.

    Args:
        cache_cfg: Cache settings handed to ``load_cache`` / ``save_cache``.

    Returns:
        On a cache miss, a dict with two keys: ``"jours_feries"`` (the JSON
        payload of the public-holiday API) and ``"vacances_scolaires_ics"``
        (a list of dicts with ``label``, ``start``, ``stop`` and ``zones``).
        On a cache hit, whatever ``load_cache`` returned.

    Raises:
        requests.HTTPError: When the ICS download answers with an error
            status (via ``raise_for_status``).
        ValueError: When an event's DTEND cannot be parsed as a date.
    """
    cached = load_cache(cache_cfg)
    if cached:
        return cached

    # Public holidays for metropolitan France, current year only.
    current_year = dt.date.today().year
    payload: Dict[str, Any] = {
        "jours_feries": http_get_json(
            f"https://calendrier.api.gouv.fr/jours-feries/metropole/{current_year}.json"
        ),
    }

    # School holidays from the data.gouv ICS calendar.
    import requests

    response = requests.get(ICS_URL, timeout=30)
    response.raise_for_status()

    one_day = dt.timedelta(days=1)
    periods: List[Dict[str, Any]] = []
    for event in _parse_ics_events(response.text):
        raw_start = event.get("DTSTART")
        raw_end = event.get("DTEND")
        title = event.get("SUMMARY", "")
        place = event.get("LOCATION", "")
        if not raw_start or not raw_end or not title:
            continue

        first_day = _yymmdd_to_iso(raw_start)
        # A date-valued DTEND is exclusive in ICS, so the inclusive last day
        # is one day earlier.
        exclusive_end = dt.date.fromisoformat(_yymmdd_to_iso(raw_end))
        last_day = (exclusive_end - one_day).isoformat()

        periods.append({
            "label": title,
            "start": first_day,
            "stop": last_day,
            "zones": _zones_from_text(title, place),
        })

    payload["vacances_scolaires_ics"] = periods
    save_cache(cache_cfg, payload)
    return payload
|
||
|
||
|
||
def convert_to_oedb(data: Dict[str, Any], start: str | None, end: str | None, academie: str | None, limit: int | None = None) -> List[Dict[str, Any]]:
|
||
features: List[Dict[str, Any]] = []
|
||
|
||
# Jours fériés
|
||
jf: Dict[str, str] = data.get("jours_feries", {}) or {}
|
||
for date_iso, label in jf.items():
|
||
if not date_in_range(date_iso, start, end):
|
||
continue
|
||
# Améliorer le nom avec la date
|
||
try:
|
||
date_obj = dt.date.fromisoformat(date_iso)
|
||
day_name = date_obj.strftime("%A %d %B %Y")
|
||
full_label = f"{label} ({day_name})"
|
||
except:
|
||
full_label = label
|
||
|
||
feature = oedb_feature(
|
||
label=full_label,
|
||
what="time.daylight.holiday",
|
||
start=f"{date_iso}T00:00:00Z",
|
||
stop=f"{date_iso}T23:59:59Z",
|
||
description="Jour férié national",
|
||
where="France",
|
||
)
|
||
# Ajouter la propriété type requise par l'API OEDB
|
||
feature["properties"]["type"] = "scheduled"
|
||
features.append(feature)
|
||
# Appliquer la limite si définie
|
||
if limit and len(features) >= limit:
|
||
return features[:limit]
|
||
|
||
# Vacances scolaires via ICS – un évènement par zone listée
|
||
vs_ics: List[Dict[str, Any]] = data.get("vacances_scolaires_ics", []) or []
|
||
for item in vs_ics:
|
||
s = item.get("start")
|
||
e = item.get("stop")
|
||
label = item.get("label") or "Vacances scolaires"
|
||
zones: List[str] = item.get("zones") or []
|
||
if not (s and e and zones):
|
||
continue
|
||
if not (date_in_range(s, start, end) or date_in_range(e, start, end)):
|
||
continue
|
||
for z in zones:
|
||
if academie and z != academie:
|
||
continue
|
||
# Améliorer le nom avec la période et la zone
|
||
try:
|
||
start_date = dt.date.fromisoformat(s)
|
||
end_date = dt.date.fromisoformat(e)
|
||
period_duration = (end_date - start_date).days + 1
|
||
full_label = f"{label} - Zone {z} ({period_duration} jours)"
|
||
except:
|
||
full_label = f"{label} - Zone {z}"
|
||
feature = oedb_feature(
|
||
label=full_label,
|
||
what="time.holidays",
|
||
start=f"{s}T00:00:00Z",
|
||
stop=f"{e}T23:59:59Z",
|
||
description=f"Vacances scolaires zone {z}",
|
||
where=f"Zone {z}",
|
||
)
|
||
# Ajouter la propriété type requise par l'API OEDB
|
||
feature["properties"]["type"] = "event"
|
||
features.append(feature)
|
||
|
||
return features
|
||
|
||
|
||
def main() -> int:
    """Run the extractor: parse options, fetch, convert, send and report.

    Returns:
        Always ``0``; the success/failure counters are printed as JSON
        rather than reflected in the return value.
    """
    options = build_args()

    source_data = fetch_sources(
        CacheConfig(path=options.cache, ttl_seconds=options.cache_ttl)
    )
    features = convert_to_oedb(
        source_data,
        options.start,
        options.end,
        options.academie,
        options.limit,
    )

    # A "sent" cache is handed to the sender to avoid re-sending events that
    # were already processed.
    counters = post_oedb_features(
        options.base_url,
        features,
        dry_run=options.dry_run,
        sent_cache_path="extractors_cache/fr_holidays_sent.json",
    )
    ok, failed, neterr = counters

    print(json_report(ok, failed, neterr))
    return 0
|
||
|
||
|
||
def json_report(ok: int, failed: int, neterr: int) -> str:
    """Format the three submission counters as an indented JSON object.

    Args:
        ok: Value stored under ``"success"``.
        failed: Value stored under ``"failed"``.
        neterr: Value stored under ``"networkErrors"``.

    Returns:
        The JSON text, indented by two spaces.
    """
    import json

    summary = {
        "success": ok,
        "failed": failed,
        "networkErrors": neterr,
    }
    return json.dumps(summary, indent=2)
|
||
|
||
|
||
if __name__ == "__main__":
    # Script entry point: main()'s return value becomes the exit status.
    exit_code = main()
    sys.exit(exit_code)
|
||
|
||
|