mirror of
https://forge.chapril.org/tykayn/wololo
synced 2025-06-20 01:34:42 +02:00
réinit
This commit is contained in:
commit
996524bc6d
107 changed files with 1295536 additions and 0 deletions
63
etalab_data/planing_familial/scrap.py
Normal file
63
etalab_data/planing_familial/scrap.py
Normal file
|
@ -0,0 +1,63 @@
|
|||
import json
|
||||
import geopandas as gpd
|
||||
from bs4 import BeautifulSoup
|
||||
from shapely.geometry import Point
|
||||
|
||||
# Scrape the Planning Familial branch ("antenne") list from a saved HTML page
# (liste.html) and export one point feature per branch as GeoJSON.

BASE_URL = 'https://www.planning-familial.org'

# Services advertised per branch. Each one appears in the page as
# <li class="icon <service>"> and is exported as a yes/no property.
# The order is the column order of the exported properties.
SERVICES = ("violences", "sexualities", "detection", "abortion", "contraception")


def _text_of(parent, tag, css_class):
    """Return the stripped text of the first matching descendant, or "" if absent."""
    node = parent.find(tag, class_=css_class)
    return node.text.strip() if node is not None else ""


def _parse_latlng(raw):
    """Parse a ``data-latlng`` attribute value into ``(latitude, longitude)`` floats.

    Accepts values such as ``[48.85,2.35]`` or ``["48.85","2.35"]``: the
    surrounding brackets, quotes and spaces are stripped from each component.
    Raises ValueError when fewer than two components are present or when a
    component is not a number.
    """
    parts = raw.split(",")
    if len(parts) < 2:
        raise ValueError("data-latlng must hold 'lat,lng', got %r" % (raw,))
    lat_text, lng_text = (part.strip(' []"') for part in parts[:2])
    return float(lat_text), float(lng_text)


# Load the HTML file. The encoding is pinned so decoding does not depend on
# the platform's default locale (assumes the saved page is UTF-8 -- confirm).
with open("liste.html", "r", encoding="utf-8") as file:
    html = file.read()

# Parse the HTML with BeautifulSoup.
soup = BeautifulSoup(html, "html.parser")

# Find every <article> tag holding a branch's information.
# NOTE: a multi-class string passed to class_ is matched against the exact
# value of the class attribute, so this depends on the class order in the page.
articles = soup.find_all("article", class_="node node--type-hp-antenne node--view-mode-map-result")

# One dict per branch, later turned into rows of the GeoDataFrame.
antennes = []

for article in articles:
    # Text fields: any of them may be missing from an article (the phone
    # number notably); a missing field is exported as an empty string.
    titre = _text_of(article, "button", "nsb action-title")
    adresse = _text_of(article, "p", "address")
    telephone = _text_of(article, "p", "tel")

    # Site-relative link to the branch page; left empty when there is no link
    # rather than exporting the bare site root.
    link = article.find("a", class_="icon arrow")
    href = (link.get("href") or "").strip() if link is not None else ""
    website = BASE_URL + href if href else ""

    # The attribute stores latitude first; shapely's Point takes (x, y),
    # i.e. (longitude, latitude).
    latitude, longitude = _parse_latlng(article["data-latlng"])
    geometry = Point(longitude, latitude)

    antenne = {
        "type": "Feature",
        "geometry": geometry,
        "nom": titre,
        "adresse": adresse,
        "contact:phone": telephone,
        "contact:website": website,
    }
    # A service is handled when its <li class="icon ..."> marker is present.
    for service in SERVICES:
        marker = article.find("li", class_="icon " + service)
        antenne["family_planning:handles:" + service] = 'yes' if marker is not None else 'no'

    antennes.append(antenne)

# Without any feature there is no geometry column to export; fail with a clear
# message instead of an obscure geopandas error.
if not antennes:
    raise SystemExit("No branch <article> found in liste.html; nothing to export.")

# Convert the list of branches into a GeoDataFrame. The coordinates are
# latitude/longitude pairs, so declare them as WGS 84.
gdf = gpd.GeoDataFrame(antennes, geometry="geometry", crs="EPSG:4326")

# Export the GeoDataFrame as GeoJSON.
gdf.to_file("antennes_planning_familial.json", driver="GeoJSON")
|
Loading…
Add table
Add a link
Reference in a new issue