mirror of
https://forge.chapril.org/tykayn/osm-commerces
synced 2025-10-04 17:04:53 +02:00
fetch des infos sur toutes les communes du pays
This commit is contained in:
parent
83d1972589
commit
692e609a46
2 changed files with 35120 additions and 0 deletions
34876
communes_france.csv
Normal file
34876
communes_france.csv
Normal file
File diff suppressed because it is too large
Load diff
244
fetch_communes.py
Normal file
244
fetch_communes.py
Normal file
|
@ -0,0 +1,244 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
"""
|
||||||
|
Script to fetch all communes in France from the geo.api.gouv.fr API
|
||||||
|
and save them to a CSV file with all available information.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import csv
|
||||||
|
import json
|
||||||
|
import requests
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Configuration
# Root of the geo.api.gouv.fr API; endpoint paths are appended to it.
BASE_URL = "https://geo.api.gouv.fr"
# CSV file the communes are written to (relative to the working directory).
OUTPUT_FILE = "communes_france.csv"
# Pause between two API requests, in seconds, to avoid rate limiting.
REQUEST_DELAY = 0.5
|
||||||
|
|
||||||
|
def fetch_departments(timeout=30):
    """Fetch the list of all departments in France.

    The expected department codes are built locally: 01 to 95 for
    metropolitan France (with Corsica's 2A and 2B replacing 20), then
    971 to 976 for the overseas departments. Each code is then looked up
    in the response of the API's ``/departements`` endpoint.

    Args:
        timeout: Seconds to wait for the API before giving up. Without a
            timeout a stalled connection would block the script forever.

    Returns:
        A list with one dict per expected code, in the order
        01..19, 21..95, 2A, 2B, 971..976. Entries come from the API when
        the code is known there; otherwise a placeholder dict with the
        keys "nom", "code" and "codeRegion" is used.

    Raises:
        requests.RequestException: If the request fails, times out, or
            the API answers with an HTTP error status.
    """
    # Metropolitan codes are zero-padded to two digits; 20 does not exist
    # because Corsica is split into 2A and 2B.
    metropolitan_codes = [f"{i:02d}" for i in range(1, 96) if i != 20]
    corsica_codes = ["2A", "2B"]
    overseas_codes = [str(i) for i in range(971, 977)]
    dept_codes = metropolitan_codes + corsica_codes + overseas_codes

    # Fetch department details from the API
    url = f"{BASE_URL}/departements"
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # Raise an exception for HTTP errors
    api_departments = response.json()

    # Map each department code to the full department record
    dept_map = {dept["code"]: dept for dept in api_departments}

    # Build the final list, ensuring every expected code is represented
    departments = []
    for code in dept_codes:
        if code in dept_map:
            departments.append(dept_map[code])
            continue

        # The API does not know this code: keep a minimal stand-in so the
        # caller can still see (and decide to skip) the department.
        departments.append({
            "nom": f"Département {code}",
            "code": code,
            "codeRegion": "",
        })
        print(f"Warning: Department {code} not found in API, using placeholder data")

    return departments
|
||||||
|
|
||||||
|
def fetch_communes_for_department(dept_code, timeout=30):
    """Fetch all communes for a specific department.

    Args:
        dept_code: Department code as used by the API (e.g. "01", "2A",
            "971").
        timeout: Seconds to wait for the API before giving up, so that a
            stalled connection cannot block the whole run.

    Returns:
        The decoded JSON body of the
        ``/departements/{dept_code}/communes`` endpoint.

    Raises:
        requests.RequestException: If the request fails, times out, or
            the API answers with an HTTP error status.
    """
    url = f"{BASE_URL}/departements/{dept_code}/communes"
    print(f"Fetching communes for department {dept_code}...")
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.json()
|
||||||
|
|
||||||
|
def _load_existing_communes(output_path):
    """Read the communes already stored in the CSV, keyed by their code.

    Args:
        output_path: ``Path`` of the CSV file to read.

    Returns:
        A ``(communes, headers)`` tuple: ``communes`` maps each non-empty
        "code" value to its row dict, ``headers`` is the list of column
        names found in the file. Both are empty when the file is missing
        or cannot be read.
    """
    if not output_path.exists():
        return {}, []

    print(f"CSV file {output_path} already exists. Reading existing communes...")
    existing_communes = {}
    try:
        with open(output_path, 'r', newline='', encoding='utf-8') as csvfile:
            reader = csv.DictReader(csvfile)
            headers = list(reader.fieldnames or [])
            for row in reader:
                # DictReader stores surplus cells under the key None; that
                # key is not a column and would make DictWriter fail later.
                row.pop(None, None)
                # Use the INSEE code as the key to avoid duplicates
                if row.get('code'):
                    existing_communes[row['code']] = row
    except (OSError, UnicodeDecodeError, csv.Error) as e:
        print(f"Error reading existing CSV file: {e}")
        print("Will create a new file.")
        return {}, []

    print(f"Read {len(existing_communes)} existing communes from CSV file.")
    return existing_communes, headers


def _flatten_commune(commune):
    """Return a flat, CSV-ready copy of one commune record from the API.

    "codesPostaux" lists are joined with "|". A "centre" entry is never
    copied as-is: when it is a dict holding at least two "coordinates",
    they are exposed as "longitude" and "latitude" instead.

    A new dict is built rather than editing ``commune`` in place, because
    adding or removing keys while iterating over a dict raises
    ``RuntimeError``.
    """
    row = {}
    centre = None
    for key, value in commune.items():
        if key == "centre":
            centre = value
        elif key == "codesPostaux" and isinstance(value, list):
            row[key] = "|".join(str(v) for v in value)
        else:
            row[key] = value

    if isinstance(centre, dict):
        coordinates = centre.get("coordinates") or []
        if len(coordinates) >= 2:
            row["longitude"] = coordinates[0]
            row["latitude"] = coordinates[1]
    return row


def _merge_headers(existing_headers, rows):
    """Return the existing headers followed by any new key found in rows.

    Keys are appended in first-seen order, so existing columns keep their
    position and no field present on only some rows is lost.
    """
    headers = list(existing_headers)
    seen = set(headers)
    for row in rows:
        for key in row:
            if key not in seen:
                seen.add(key)
                headers.append(key)
    return headers


def main():
    """Fetch every commune and add the ones missing from the CSV file.

    Communes already present in ``OUTPUT_FILE`` (matched on their "code"
    column) are kept untouched; communes returned by the API that are not
    in the file yet are appended. A department whose request fails is
    reported and skipped, so one failure does not abort the whole run.
    When nothing new is found the file is left as it is.
    """
    # Create output directory if it doesn't exist
    output_path = Path(OUTPUT_FILE)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    existing_communes, existing_headers = _load_existing_communes(output_path)

    # Top-level boundary of the script: report any failure instead of
    # letting a traceback escape.
    try:
        departments = fetch_departments()
        print(f"Found {len(departments)} departments")

        new_communes = []
        failed_departments = []

        for dept in departments:
            dept_code = dept['code']

            # Skip department 975 (Saint-Pierre-et-Miquelon) if it's a
            # placeholder, as it might not be available in the API
            if dept_code == "975" and dept['nom'] == "Département 975":
                print(f" - Skipping department {dept_code} (placeholder, not available in API)")
                continue

            # Deliberately broad: any failure on one department is
            # reported and the run continues with the next one.
            try:
                communes = fetch_communes_for_department(dept_code)
                new_dept_communes = [
                    commune for commune in communes
                    if commune['code'] not in existing_communes
                ]
            except Exception as e:
                print(f"Error fetching communes for department {dept_code}: {e}")
                failed_departments.append(dept_code)
                continue

            if new_dept_communes:
                new_communes.extend(new_dept_communes)
                print(f" - Added {len(new_dept_communes)} new communes from department {dept_code} ({dept['nom']})")
            else:
                print(f" - No new communes found for department {dept_code} ({dept['nom']})")

            time.sleep(REQUEST_DELAY)  # Be nice to the API

        if failed_departments:
            print(f"Warning: {len(failed_departments)} department(s) could not be fetched: {', '.join(failed_departments)}")

        print(f"Total new communes found: {len(new_communes)}")

        if not new_communes and not existing_communes:
            print("No communes found. Exiting.")
            return

        if not new_communes:
            # Nothing to add: do not rewrite the file, so the message
            # below is actually true.
            print(f"No new communes added. CSV file remains unchanged: {output_path}")
            return

        new_rows = [_flatten_commune(commune) for commune in new_communes]
        headers = _merge_headers(existing_headers, new_rows)

        # Rewrite the whole file: existing rows first, then the new ones.
        # Columns missing from a row are written as empty cells.
        with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=headers)
            writer.writeheader()
            writer.writerows(existing_communes.values())
            writer.writerows(new_rows)

        print(f"CSV file updated successfully with {len(new_communes)} new communes: {output_path}")

    except Exception as e:
        print(f"An error occurred: {e}")
|
||||||
|
|
||||||
|
def test_sample():
    """Run a manual smoke test against a small sample of departments.

    Fetches the communes of one metropolitan department (01), one
    Corsican department (2A) and one overseas department (971), then
    prints the total and the first three communes of each. Nothing is
    written to disk.

    Raises:
        requests.RequestException: If the initial ``/departements``
            request fails, times out, or returns an HTTP error status.
            Failures on an individual department are printed and skipped.
    """
    sample_dept_codes = ["01", "2A", "971"]

    print(f"Testing with sample departments: {', '.join(sample_dept_codes)}")

    # Fetch department details from the API; the timeout keeps a stalled
    # connection from blocking the test forever.
    response = requests.get(f"{BASE_URL}/departements", timeout=30)
    response.raise_for_status()

    # Map each department code to the full department record
    dept_map = {dept["code"]: dept for dept in response.json()}

    all_communes = []

    for dept_code in sample_dept_codes:
        dept = dept_map.get(dept_code)
        if dept is None:
            print(f"Department {dept_code} not found in API")
            continue

        try:
            communes = fetch_communes_for_department(dept_code)
        except Exception as e:
            print(f"Error fetching communes for department {dept_code}: {e}")
            continue

        all_communes.extend(communes)
        print(f" - Added {len(communes)} communes from department {dept_code} ({dept['nom']})")
        time.sleep(REQUEST_DELAY)

    print(f"Total communes found in sample: {len(all_communes)}")

    # Print a few communes from each department
    for dept_code in sample_dept_codes:
        dept_communes = [c for c in all_communes if c.get('codeDepartement') == dept_code]
        if not dept_communes:
            continue

        print(f"\nSample communes from department {dept_code}:")
        for commune in dept_communes[:3]:  # Show first 3 communes
            print(f" - {commune.get('nom')} (code: {commune.get('code')})")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Run the full script. To try it against a handful of departments
    # only, call test_sample() here instead of main().
    main()
|
Loading…
Add table
Add a link
Reference in a new issue