# osm-commerces/fetch_communes.py
# NOTE(review): the original paste carried web-viewer chrome here
# ("244 lines / 9.4 KiB / Python / Raw Normal View History"); it was page
# residue, not code, and has been converted into this comment header.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Script to fetch all communes in France from the geo.api.gouv.fr API
and save them to a CSV file with all available information.
"""
import csv
import json
import requests
import time
from pathlib import Path
# Configuration
BASE_URL = "https://geo.api.gouv.fr"  # root of the French government geo API
OUTPUT_FILE = "communes_france.csv"  # CSV file written/updated in the current directory
REQUEST_DELAY = 0.5 # Delay between API requests in seconds to avoid rate limiting
def fetch_departments():
    """Fetch the list of all departments in France.

    Department numbers go from 01 to 95 (metropolitan France, with
    Corsica's "20" replaced by "2A"/"2B"), then from 971 to 976
    (overseas departments).

    Returns:
        list[dict]: one dict per expected department code — the API
        payload when available, otherwise a minimal placeholder with
        keys "nom", "code" and "codeRegion".

    Raises:
        requests.HTTPError: if the API request fails.
    """
    # Build the full list of expected department codes, zero-padded
    # for single-digit metropolitan departments (01-95).
    dept_codes = [f"{i:02d}" for i in range(1, 96)]

    # Special case for Corsica (2A and 2B instead of 20).
    if "20" in dept_codes:
        dept_codes.remove("20")
        dept_codes.extend(["2A", "2B"])

    # Overseas departments (971-976).
    dept_codes.extend(str(i) for i in range(971, 977))

    # Fetch department details from the API.  A timeout keeps a stalled
    # connection from hanging the script forever.
    url = f"{BASE_URL}/departements"
    response = requests.get(url, timeout=30)
    response.raise_for_status()  # Raise an exception for HTTP errors
    api_departments = response.json()

    # Map department code -> full department info from the API.
    dept_map = {dept["code"]: dept for dept in api_departments}

    # Build the final list of departments, ensuring every expected code
    # is included even when the API does not know it.
    departments = []
    for code in dept_codes:
        if code in dept_map:
            # Use the data from the API if available.
            departments.append(dept_map[code])
        else:
            # Create a minimal placeholder object if not in the API.
            departments.append({
                "nom": f"Département {code}",
                "code": code,
                "codeRegion": "",
            })
            print(f"Warning: Department {code} not found in API, using placeholder data")
    return departments
def fetch_communes_for_department(dept_code):
    """Fetch all communes for a specific department.

    Args:
        dept_code: INSEE department code (e.g. "01", "2A", "971").

    Returns:
        list[dict]: communes as returned by the geo API.

    Raises:
        requests.HTTPError: if the API request fails.
    """
    url = f"{BASE_URL}/departements/{dept_code}/communes"
    print(f"Fetching communes for department {dept_code}...")
    # Timeout so one stalled request cannot hang the whole run.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    return response.json()
def main():
    """Fetch every French commune and merge them into OUTPUT_FILE.

    Rows already present in the CSV (keyed by INSEE code) are kept and
    only communes missing from the file are fetched and appended.
    Errors for a single department are logged and the run continues.
    """
    # Create output directory if it doesn't exist.
    output_path = Path(OUTPUT_FILE)
    output_path.parent.mkdir(exist_ok=True)

    existing_communes, existing_headers = _read_existing_csv(output_path)

    try:
        departments = fetch_departments()
        print(f"Found {len(departments)} departments")

        # Collect communes not yet in the CSV.
        new_communes = []
        for dept in departments:
            dept_code = dept['code']
            try:
                # Skip department 975 (Saint-Pierre-et-Miquelon) if it's a
                # placeholder, as it might not be available in the API.
                if dept_code == "975" and dept['nom'] == "Département 975":
                    print(f" - Skipping department {dept_code} (placeholder, not available in API)")
                    continue
                communes = fetch_communes_for_department(dept_code)
                # Filter out communes that already exist in the CSV.
                new_dept_communes = [c for c in communes
                                     if c['code'] not in existing_communes]
                if new_dept_communes:
                    new_communes.extend(new_dept_communes)
                    print(f" - Added {len(new_dept_communes)} new communes from department {dept_code} ({dept['nom']})")
                else:
                    print(f" - No new communes found for department {dept_code} ({dept['nom']})")
                time.sleep(REQUEST_DELAY)  # Be nice to the API
            except Exception as e:
                print(f"Error fetching communes for department {dept_code}: {e}")

        print(f"Total new communes found: {len(new_communes)}")

        # If no new communes and no existing communes, exit.
        if not new_communes and not existing_communes:
            print("No communes found. Exiting.")
            return

        if new_communes:
            # Flatten nested API fields, then derive the header row from
            # ALL new communes (the original used only the first commune's
            # keys, silently dropping fields present only in later rows).
            for commune in new_communes:
                _flatten_commune(commune)
            headers = _collect_headers(new_communes)
        else:
            # No new communes: keep the existing file layout.
            headers = existing_headers

        # Combine existing and new communes, then rewrite the CSV.
        all_rows = list(existing_communes.values()) + new_communes
        with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=headers)
            writer.writeheader()
            for row in all_rows:
                # Restrict every row to the header set so DictWriter never
                # raises on unexpected keys; missing fields become ''.
                writer.writerow({h: row.get(h, '') for h in headers})

        if new_communes:
            print(f"CSV file updated successfully with {len(new_communes)} new communes: {output_path}")
        else:
            print(f"No new communes added. CSV file remains unchanged: {output_path}")
    except Exception as e:
        print(f"An error occurred: {e}")


def _read_existing_csv(output_path):
    """Read previously saved communes from *output_path*.

    Returns:
        tuple[dict, list]: (rows keyed by INSEE code, header list); both
        empty when the file is absent or unreadable.
    """
    existing = {}
    headers = []
    if output_path.exists():
        print(f"CSV file {OUTPUT_FILE} already exists. Reading existing communes...")
        try:
            with open(output_path, 'r', newline='', encoding='utf-8') as csvfile:
                reader = csv.DictReader(csvfile)
                headers = reader.fieldnames or []
                for row in reader:
                    # Use the INSEE code as the key to avoid duplicates.
                    if row.get('code'):
                        existing[row['code']] = row
            print(f"Read {len(existing)} existing communes from CSV file.")
        except Exception as e:
            print(f"Error reading existing CSV file: {e}")
            print("Will create a new file.")
            existing = {}
            headers = []
    return existing, headers


def _flatten_commune(commune):
    """Flatten nested API fields of *commune* in place for CSV output.

    "codesPostaux" (a list) becomes a "|"-joined string; "centre" (a
    GeoJSON-style point dict) becomes "longitude"/"latitude" columns.
    """
    codes = commune.get("codesPostaux")
    if isinstance(codes, list):
        commune["codesPostaux"] = "|".join(str(v) for v in codes)
    centre = commune.get("centre")
    if isinstance(centre, dict):
        if "coordinates" in centre:
            # GeoJSON order is [longitude, latitude].
            commune["longitude"] = centre["coordinates"][0]
            commune["latitude"] = centre["coordinates"][1]
        commune.pop("centre", None)  # Remove the original nested dict


def _collect_headers(communes):
    """Return the union of keys across *communes*, in first-seen order."""
    headers = []
    seen = set()
    for commune in communes:
        for key in commune:
            if key not in seen:
                seen.add(key)
                headers.append(key)
    return headers
def test_sample():
    """Run a quick smoke test against a small sample of departments.

    Fetches one metropolitan department (01), Corsica (2A) and one
    overseas department (971), then prints a few communes from each.
    Intended for manual runs only (see the __main__ guard).
    """
    sample_dept_codes = ["01", "2A", "971"]
    print(f"Testing with sample departments: {', '.join(sample_dept_codes)}")

    # Fetch department details from the API (timeout avoids hanging).
    url = f"{BASE_URL}/departements"
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    api_departments = response.json()

    # Map department code -> full department info.
    dept_map = {dept["code"]: dept for dept in api_departments}

    # Fetch communes for each sample department.
    all_communes = []
    for dept_code in sample_dept_codes:
        if dept_code in dept_map:
            dept = dept_map[dept_code]
            try:
                communes = fetch_communes_for_department(dept_code)
                all_communes.extend(communes)
                print(f" - Added {len(communes)} communes from department {dept_code} ({dept['nom']})")
                time.sleep(REQUEST_DELAY)
            except Exception as e:
                print(f"Error fetching communes for department {dept_code}: {e}")
        else:
            print(f"Department {dept_code} not found in API")

    print(f"Total communes found in sample: {len(all_communes)}")

    # Print a few communes from each department.
    for dept_code in sample_dept_codes:
        dept_communes = [c for c in all_communes
                         if c.get('codeDepartement') == dept_code]
        if dept_communes:
            print(f"\nSample communes from department {dept_code}:")
            for commune in dept_communes[:3]:  # Show first 3 communes
                print(f" - {commune.get('nom')} (code: {commune.get('code')})")
if __name__ == "__main__":
    # Uncomment to run the test with sample departments
    # test_sample()
    # Run the full script
    main()