#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Script to fetch all communes in France from the geo.api.gouv.fr API
and save them to a CSV file with all available information.

The script is incremental: if the output CSV already exists, previously
saved communes (keyed by their INSEE ``code``) are kept and only new
communes are appended.
"""

import csv
import json
import requests
import time
from pathlib import Path

# Configuration
BASE_URL = "https://geo.api.gouv.fr"
OUTPUT_FILE = "communes_france.csv"
REQUEST_DELAY = 0.5  # Delay between API requests in seconds to avoid rate limiting
REQUEST_TIMEOUT = 30  # Per-request timeout in seconds so a hung connection cannot stall the run


def fetch_departments():
    """Fetch the list of all departments in France.

    Department numbers go from 1 to 95 (metropolitan France), then from
    971 to 976 (overseas departments). Corsica uses "2A"/"2B" instead
    of "20".

    Returns:
        list[dict]: one dict per department, taken from the API when
        available, otherwise a minimal placeholder with keys
        ``nom``, ``code`` and ``codeRegion``.

    Raises:
        requests.HTTPError: if the departments endpoint returns an error.
    """
    # Build the full list of expected department codes first, so we can
    # detect any code the API does not know about.
    dept_codes = []

    # Metropolitan departments (01-95), zero-padded to two digits.
    for i in range(1, 96):
        dept_codes.append(f"{i:02d}")

    # Special case for Corsica (2A and 2B instead of 20).
    if "20" in dept_codes:
        dept_codes.remove("20")
        dept_codes.extend(["2A", "2B"])

    # Overseas departments (971-976).
    for i in range(971, 977):
        dept_codes.append(str(i))

    # Fetch department details from the API.
    url = f"{BASE_URL}/departements"
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()  # Raise an exception for HTTP errors
    api_departments = response.json()

    # Map department code -> full department info for O(1) lookup.
    dept_map = {dept["code"]: dept for dept in api_departments}

    # Build the final list, ensuring every expected code is represented.
    departments = []
    for code in dept_codes:
        if code in dept_map:
            departments.append(dept_map[code])
        else:
            # Create a minimal placeholder if the API doesn't know the code.
            departments.append({
                "nom": f"Département {code}",
                "code": code,
                "codeRegion": ""
            })
            print(f"Warning: Department {code} not found in API, using placeholder data")

    return departments


def fetch_communes_for_department(dept_code):
    """Fetch all communes for a specific department.

    Args:
        dept_code: INSEE department code, e.g. "01", "2A", "971".

    Returns:
        list[dict]: the communes as returned by the API.

    Raises:
        requests.HTTPError: if the endpoint returns an error.
    """
    url = f"{BASE_URL}/departements/{dept_code}/communes"
    print(f"Fetching communes for department {dept_code}...")
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.json()


def _read_existing_communes(output_path):
    """Read previously saved communes from the CSV, keyed by INSEE code.

    Returns:
        tuple[dict, list]: (communes keyed by ``code``, header list).
        Both are empty when the file is absent or unreadable.
    """
    existing_communes = {}
    existing_headers = []
    if output_path.exists():
        print(f"CSV file {OUTPUT_FILE} already exists. Reading existing communes...")
        try:
            with open(output_path, 'r', newline='', encoding='utf-8') as csvfile:
                reader = csv.DictReader(csvfile)
                existing_headers = reader.fieldnames or []
                for row in reader:
                    # Use the INSEE code as the key to avoid duplicates.
                    if 'code' in row and row['code']:
                        existing_communes[row['code']] = row
            print(f"Read {len(existing_communes)} existing communes from CSV file.")
        except Exception as e:
            print(f"Error reading existing CSV file: {e}")
            print("Will create a new file.")
            existing_communes = {}
            existing_headers = []
    return existing_communes, existing_headers


def _flatten_commune_fields(new_communes, headers):
    """Flatten nested API fields in-place and return the updated headers.

    ``codesPostaux`` (a list) becomes a "|"-joined string; ``centre``
    (a GeoJSON point) becomes separate ``longitude``/``latitude`` columns.
    """
    for commune in new_communes:
        for key, value in list(commune.items()):
            if isinstance(value, list) and key == "codesPostaux":
                commune[key] = "|".join(str(v) for v in value)
            elif isinstance(value, dict) and key == "centre":
                # GeoJSON convention: coordinates are [longitude, latitude].
                if "coordinates" in value:
                    commune["longitude"] = value["coordinates"][0]
                    commune["latitude"] = value["coordinates"][1]
                commune.pop(key, None)  # Remove the original nested dict

    # Update headers to reflect the flattening.
    if "centre" in headers:
        headers.remove("centre")
        if any("longitude" in c for c in new_communes):
            headers.extend(["longitude", "latitude"])
    return headers


def main():
    """Fetch all communes and merge them into the output CSV."""
    # Create output directory if it doesn't exist.
    output_path = Path(OUTPUT_FILE)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Load any communes already saved so we only append new ones.
    existing_communes, existing_headers = _read_existing_communes(output_path)

    try:
        departments = fetch_departments()
        print(f"Found {len(departments)} departments")

        # Collect communes that are not yet in the CSV.
        new_communes = []
        for dept in departments:
            dept_code = dept['code']
            try:
                # Skip department 975 (Saint-Pierre-et-Miquelon) if it's a
                # placeholder, as it might not be available in the API.
                if dept_code == "975" and dept['nom'] == "Département 975":
                    print(f" - Skipping department {dept_code} (placeholder, not available in API)")
                    continue

                communes = fetch_communes_for_department(dept_code)

                # Filter out communes that already exist in the CSV.
                new_dept_communes = [c for c in communes
                                     if c['code'] not in existing_communes]

                if new_dept_communes:
                    new_communes.extend(new_dept_communes)
                    print(f" - Added {len(new_dept_communes)} new communes from department {dept_code} ({dept['nom']})")
                else:
                    print(f" - No new communes found for department {dept_code} ({dept['nom']})")

                time.sleep(REQUEST_DELAY)  # Be nice to the API
            except Exception as e:
                print(f"Error fetching communes for department {dept_code}: {e}")

        print(f"Total new communes found: {len(new_communes)}")

        # Nothing to write at all? Bail out.
        if not new_communes and not existing_communes:
            print("No communes found. Exiting.")
            return

        if new_communes:
            # Derive headers from the first commune, then flatten nested
            # fields (codesPostaux, centre) and adjust headers accordingly.
            headers = list(new_communes[0].keys())
            headers = _flatten_commune_fields(new_communes, headers)
            # Merge in any pre-existing CSV columns so old rows keep
            # their data (DictWriter would otherwise raise on extras).
            for h in existing_headers:
                if h not in headers:
                    headers.append(h)
        else:
            # No new communes: reuse the CSV's existing header order.
            headers = existing_headers

        # Combine existing and new communes, normalising every row to
        # exactly the chosen header set (missing fields become '').
        all_communes = list(existing_communes.values())
        all_communes.extend(new_communes)

        with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=headers)
            writer.writeheader()
            for commune in all_communes:
                writer.writerow({h: commune.get(h, '') for h in headers})

        if new_communes:
            print(f"CSV file updated successfully with {len(new_communes)} new communes: {output_path}")
        else:
            print(f"No new communes added. CSV file remains unchanged: {output_path}")

    except Exception as e:
        print(f"An error occurred: {e}")


def test_sample():
    """Run a test with a small sample of departments."""
    # Sample departments: one metropolitan (01), Corsica (2A), one overseas (971).
    sample_dept_codes = ["01", "2A", "971"]
    print(f"Testing with sample departments: {', '.join(sample_dept_codes)}")

    # Fetch department details from the API.
    url = f"{BASE_URL}/departements"
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    api_departments = response.json()

    # Map department code -> full department info.
    dept_map = {dept["code"]: dept for dept in api_departments}

    all_communes = []
    for dept_code in sample_dept_codes:
        if dept_code in dept_map:
            dept = dept_map[dept_code]
            try:
                communes = fetch_communes_for_department(dept_code)
                all_communes.extend(communes)
                print(f" - Added {len(communes)} communes from department {dept_code} ({dept['nom']})")
                time.sleep(REQUEST_DELAY)
            except Exception as e:
                print(f"Error fetching communes for department {dept_code}: {e}")
        else:
            print(f"Department {dept_code} not found in API")

    print(f"Total communes found in sample: {len(all_communes)}")

    # Print a few communes from each department.
    for dept_code in sample_dept_codes:
        dept_communes = [c for c in all_communes if c.get('codeDepartement') == dept_code]
        if dept_communes:
            print(f"\nSample communes from department {dept_code}:")
            for commune in dept_communes[:3]:  # Show first 3 communes
                print(f" - {commune.get('nom')} (code: {commune.get('code')})")


if __name__ == "__main__":
    # Uncomment to run the test with sample departments
    # test_sample()

    # Run the full script
    main()