# osm-commerces/fetch_communes.py
# NOTE(review): the original paste carried web-viewer chrome here
# ("244 lines / 9.4 KiB / Python / Raw Normal View History"); it was page
# residue, not code, and has been converted into this comment header.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Script to fetch all communes in France from the geo.api.gouv.fr API
and save them to a CSV file with all available information.
"""
import csv
import json
import requests
import time
from pathlib import Path
# Configuration
BASE_URL = "https://geo.api.gouv.fr"  # root of the French government geo API
OUTPUT_FILE = "communes_france.csv"  # CSV file written/updated in the current directory
REQUEST_DELAY = 0.5 # Delay between API requests in seconds to avoid rate limiting
def fetch_departments():
    """Fetch the list of all departments in France.

    Department numbers go from 01 to 95 (metropolitan France, with
    Corsica's "20" replaced by "2A"/"2B"), then from 971 to 976
    (overseas departments).

    Returns:
        list[dict]: one dict per expected department code — the API
        payload when available, otherwise a minimal placeholder with
        keys "nom", "code" and "codeRegion".

    Raises:
        requests.HTTPError: if the API request fails.
    """
    # Build the full list of expected department codes, zero-padded
    # for single-digit metropolitan departments (01-95).
    dept_codes = [f"{i:02d}" for i in range(1, 96)]

    # Special case for Corsica (2A and 2B instead of 20).
    if "20" in dept_codes:
        dept_codes.remove("20")
        dept_codes.extend(["2A", "2B"])

    # Overseas departments (971-976).
    dept_codes.extend(str(i) for i in range(971, 977))

    # Fetch department details from the API.  A timeout keeps a stalled
    # connection from hanging the script forever.
    url = f"{BASE_URL}/departements"
    response = requests.get(url, timeout=30)
    response.raise_for_status()  # Raise an exception for HTTP errors
    api_departments = response.json()

    # Map department code -> full department info from the API.
    dept_map = {dept["code"]: dept for dept in api_departments}

    # Build the final list of departments, ensuring every expected code
    # is included even when the API does not know it.
    departments = []
    for code in dept_codes:
        if code in dept_map:
            # Use the data from the API if available.
            departments.append(dept_map[code])
        else:
            # Create a minimal placeholder object if not in the API.
            departments.append({
                "nom": f"Département {code}",
                "code": code,
                "codeRegion": "",
            })
            print(f"Warning: Department {code} not found in API, using placeholder data")
    return departments
def fetch_communes_for_department(dept_code):
    """Fetch all communes for a specific department.

    Args:
        dept_code: INSEE department code (e.g. "01", "2A", "971").

    Returns:
        list[dict]: communes as returned by the geo API.

    Raises:
        requests.HTTPError: if the API request fails.
    """
    url = f"{BASE_URL}/departements/{dept_code}/communes"
    print(f"Fetching communes for department {dept_code}...")
    # Timeout so one stalled request cannot hang the whole run.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    return response.json()
def main():
    """Fetch every French commune and merge them into OUTPUT_FILE.

    Rows already present in the CSV (keyed by INSEE code) are kept and
    only communes missing from the file are fetched and appended.
    Errors for a single department are logged and the run continues.
    """
    # Create output directory if it doesn't exist.
    output_path = Path(OUTPUT_FILE)
    output_path.parent.mkdir(exist_ok=True)

    existing_communes, existing_headers = _read_existing_csv(output_path)

    try:
        departments = fetch_departments()
        print(f"Found {len(departments)} departments")

        # Collect communes not yet in the CSV.
        new_communes = []
        for dept in departments:
            dept_code = dept['code']
            try:
                # Skip department 975 (Saint-Pierre-et-Miquelon) if it's a
                # placeholder, as it might not be available in the API.
                if dept_code == "975" and dept['nom'] == "Département 975":
                    print(f" - Skipping department {dept_code} (placeholder, not available in API)")
                    continue
                communes = fetch_communes_for_department(dept_code)
                # Filter out communes that already exist in the CSV.
                new_dept_communes = [c for c in communes
                                     if c['code'] not in existing_communes]
                if new_dept_communes:
                    new_communes.extend(new_dept_communes)
                    print(f" - Added {len(new_dept_communes)} new communes from department {dept_code} ({dept['nom']})")
                else:
                    print(f" - No new communes found for department {dept_code} ({dept['nom']})")
                time.sleep(REQUEST_DELAY)  # Be nice to the API
            except Exception as e:
                print(f"Error fetching communes for department {dept_code}: {e}")

        print(f"Total new communes found: {len(new_communes)}")

        # If no new communes and no existing communes, exit.
        if not new_communes and not existing_communes:
            print("No communes found. Exiting.")
            return

        if new_communes:
            # Flatten nested API fields, then derive the header row from
            # ALL new communes (the original used only the first commune's
            # keys, silently dropping fields present only in later rows).
            for commune in new_communes:
                _flatten_commune(commune)
            headers = _collect_headers(new_communes)
        else:
            # No new communes: keep the existing file layout.
            headers = existing_headers

        # Combine existing and new communes, then rewrite the CSV.
        all_rows = list(existing_communes.values()) + new_communes
        with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=headers)
            writer.writeheader()
            for row in all_rows:
                # Restrict every row to the header set so DictWriter never
                # raises on unexpected keys; missing fields become ''.
                writer.writerow({h: row.get(h, '') for h in headers})

        if new_communes:
            print(f"CSV file updated successfully with {len(new_communes)} new communes: {output_path}")
        else:
            print(f"No new communes added. CSV file remains unchanged: {output_path}")
    except Exception as e:
        print(f"An error occurred: {e}")


def _read_existing_csv(output_path):
    """Read previously saved communes from *output_path*.

    Returns:
        tuple[dict, list]: (rows keyed by INSEE code, header list); both
        empty when the file is absent or unreadable.
    """
    existing = {}
    headers = []
    if output_path.exists():
        print(f"CSV file {OUTPUT_FILE} already exists. Reading existing communes...")
        try:
            with open(output_path, 'r', newline='', encoding='utf-8') as csvfile:
                reader = csv.DictReader(csvfile)
                headers = reader.fieldnames or []
                for row in reader:
                    # Use the INSEE code as the key to avoid duplicates.
                    if row.get('code'):
                        existing[row['code']] = row
            print(f"Read {len(existing)} existing communes from CSV file.")
        except Exception as e:
            print(f"Error reading existing CSV file: {e}")
            print("Will create a new file.")
            existing = {}
            headers = []
    return existing, headers


def _flatten_commune(commune):
    """Flatten nested API fields of *commune* in place for CSV output.

    "codesPostaux" (a list) becomes a "|"-joined string; "centre" (a
    GeoJSON-style point dict) becomes "longitude"/"latitude" columns.
    """
    codes = commune.get("codesPostaux")
    if isinstance(codes, list):
        commune["codesPostaux"] = "|".join(str(v) for v in codes)
    centre = commune.get("centre")
    if isinstance(centre, dict):
        if "coordinates" in centre:
            # GeoJSON order is [longitude, latitude].
            commune["longitude"] = centre["coordinates"][0]
            commune["latitude"] = centre["coordinates"][1]
        commune.pop("centre", None)  # Remove the original nested dict


def _collect_headers(communes):
    """Return the union of keys across *communes*, in first-seen order."""
    headers = []
    seen = set()
    for commune in communes:
        for key in commune:
            if key not in seen:
                seen.add(key)
                headers.append(key)
    return headers
def test_sample():
    """Run a quick smoke test against a small sample of departments.

    Fetches one metropolitan department (01), Corsica (2A) and one
    overseas department (971), then prints a few communes from each.
    Intended for manual runs only (see the __main__ guard).
    """
    sample_dept_codes = ["01", "2A", "971"]
    print(f"Testing with sample departments: {', '.join(sample_dept_codes)}")

    # Fetch department details from the API (timeout avoids hanging).
    url = f"{BASE_URL}/departements"
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    api_departments = response.json()

    # Map department code -> full department info.
    dept_map = {dept["code"]: dept for dept in api_departments}

    # Fetch communes for each sample department.
    all_communes = []
    for dept_code in sample_dept_codes:
        if dept_code in dept_map:
            dept = dept_map[dept_code]
            try:
                communes = fetch_communes_for_department(dept_code)
                all_communes.extend(communes)
                print(f" - Added {len(communes)} communes from department {dept_code} ({dept['nom']})")
                time.sleep(REQUEST_DELAY)
            except Exception as e:
                print(f"Error fetching communes for department {dept_code}: {e}")
        else:
            print(f"Department {dept_code} not found in API")

    print(f"Total communes found in sample: {len(all_communes)}")

    # Print a few communes from each department.
    for dept_code in sample_dept_codes:
        dept_communes = [c for c in all_communes
                         if c.get('codeDepartement') == dept_code]
        if dept_communes:
            print(f"\nSample communes from department {dept_code}:")
            for commune in dept_communes[:3]:  # Show first 3 communes
                print(f" - {commune.get('nom')} (code: {commune.get('code')})")
if __name__ == "__main__":
    # Uncomment to run the test with sample departments
    # test_sample()
    # Run the full script
    main()