#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Script to fetch all communes in France from the geo.api.gouv.fr API
and save them to a CSV file with all available information.

The script is incremental: if the output CSV already exists, previously
saved communes (keyed by their INSEE ``code``) are kept and only new
communes are appended.
"""

import csv
import json
import requests
import time
from pathlib import Path

# Configuration
BASE_URL = "https://geo.api.gouv.fr"
OUTPUT_FILE = "communes_france.csv"
REQUEST_DELAY = 0.5  # Delay between API requests in seconds to avoid rate limiting
REQUEST_TIMEOUT = 30  # Per-request timeout in seconds so a hung connection cannot stall the run


def fetch_departments():
    """Fetch the list of all departments in France.

    Department numbers go from 1 to 95 (metropolitan France), then from
    971 to 976 (overseas departments). Corsica uses "2A"/"2B" instead
    of "20".

    Returns:
        list[dict]: one dict per department, taken from the API when
        available, otherwise a minimal placeholder with keys
        ``nom``, ``code`` and ``codeRegion``.

    Raises:
        requests.HTTPError: if the departments endpoint returns an error.
    """
    # Build the full list of expected department codes first, so we can
    # detect any code the API does not know about.
    dept_codes = []

    # Metropolitan departments (01-95), zero-padded to two digits.
    for i in range(1, 96):
        dept_codes.append(f"{i:02d}")

    # Special case for Corsica (2A and 2B instead of 20).
    if "20" in dept_codes:
        dept_codes.remove("20")
        dept_codes.extend(["2A", "2B"])

    # Overseas departments (971-976).
    for i in range(971, 977):
        dept_codes.append(str(i))

    # Fetch department details from the API.
    url = f"{BASE_URL}/departements"
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()  # Raise an exception for HTTP errors
    api_departments = response.json()

    # Map department code -> full department info for O(1) lookup.
    dept_map = {dept["code"]: dept for dept in api_departments}

    # Build the final list, ensuring every expected code is represented.
    departments = []
    for code in dept_codes:
        if code in dept_map:
            departments.append(dept_map[code])
        else:
            # Create a minimal placeholder if the API doesn't know the code.
            departments.append({
                "nom": f"Département {code}",
                "code": code,
                "codeRegion": ""
            })
            print(f"Warning: Department {code} not found in API, using placeholder data")

    return departments


def fetch_communes_for_department(dept_code):
    """Fetch all communes for a specific department.

    Args:
        dept_code: INSEE department code, e.g. "01", "2A", "971".

    Returns:
        list[dict]: the communes as returned by the API.

    Raises:
        requests.HTTPError: if the endpoint returns an error.
    """
    url = f"{BASE_URL}/departements/{dept_code}/communes"
    print(f"Fetching communes for department {dept_code}...")
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.json()


def _read_existing_communes(output_path):
    """Read previously saved communes from the CSV, keyed by INSEE code.

    Returns:
        tuple[dict, list]: (communes keyed by ``code``, header list).
        Both are empty when the file is absent or unreadable.
    """
    existing_communes = {}
    existing_headers = []
    if output_path.exists():
        print(f"CSV file {OUTPUT_FILE} already exists. Reading existing communes...")
        try:
            with open(output_path, 'r', newline='', encoding='utf-8') as csvfile:
                reader = csv.DictReader(csvfile)
                existing_headers = reader.fieldnames or []
                for row in reader:
                    # Use the INSEE code as the key to avoid duplicates.
                    if 'code' in row and row['code']:
                        existing_communes[row['code']] = row
            print(f"Read {len(existing_communes)} existing communes from CSV file.")
        except Exception as e:
            print(f"Error reading existing CSV file: {e}")
            print("Will create a new file.")
            existing_communes = {}
            existing_headers = []
    return existing_communes, existing_headers


def _flatten_commune_fields(new_communes, headers):
    """Flatten nested API fields in-place and return the updated headers.

    ``codesPostaux`` (a list) becomes a "|"-joined string; ``centre``
    (a GeoJSON point) becomes separate ``longitude``/``latitude`` columns.
    """
    for commune in new_communes:
        for key, value in list(commune.items()):
            if isinstance(value, list) and key == "codesPostaux":
                commune[key] = "|".join(str(v) for v in value)
            elif isinstance(value, dict) and key == "centre":
                # GeoJSON convention: coordinates are [longitude, latitude].
                if "coordinates" in value:
                    commune["longitude"] = value["coordinates"][0]
                    commune["latitude"] = value["coordinates"][1]
                commune.pop(key, None)  # Remove the original nested dict

    # Update headers to reflect the flattening.
    if "centre" in headers:
        headers.remove("centre")
        if any("longitude" in c for c in new_communes):
            headers.extend(["longitude", "latitude"])
    return headers


def main():
    """Fetch all communes and merge them into the output CSV."""
    # Create output directory if it doesn't exist.
    output_path = Path(OUTPUT_FILE)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Load any communes already saved so we only append new ones.
    existing_communes, existing_headers = _read_existing_communes(output_path)

    try:
        departments = fetch_departments()
        print(f"Found {len(departments)} departments")

        # Collect communes that are not yet in the CSV.
        new_communes = []
        for dept in departments:
            dept_code = dept['code']
            try:
                # Skip department 975 (Saint-Pierre-et-Miquelon) if it's a
                # placeholder, as it might not be available in the API.
                if dept_code == "975" and dept['nom'] == "Département 975":
                    print(f" - Skipping department {dept_code} (placeholder, not available in API)")
                    continue

                communes = fetch_communes_for_department(dept_code)

                # Filter out communes that already exist in the CSV.
                new_dept_communes = [c for c in communes
                                     if c['code'] not in existing_communes]

                if new_dept_communes:
                    new_communes.extend(new_dept_communes)
                    print(f" - Added {len(new_dept_communes)} new communes from department {dept_code} ({dept['nom']})")
                else:
                    print(f" - No new communes found for department {dept_code} ({dept['nom']})")

                time.sleep(REQUEST_DELAY)  # Be nice to the API
            except Exception as e:
                print(f"Error fetching communes for department {dept_code}: {e}")

        print(f"Total new communes found: {len(new_communes)}")

        # Nothing to write at all? Bail out.
        if not new_communes and not existing_communes:
            print("No communes found. Exiting.")
            return

        if new_communes:
            # Derive headers from the first commune, then flatten nested
            # fields (codesPostaux, centre) and adjust headers accordingly.
            headers = list(new_communes[0].keys())
            headers = _flatten_commune_fields(new_communes, headers)
            # Merge in any pre-existing CSV columns so old rows keep
            # their data (DictWriter would otherwise raise on extras).
            for h in existing_headers:
                if h not in headers:
                    headers.append(h)
        else:
            # No new communes: reuse the CSV's existing header order.
            headers = existing_headers

        # Combine existing and new communes, normalising every row to
        # exactly the chosen header set (missing fields become '').
        all_communes = list(existing_communes.values())
        all_communes.extend(new_communes)

        with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=headers)
            writer.writeheader()
            for commune in all_communes:
                writer.writerow({h: commune.get(h, '') for h in headers})

        if new_communes:
            print(f"CSV file updated successfully with {len(new_communes)} new communes: {output_path}")
        else:
            print(f"No new communes added. CSV file remains unchanged: {output_path}")

    except Exception as e:
        print(f"An error occurred: {e}")


def test_sample():
    """Run a test with a small sample of departments."""
    # Sample departments: one metropolitan (01), Corsica (2A), one overseas (971).
    sample_dept_codes = ["01", "2A", "971"]
    print(f"Testing with sample departments: {', '.join(sample_dept_codes)}")

    # Fetch department details from the API.
    url = f"{BASE_URL}/departements"
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    api_departments = response.json()

    # Map department code -> full department info.
    dept_map = {dept["code"]: dept for dept in api_departments}

    all_communes = []
    for dept_code in sample_dept_codes:
        if dept_code in dept_map:
            dept = dept_map[dept_code]
            try:
                communes = fetch_communes_for_department(dept_code)
                all_communes.extend(communes)
                print(f" - Added {len(communes)} communes from department {dept_code} ({dept['nom']})")
                time.sleep(REQUEST_DELAY)
            except Exception as e:
                print(f"Error fetching communes for department {dept_code}: {e}")
        else:
            print(f"Department {dept_code} not found in API")

    print(f"Total communes found in sample: {len(all_communes)}")

    # Print a few communes from each department.
    for dept_code in sample_dept_codes:
        dept_communes = [c for c in all_communes if c.get('codeDepartement') == dept_code]
        if dept_communes:
            print(f"\nSample communes from department {dept_code}:")
            for commune in dept_communes[:3]:  # Show first 3 communes
                print(f" - {commune.get('nom')} (code: {commune.get('code')})")


if __name__ == "__main__":
    # Uncomment to run the test with sample departments
    # test_sample()

    # Run the full script
    main()