wololo/csv_to_geojson.utils.ts

import { FeatureCollection, Feature, Point } from "geojson";
import * as fs from 'fs';
import * as path from 'path';
import csvParser from 'csv-parser';


// Module-level tallies, all starting at zero.
// NOTE(review): none of the functions visible in this file read or update these
// counters — confirm whether they are still needed.
let counter = 0;
let counter_features = 0;
let counter_missing_lat = 0;
let counter_missing_lon = 0;

// Candidate field separators tried, in this order, when detecting a CSV file's delimiter.
const separators = ["|", ';', ',', '\t']

/**
 * Options shared by the CSV/GeoJSON helpers in this module.
 */
export interface CSVConversionOptions {
  /** Directory containing the file; joined with `file` to build the path that is read. */
  dir: string;
  /**
   * File name inside `dir`. `countGeoJSONFeatures` appends `.geojson` to this name
   * to locate the GeoJSON file it inspects.
   */
  file: string;
  /** Name of the column holding the latitude; only used together with `lonColumn`. */
  latColumn?: string;
  /** Name of the column holding the longitude; only used together with `latColumn`. */
  lonColumn?: string;
  /** Name of the column holding a WKT `POINT(lon lat)` value; tried before the lat/lon columns. */
  wktColumn?: string;
  /**
   * Whether the first line contains column names. When falsy, every line is treated
   * as data and columns are named `column0`, `column1`, ...
   */
  hasHeaders?: boolean;
}

/** Source of a regex matching a decimal number with optional sign, fraction and exponent. */
const COORDINATE_NUMBER_SOURCE = String.raw`[-+]?(?:\d+\.?\d*|\.\d+)(?:e[-+]?\d+)?`;

/** Matches a 2D WKT point such as `POINT (2.35 48.85)`; group 1 is the longitude, group 2 the latitude. */
const WKT_POINT_PATTERN = new RegExp(
  String.raw`^\s*POINT\s*\(\s*(${COORDINATE_NUMBER_SOURCE})\s+(${COORDINATE_NUMBER_SOURCE})\s*\)\s*$`,
  'i',
);

/**
 * Parses a textual coordinate into a latitude/longitude pair.
 *
 * @param value Text to parse. An empty value yields `null`.
 * @param isWKT When true, `value` must be a WKT point, `POINT(lon lat)` (longitude first;
 *   case-insensitive, surrounding/inner whitespace and exponent notation tolerated).
 *   When false, `value` must be a `lat;lon` pair, or a `lat,lon` pair when it contains
 *   no semicolon (latitude first).
 * @returns The parsed coordinates, or `null` when `value` matches no supported format
 *   or one of the two numbers cannot be parsed.
 */
function parseCoordinates(value: string, isWKT: boolean = false): { lat: number; lon: number } | null {
  if (!value) return null;

  let latText: string | undefined;
  let lonText: string | undefined;

  if (isWKT) {
    const match = WKT_POINT_PATTERN.exec(value);
    if (!match) return null;
    // WKT orders coordinates as x y, i.e. longitude before latitude.
    [, lonText, latText] = match;
  } else {
    // A semicolon always wins so that every input containing one is split exactly
    // where it used to be; the comma is only a fallback for "lat, lon" values.
    const delimiter = value.includes(';') ? ';' : ',';
    [latText, lonText] = value.split(delimiter);
  }

  if (latText === undefined || lonText === undefined) return null;

  const lat = parseFloat(latText);
  const lon = parseFloat(lonText);
  if (Number.isNaN(lat) || Number.isNaN(lon)) return null;

  return { lat, lon };
}

/**
 * Splits one line of delimited text into its fields, honouring double-quoted fields.
 *
 * A field whose first non-blank character is `"` is read up to the matching closing
 * quote: separators inside it are kept as data, `""` stands for a literal quote and
 * the surrounding quotes are dropped. Quotes appearing elsewhere are kept verbatim,
 * so a line without field-leading quotes is split exactly like `line.split(separator)`.
 *
 * @param line A single line, without its line terminator.
 * @param separator Single-character field separator.
 * @returns The fields in order; always at least one element.
 */
function splitDelimitedLine(line: string, separator: string): string[] {
  const fields: string[] = [];
  let field = '';
  let inQuotes = false;
  let wasQuoted = false;

  for (let i = 0; i < line.length; i++) {
    const char = line.charAt(i);

    if (inQuotes) {
      if (char !== '"') {
        field += char;
      } else if (line.charAt(i + 1) === '"') {
        // Doubled quote inside a quoted field is an escaped literal quote.
        field += '"';
        i++;
      } else {
        inQuotes = false;
      }
    } else if (char === '"' && !wasQuoted && field.trim() === '') {
      // Opening quote: discard any blanks that preceded it.
      inQuotes = true;
      wasQuoted = true;
      field = '';
    } else if (char === separator) {
      fields.push(field);
      field = '';
      wasQuoted = false;
    } else {
      field += char;
    }
  }

  fields.push(field);
  return fields;
}

/**
 * Converts a delimited text (CSV) file into a GeoJSON `FeatureCollection` of points.
 *
 * The separator is detected on the first line: the entry of `separators` producing the
 * most columns wins (the earliest entry wins ties). Each non-blank line becomes a row
 * object keyed by the trimmed header names, or by `column0`, `column1`, ... when
 * `hasHeaders` is falsy. Coordinates are looked up, in priority order, from:
 *   1. the WKT column (`wktColumn`),
 *   2. the separate latitude/longitude columns (`latColumn` + `lonColumn`),
 *   3. a `geo_point_2d` column holding a coordinate pair understood by `parseCoordinates`.
 * A source that is missing or cannot be parsed falls through to the next one; rows with
 * no usable coordinates are skipped. The whole row is stored as the feature properties.
 *
 * Limitation: the file is split on line breaks first, so a quoted field that itself
 * contains a line break is not supported.
 *
 * @param options Location of the file and names of the coordinate columns.
 * @returns The feature collection; empty (after logging an error) when the file does not exist.
 */
function csvToGeoJSON(options: CSVConversionOptions): FeatureCollection<Point> {
  const { dir, file, latColumn, lonColumn, wktColumn, hasHeaders } = options;
  const filePath = path.join(dir, file);

  console.log('csvToGeoJSON: filePath', filePath)
  if (!fs.existsSync(filePath)) {
    console.error(`!!!!!!!!! Le fichier ${filePath} n'existe pas`);
    return { type: 'FeatureCollection', features: [] };
  }

  // Drop a leading byte-order mark so it cannot hide the opening quote of the first header.
  const csvContent = fs.readFileSync(filePath, 'utf-8').replace(/^\uFEFF/, '');
  const lines = csvContent.split(/\r?\n/);
  const firstLine = lines[0] ?? '';
  const features: Feature<Point>[] = [];

  // Detect the separator that yields the most columns on the first line.
  let bestSeparator = ';';
  let maxColumns = 0;
  for (const separator of separators) {
    const columnCount = splitDelimitedLine(firstLine, separator).length;
    if (columnCount > maxColumns) {
      maxColumns = columnCount;
      bestSeparator = separator;
    }
  }
  console.log(`Utilisation du séparateur "${bestSeparator}" (${maxColumns} colonnes détectées)`);

  const startIndex = hasHeaders ? 1 : 0;
  const headers = hasHeaders
    ? splitDelimitedLine(firstLine, bestSeparator).map((header) => header.trim())
    : null;

  for (let i = startIndex; i < lines.length; i++) {
    const line = (lines[i] ?? '').trim();
    if (!line) continue;

    const values = splitDelimitedLine(line, bestSeparator);
    const row: Record<string, string | undefined> = {};

    if (headers) {
      // Lines shorter than the header leave the trailing columns undefined.
      headers.forEach((header, index) => {
        row[header] = values[index]?.trim();
      });
    } else {
      values.forEach((value, index) => {
        row[`column${index}`] = value.trim();
      });
    }

    let coordinates: { lat: number; lon: number } | null = null;

    // Priority 1: WKT column.
    const wktText = wktColumn ? row[wktColumn] : undefined;
    if (wktText) {
      coordinates = parseCoordinates(wktText, true);
    }

    // Priority 2: separate latitude / longitude columns.
    if (!coordinates && latColumn && lonColumn) {
      const latText = row[latColumn];
      const lonText = row[lonColumn];
      if (latText && lonText) {
        const lat = parseFloat(latText);
        const lon = parseFloat(lonText);
        if (!Number.isNaN(lat) && !Number.isNaN(lon)) {
          coordinates = { lat, lon };
        }
      }
    }

    // Priority 3: 'geo_point_2d' column.
    if (!coordinates) {
      const geoPointText = row['geo_point_2d'];
      if (geoPointText) {
        coordinates = parseCoordinates(geoPointText, false);
      }
    }

    if (coordinates) {
      features.push({
        type: 'Feature',
        geometry: {
          type: 'Point',
          // GeoJSON positions are ordered [longitude, latitude].
          coordinates: [coordinates.lon, coordinates.lat]
        },
        properties: row
      });
    }
  }

  return {
    type: 'FeatureCollection',
    features
  };
}

/**
 * Checks that the CSV file exists, then streams it and logs its number of lines.
 *
 * The count is produced asynchronously: the function returns right after starting
 * the read, and the message is logged once the whole file has been consumed. Lines
 * are counted as newline (`\n`) bytes, plus one for a final line that is not
 * newline-terminated; an empty file therefore counts zero lines. A read failure is
 * logged with `console.error` instead of surfacing as an unhandled stream error.
 *
 * @param args Location of the CSV file (`dir` + `file`).
 * @throws Error when the file does not exist.
 */
function checkFile(args: CSVConversionOptions) {
  const filePath = path.join(args.dir, args.file);

  // Fail fast when the file is missing.
  if (!fs.existsSync(filePath)) {
    throw new Error(`Le fichier CSV ${filePath} n'existe pas`);
  }

  const NEWLINE_BYTE = 0x0a;
  let lineCount = 0;
  // Starts as a newline so that an empty file does not get a phantom last line.
  let lastByte = NEWLINE_BYTE;

  fs.createReadStream(filePath)
    .on('data', (chunk: string | Buffer) => {
      // 'data' fires once per chunk, not once per line: count the newlines inside it.
      const bytes = typeof chunk === 'string' ? Buffer.from(chunk) : chunk;
      if (bytes.length === 0) return;
      for (const byte of bytes) {
        if (byte === NEWLINE_BYTE) lineCount++;
      }
      lastByte = bytes[bytes.length - 1] ?? lastByte;
    })
    .on('end', () => {
      // A trailing line without a final newline still counts as a line.
      if (lastByte !== NEWLINE_BYTE) lineCount++;
      console.log(`Nombre de lignes dans le fichier CSV : ${lineCount}`);
    })
    .on('error', (error: Error) => {
      console.error(`Erreur lors de la lecture du fichier CSV ${filePath} : ${error.message}`);
    });
}
/**
 * Counts the features stored in the GeoJSON file named `<file>.geojson` inside `dir`,
 * and logs the result.
 *
 * @param args Only `dir` and `file` are used; `.geojson` is appended to `file`.
 * @returns The length of the top-level `features` array, `0` when the document has no
 *   such array (including when its root is not an object), or `undefined` (after
 *   logging a message) when the file does not exist.
 * @throws SyntaxError when the file content is not valid JSON.
 */
function countGeoJSONFeatures(args: CSVConversionOptions): number | undefined {
  const filePath = path.join(args.dir, `${args.file}.geojson`);

  // Nothing to count when the GeoJSON file is missing.
  if (!fs.existsSync(filePath)) {
    console.log(`Le fichier GeoJSON ${filePath} n'existe pas`);
    return undefined;
  }

  // The file content is external input: keep it as `unknown` and narrow before use.
  const parsed: unknown = JSON.parse(fs.readFileSync(filePath, 'utf8'));
  const features =
    typeof parsed === 'object' && parsed !== null
      ? (parsed as { features?: unknown }).features
      : undefined;

  // Only a real array counts; anything else means "no features".
  const featureCount = Array.isArray(features) ? features.length : 0;

  console.log(`Nombre de features dans le GeoJSON : ${featureCount}`);
  return featureCount;
}

export {
  csvToGeoJSON,
  checkFile,
  countGeoJSONFeatures
};