mirror of
https://forge.chapril.org/tykayn/wololo
synced 2025-10-09 17:02:46 +02:00
réinit
This commit is contained in:
commit
996524bc6d
107 changed files with 1295536 additions and 0 deletions
193
make_variance_from_csv.ts
Normal file
193
make_variance_from_csv.ts
Normal file
|
@ -0,0 +1,193 @@
|
|||
/**
 * Take a CSV file,
 * examine every column and its values,
 * keep the unique values of each column in memory,
 * then write a new CSV that shows only the unique values of each column
 * and counts how many values there are.
 */
|
||||
import utils from './mappings/utils'
|
||||
import {parse} from 'csv'
|
||||
|
||||
const fs = require('fs')
|
||||
const minimist = require('minimist')
|
||||
|
||||
let mini_arguments: any = minimist(process.argv.slice(2))
|
||||
|
||||
// interface VarianceType {
|
||||
// [key: string]: Array<string>
|
||||
// }
|
||||
|
||||
let csv_content = 'variance de dataset\n';
|
||||
let separator = '\t';
|
||||
let separator_fallback = ',';
|
||||
let data_variance: any = {};
|
||||
|
||||
let folder = 'irve_bornes_recharge'
|
||||
let inputFile = 'latest.csv'
|
||||
let fileNameOutput = `variance_${inputFile}`;
|
||||
let inputPath = `./etalab_data/${folder}/${inputFile}`
|
||||
// let inputPath = './etalab_data/toilettes/small_datas.csv'
|
||||
let outputPath = `etalab_data/${folder}`
|
||||
|
||||
console.log('mini_arguments', mini_arguments)
|
||||
if (mini_arguments['inputFile']) {
|
||||
inputFile = `${mini_arguments['input-file']}`
|
||||
}
|
||||
if (mini_arguments['source']) {
|
||||
inputPath = mini_arguments['source']
|
||||
}
|
||||
if (mini_arguments['separator']) {
|
||||
separator = mini_arguments['separator']
|
||||
}
|
||||
let columns_headings: Array<string> = [];
|
||||
let lines_count = 0;
|
||||
let longest_variance_count = 0;
|
||||
|
||||
|
||||
function getColumnsFromRow(row: string) {
|
||||
let headings: any = []
|
||||
console.log('elem', row)
|
||||
|
||||
headings = row.split(separator)
|
||||
return headings
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* sort unique values ascending in each column
|
||||
*/
|
||||
function reorderValuesInDataVariance() {
|
||||
columns_headings.forEach((heading: string) => {
|
||||
data_variance[heading] = data_variance[heading].sort((a: any, b: any) => a - b)
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
console.log('read file ', inputPath)
|
||||
fs.readFile(inputPath, function (err: any, fileData: any) {
|
||||
if (err) {
|
||||
throw new Error(err)
|
||||
} else {
|
||||
parse(fileData, {columns: false, trim: true}, function (err: any, lines: any) {
|
||||
// Your CSV data is in an array of arrays passed to this callback as rows.
|
||||
|
||||
if (err) {
|
||||
throw new Error(err)
|
||||
}
|
||||
console.log('line ', lines_count)
|
||||
console.log('lines.length', lines.length)
|
||||
|
||||
lines.forEach((line: any) => {
|
||||
|
||||
line = line[0]
|
||||
|
||||
if (lines_count === 0) {
|
||||
console.log('line', line)
|
||||
if (line.indexOf(separator) === -1) {
|
||||
console.log('separator not found: ', separator, 'trying other separator')
|
||||
if (line.indexOf(separator_fallback) === -1) {
|
||||
console.log('separator not found: ', separator_fallback)
|
||||
|
||||
throw new Error('no separator found in csv')
|
||||
|
||||
} else {
|
||||
console.log('fallback separator found: ', separator_fallback, 'now using it ')
|
||||
separator = separator_fallback
|
||||
}
|
||||
}
|
||||
|
||||
columns_headings = getColumnsFromRow(line)
|
||||
console.log('columns_headings.length', columns_headings.length)
|
||||
// console.log('columns_headings', columns_headings)
|
||||
let headers = Object.keys(columns_headings)
|
||||
columns_headings.forEach((header: string) => {
|
||||
data_variance[header] = []
|
||||
})
|
||||
console.log('data_variance', data_variance)
|
||||
} else {
|
||||
// lignes suivantes
|
||||
|
||||
let column_index = 0
|
||||
|
||||
|
||||
line.split(separator).forEach((value: string) => {
|
||||
value = value.trim()
|
||||
let column_header_current = columns_headings[column_index]
|
||||
// console.log('column_index', column_index)
|
||||
// dans chaque colonne, vérifier que la valeur n'est pas déjà présente
|
||||
// dans les index de variance
|
||||
// si la valeur est nouvelle, l'ajouter
|
||||
|
||||
if (data_variance[column_header_current].indexOf(value) === -1) {
|
||||
data_variance[column_header_current].push(value)
|
||||
if (
|
||||
data_variance[column_header_current].length > longest_variance_count
|
||||
) {
|
||||
longest_variance_count = data_variance[column_header_current].length
|
||||
}
|
||||
|
||||
} else {
|
||||
console.log('value', value, ' déjà présente dans la collection', column_header_current)
|
||||
}
|
||||
|
||||
column_index++
|
||||
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
lines_count++
|
||||
})
|
||||
|
||||
console.log('lines_count', lines_count)
|
||||
console.log('longest_variance_count', longest_variance_count)
|
||||
|
||||
reorderValuesInDataVariance()
|
||||
utils.writeFile(fileNameOutput, writeCSVVariance(), outputPath)
|
||||
|
||||
})
|
||||
}
|
||||
|
||||
console.log('parsing done')
|
||||
// console.log('data_variance', data_variance)
|
||||
})
|
||||
|
||||
/**
|
||||
* écrit un csv avec les données de variance du dataset donné
|
||||
*/
|
||||
function writeCSVVariance() {
|
||||
|
||||
let csv_content = `;variance de ${inputPath};généré le:;${new Date()};lignes du csv original:;${lines_count};fait avec make_variance_from_csv.ts de Tykayn
|
||||
`
|
||||
let columns = Object.keys(data_variance);
|
||||
|
||||
// add headings
|
||||
columns_headings.forEach((heading: string) => {
|
||||
csv_content = csv_content + separator + heading
|
||||
})
|
||||
csv_content = csv_content + '\n'
|
||||
// add max length of variance for each column
|
||||
let ii = 0
|
||||
columns.forEach((column: string) => {
|
||||
// console.log('column', column, data_variance[column].length)
|
||||
csv_content = csv_content + separator + data_variance[column].length
|
||||
ii++
|
||||
})
|
||||
|
||||
csv_content = csv_content + '\n\n'
|
||||
// add content of values
|
||||
for (let ii = 0; ii < longest_variance_count; ii++) {
|
||||
csv_content = csv_content + '\n'
|
||||
columns.forEach((column: any) => {
|
||||
if (ii < data_variance[column].length) {
|
||||
|
||||
let currentValue = data_variance[column][ii]
|
||||
csv_content = csv_content + separator + currentValue
|
||||
} else {
|
||||
csv_content = csv_content + separator
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
return csv_content;
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue