add wiki compare

This commit is contained in:
Tykayn 2025-08-21 16:50:17 +02:00 committed by tykayn
parent 692e609a46
commit 38fbc451f5
9 changed files with 81151 additions and 126 deletions

View file

@ -5,6 +5,7 @@ namespace App\Controller;
use Symfony\Bundle\FrameworkBundle\Controller\AbstractController;
use Symfony\Component\HttpFoundation\Response;
use Symfony\Component\Routing\Annotation\Route;
use Symfony\Component\HttpFoundation\Request;
class WikiController extends AbstractController
{
@ -23,13 +24,294 @@ class WikiController extends AbstractController
$headers = array_shift($csvData);
$wikiPages = [];
$missingTranslations = [];
// First pass: collect all staleness scores to find min and max
$stalenessScores = [];
foreach ($csvData as $row) {
$page = array_combine($headers, $row);
if (isset($page['staleness_score']) && is_numeric($page['staleness_score'])) {
$stalenessScores[] = (float)$page['staleness_score'];
}
}
// Find min and max scores for normalization
$minScore = !empty($stalenessScores) ? min($stalenessScores) : 0;
$maxScore = !empty($stalenessScores) ? max($stalenessScores) : 100;
// Second pass: process pages and normalize scores
foreach ($csvData as $row) {
$page = array_combine($headers, $row);
// Normalize staleness score to 0-100 range (0 = best, 100 = worst)
if (isset($page['staleness_score']) && is_numeric($page['staleness_score'])) {
$originalScore = (float)$page['staleness_score'];
// Avoid division by zero
if ($maxScore > $minScore) {
$normalizedScore = ($originalScore - $minScore) / ($maxScore - $minScore) * 100;
} else {
$normalizedScore = 50; // Default to middle value if all scores are the same
}
// Round to 2 decimal places
$page['staleness_score'] = round($normalizedScore, 2);
}
$wikiPages[$page['key']][$page['language']] = $page;
}
// Identify pages missing French translations
foreach ($wikiPages as $key => $languages) {
if (isset($languages['en']) && !isset($languages['fr'])) {
$missingTranslations[$key] = $languages['en'];
}
}
// Sort wiki pages by staleness score (descending)
uasort($wikiPages, function($a, $b) {
$scoreA = isset($a['en']) && isset($a['fr']) && isset($a['en']['staleness_score']) ? (float)$a['en']['staleness_score'] : 0;
$scoreB = isset($b['en']) && isset($b['fr']) && isset($b['en']['staleness_score']) ? (float)$b['en']['staleness_score'] : 0;
return $scoreB <=> $scoreA;
});
return $this->render('admin/wiki.html.twig', [
'wiki_pages' => $wikiPages,
'missing_translations' => $missingTranslations,
]);
}
#[Route('/admin/wiki/compare/{key}', name: 'app_admin_wiki_compare')]
/**
 * Shows a detailed English/French comparison for one OSM wiki key.
 *
 * Reads the page rows for $key from wiki_compare/wiki_pages.csv, enriches them
 * with the per-key entry of wiki_compare/outdated_pages.json when that file is
 * present, and renders admin/wiki_compare.html.twig.
 *
 * Redirects with an error flash when the CSV file is missing or when no
 * English row exists for the requested key.
 *
 * @param string $key OSM key taken from the route (e.g. "building").
 */
public function compare(string $key): Response
{
    $compareDir = $this->getParameter('kernel.project_dir') . '/wiki_compare';
    $csvFile = $compareDir . '/wiki_pages.csv';
    $jsonFile = $compareDir . '/outdated_pages.json';

    if (!file_exists($csvFile)) {
        $this->addFlash('error', 'Le fichier wiki_pages.csv n\'existe pas.');

        return $this->redirectToRoute('app_admin_index');
    }

    // file() returns false on read failure; treat that like an empty file.
    $lines = file($csvFile);
    $csvData = array_map('str_getcsv', $lines === false ? [] : $lines);
    $headers = array_shift($csvData) ?? [];
    $columnCount = count($headers);

    // Locate the English and French rows for the requested key.
    // When a key/language pair appears several times, the last row wins.
    $enPage = null;
    $frPage = null;
    foreach ($csvData as $row) {
        // array_combine() throws when the row and header sizes differ
        // (blank or malformed lines), so such rows are skipped.
        if (count($row) !== $columnCount) {
            continue;
        }

        $page = array_combine($headers, $row);
        if (($page['key'] ?? null) !== $key) {
            continue;
        }

        $language = $page['language'] ?? null;
        if ($language === 'en') {
            $enPage = $page;
        } elseif ($language === 'fr') {
            $frPage = $page;
        }
    }

    // Without an English page there is nothing to compare against.
    if (!$enPage) {
        $this->addFlash('error', 'La page wiki pour la clé "' . $key . '" n\'existe pas.');

        return $this->redirectToRoute('app_admin_wiki');
    }

    // Detailed comparison (sections, links, media) comes from the JSON file.
    $detailedComparison = null;
    $mediaDiff = 0;
    if (file_exists($jsonFile)) {
        $jsonData = json_decode((string) file_get_contents($jsonFile), true);

        // json_decode() yields null on invalid JSON; only iterate real arrays.
        foreach (is_array($jsonData) ? $jsonData : [] as $entry) {
            if (!is_array($entry) || ($entry['key'] ?? null) !== $key) {
                continue;
            }

            $detailedComparison = [
                'section_comparison' => $entry['section_comparison'] ?? null,
                'link_comparison' => $entry['link_comparison'] ?? null,
                'media_comparison' => $entry['media_comparison'] ?? null,
            ];
            $rawMediaDiff = $entry['media_diff'] ?? 0;
            $mediaDiff = is_numeric($rawMediaDiff) ? $rawMediaDiff + 0 : 0;
            break;
        }
    }

    // Staleness score breakdown; only meaningful when a French page exists.
    $scoreComponents = [];
    if ($frPage) {
        // Date difference in whole days. The "!" resets the time part and UTC
        // avoids DST shifts, so the division below never yields a fraction.
        $dateDiff = 0;
        $utc = new \DateTimeZone('UTC');
        $enDate = \DateTimeImmutable::createFromFormat('!Y-m-d', (string) ($enPage['last_modified'] ?? ''), $utc);
        $frDate = \DateTimeImmutable::createFromFormat('!Y-m-d', (string) ($frPage['last_modified'] ?? ''), $utc);
        if ($enDate && $frDate) {
            $dateDiff = ($enDate->getTimestamp() - $frDate->getTimestamp()) / 86400;
        }

        // CSV values are strings; cast before doing arithmetic on them.
        $wordDiff = (int) ($enPage['word_count'] ?? 0) - (int) ($frPage['word_count'] ?? 0);
        $sectionDiff = (int) ($enPage['sections'] ?? 0) - (int) ($frPage['sections'] ?? 0);
        $linkDiff = (int) ($enPage['link_count'] ?? 0) - (int) ($frPage['link_count'] ?? 0);

        // When both pages carry a media count, a media factor is added and the
        // word/link weights shrink so the weights still sum to 1.0. The weights
        // are chosen first so each contribution uses the weight that is displayed.
        // NOTE(review): confirm these adjusted weights match the script that
        // produces staleness_score.
        $hasMedia = isset($enPage['media_count'], $frPage['media_count']);
        $weights = $hasMedia
            ? ['date' => 0.2, 'word' => 0.45, 'section' => 0.15, 'link' => 0.1, 'media' => 0.1]
            : ['date' => 0.2, 'word' => 0.5, 'section' => 0.15, 'link' => 0.15];

        $scoreComponents = [
            'date' => [
                'value' => $dateDiff,
                'weight' => $weights['date'],
                'component' => abs($dateDiff) * $weights['date'],
                'description' => 'Différence de date (en jours)',
            ],
            'word' => [
                'value' => $wordDiff,
                'weight' => $weights['word'],
                'component' => abs($wordDiff) / 100 * $weights['word'],
                'description' => 'Différence de nombre de mots',
            ],
            'section' => [
                'value' => $sectionDiff,
                'weight' => $weights['section'],
                'component' => abs($sectionDiff) * $weights['section'],
                'description' => 'Différence de nombre de sections',
            ],
            'link' => [
                'value' => $linkDiff,
                'weight' => $weights['link'],
                'component' => abs($linkDiff) / 10 * $weights['link'],
                'description' => 'Différence de nombre de liens',
            ],
        ];

        if ($hasMedia) {
            $scoreComponents['media'] = [
                'value' => $mediaDiff,
                'weight' => $weights['media'],
                'component' => abs($mediaDiff) / 5 * $weights['media'],
                'description' => 'Différence de nombre d\'images',
            ];
        }
    }

    // Link to the (not yet existing) French page so it can be created.
    $createFrUrl = $frPage ? null : 'https://wiki.openstreetmap.org/wiki/FR:Key:' . $key;

    // Plain-text section and link lists used by the "copy" buttons.
    $sectionComparison = $detailedComparison['section_comparison'] ?? null;
    $linkComparison = $detailedComparison['link_comparison'] ?? null;

    $enSections = '';
    $frSections = '';
    if (is_array($sectionComparison) && $sectionComparison) {
        $enSections = $this->formatSectionsForCopy($sectionComparison, 'en');
        if ($frPage) {
            $frSections = $this->formatSectionsForCopy($sectionComparison, 'fr');
        }
    }

    $enLinks = '';
    $frLinks = '';
    if (is_array($linkComparison) && $linkComparison) {
        $enLinks = $this->formatLinksForCopy($linkComparison, 'en');
        if ($frPage) {
            $frLinks = $this->formatLinksForCopy($linkComparison, 'fr');
        }
    }

    return $this->render('admin/wiki_compare.html.twig', [
        'key' => $key,
        'en_page' => $enPage,
        'fr_page' => $frPage,
        'score_components' => $scoreComponents,
        'create_fr_url' => $createFrUrl,
        'detailed_comparison' => $detailedComparison,
        'en_sections' => $enSections,
        'fr_sections' => $frSections,
        'en_links' => $enLinks,
        'fr_links' => $frLinks,
    ]);
}

/**
 * Renders the section titles of one language as wiki headings, one per line.
 *
 * Common sections come first, then the sections that exist only in
 * $language, suffixed with "(EN only)" / "(FR only)".
 *
 * @param array  $comparison Section comparison entry; presumably shaped as
 *                           {common: [{en: {...}, fr: {...}}], en_only: [...], fr_only: [...]}
 *                           with "level" and "title" on each section (inferred from usage).
 * @param string $language   'en' or 'fr'.
 */
private function formatSectionsForCopy(array $comparison, string $language): string
{
    $heading = static function ($section): string {
        // str_repeat() rejects negative counts, hence the clamp.
        $marker = str_repeat('=', max(0, (int) ($section['level'] ?? 0)));

        return $marker . ' ' . ($section['title'] ?? '') . ' ' . $marker;
    };

    $lines = [];
    foreach ((array) ($comparison['common'] ?? []) as $pair) {
        $lines[] = $heading($pair[$language] ?? []);
    }

    $suffix = ' (' . strtoupper($language) . ' only)';
    foreach ((array) ($comparison[$language . '_only'] ?? []) as $section) {
        $lines[] = $heading($section) . $suffix;
    }

    return implode("\n", $lines);
}

/**
 * Renders the links of one language as "text - href" lines.
 *
 * Common links come first, then the links that exist only in $language,
 * suffixed with "(EN only)" / "(FR only)".
 *
 * @param array  $comparison Link comparison entry; presumably shaped like the
 *                           section comparison, with "text" and "href" on each
 *                           link (inferred from usage).
 * @param string $language   'en' or 'fr'.
 */
private function formatLinksForCopy(array $comparison, string $language): string
{
    $label = static fn ($link): string => ($link['text'] ?? '') . ' - ' . ($link['href'] ?? '');

    $lines = [];
    foreach ((array) ($comparison['common'] ?? []) as $pair) {
        $lines[] = $label($pair[$language] ?? []);
    }

    $suffix = ' (' . strtoupper($language) . ' only)';
    foreach ((array) ($comparison[$language . '_only'] ?? []) as $link) {
        $lines[] = $label($link) . $suffix;
    }

    return implode("\n", $lines);
}
}

View file

@ -7,6 +7,13 @@
<link href='{{ asset('js/maplibre/maplibre-gl.css') }}' rel='stylesheet'/>
<link href='{{ asset('css/city-sidebar.css') }}' rel='stylesheet'/>
<style>
#alertes_osmose .counter{
background: #8A2BE2;
border-radius: 10em;
margin-right: 1ch;
padding: 0.5rem;
color: white;
}
#themeMap {
margin-top: 1rem;
}
@ -267,6 +274,7 @@
</div>
<div id="alertes_osmose"></div>
<div class="chart-container">
<canvas id="themeChart"></canvas>
</div>
@ -871,6 +879,12 @@
return;
}
const divOsmose = document.querySelector(('#alertes_osmose'))
if(divOsmose){
divOsmose.innerHTML = `<span class="counter">${data.issues.length}</span> objets à ajouter selon Osmose`;
}
console.log(`[Osmose] ${data.issues.length} analyses trouvées pour le thème ${theme}`);
// Ajouter les marqueurs pour chaque analyse
@ -881,14 +895,14 @@
.setHTML(
(() => {
return `<div id="osmose-popup-${issue.id}" onclick="loadOsmoseIssueDetails(${issue.id})">Chargement des détails... ${issue.id}</div>`
return `<div id="osmose-popup-${issue.id}" >Proposition d'ajout <button onclick="loadOsmoseIssueDetails(${issue.id})">${issue.id}</button></div>`
})());
lapopup.on('open', () => {
// Charger les détails de l'analyse lorsque le popup est ouvert
console.log('open popup', issue)
// loadOsmoseIssueDetails(issue.id);
});
// lapopup.on('open', () => {
// // Charger les détails de l'analyse lorsque le popup est ouvert
// console.log('open popup', issue)
// // loadOsmoseIssueDetails(issue.id);
// });
// Créer un marqueur pour l'analyse
const marker = new maplibregl.Marker({
@ -914,7 +928,7 @@
function loadOsmoseIssueDetails(issueId) {
const detailsUrl = `https://osmose.openstreetmap.fr/api/0.3/issue/${issueId}?langs=auto`;
console.log('detailsUrl', detailsUrl)
console.log('loadOsmoseIssueDetails detailsUrl', detailsUrl)
fetch(detailsUrl)
.then(response => response.json())
.then(data => {

View file

@ -19,6 +19,8 @@
<th rowspan="2">Clé</th>
<th colspan="3" class="text-center">Version anglaise</th>
<th colspan="3" class="text-center">Version française</th>
<th rowspan="2" class="text-center">Score de<br>décrépitude</th>
<th rowspan="2" class="text-center">Liens</th>
</tr>
<tr>
<th>Sections</th>
@ -31,27 +33,45 @@
</thead>
<tbody>
{% for key, languages in wiki_pages %}
<tr>
<td>
<strong>{{ key }}</strong>
</td>
{% if languages['en'] is defined %}
{% if languages['en'] is defined and languages['fr'] is defined %}
<tr>
<td>
<strong>{{ key }}</strong>
</td>
<td>{{ languages['en'].sections }}</td>
<td>{{ languages['en'].word_count }}</td>
<td>{{ languages['en'].link_count }}</td>
{% else %}
<td colspan="3" class="text-center text-muted">Page non disponible</td>
{% endif %}
{% if languages['fr'] is defined %}
<td>{{ languages['fr'].sections }}</td>
<td>{{ languages['fr'].word_count }}</td>
<td>{{ languages['fr'].link_count }}</td>
{% else %}
<td colspan="3" class="text-center text-muted">Page non disponible</td>
{% endif %}
</tr>
<td class="text-center">
{% set score = languages['en'].staleness_score|default(0) %}
{% if score > 50 %}
<span class="badge bg-danger">{{ score }}</span>
{% elseif score > 20 %}
<span class="badge bg-warning text-dark">{{ score }}</span>
{% else %}
<span class="badge bg-success">{{ score }}</span>
{% endif %}
</td>
<td class="text-center">
<div class="btn-group" role="group">
<a href="{{ languages['en'].url }}" target="_blank" class="btn btn-sm btn-outline-primary" title="Version anglaise">
<i class="bi bi-translate"></i> EN
</a>
<a href="{{ languages['fr'].url }}" target="_blank" class="btn btn-sm btn-outline-info" title="Version française">
<i class="bi bi-translate"></i> FR
</a>
<a href="{{ path('app_admin_wiki_compare', {'key': key}) }}" class="btn btn-sm btn-outline-secondary" title="Comparer les versions">
<i class="bi bi-arrows-angle-expand"></i> Comparer
</a>
</div>
</td>
</tr>
{% endif %}
{% endfor %}
</tbody>
</table>
@ -59,6 +79,57 @@
</div>
</div>
{% if missing_translations|length > 0 %}
<div class="card mb-4">
<div class="card-header bg-warning text-dark">
<h2>Pages manquantes en français</h2>
</div>
<div class="card-body">
<p>Ces pages wiki ont une version anglaise mais pas de traduction française.</p>
<div class="table-responsive">
<table class="table table-striped table-hover">
<thead class="thead-dark">
<tr>
<th>Clé</th>
<th>Sections</th>
<th>Mots</th>
<th>Liens</th>
<th>Score de décrépitude</th>
<th>Actions</th>
</tr>
</thead>
<tbody>
{% for key, page in missing_translations %}
<tr>
<td><strong>{{ key }}</strong></td>
<td>{{ page.sections }}</td>
<td>{{ page.word_count }}</td>
<td>{{ page.link_count }}</td>
<td>
<span class="badge bg-danger">{{ page.staleness_score|default(100) }}</span>
</td>
<td class="text-center">
<div class="btn-group" role="group">
<a href="{{ page.url }}" target="_blank" class="btn btn-sm btn-outline-primary" title="Version anglaise">
<i class="bi bi-translate"></i> EN
</a>
<a href="{{ path('app_admin_wiki_compare', {'key': key}) }}" class="btn btn-sm btn-outline-secondary" title="Voir les détails et créer la page française">
<i class="bi bi-arrows-angle-expand"></i> Comparer
</a>
</div>
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
</div>
{% endif %}
<p>
Le score de fraîcheur prend davantage en compte la différence entre le nombre de mots que l'ancienneté de modification.
On compte aussi le nombre de sections et de liens.
</p>
<div class="mt-3">
</div>

View file

@ -0,0 +1,577 @@
{% extends 'base.html.twig' %}
{% block title %}Comparaison Wiki OSM - {{ key }}{% endblock %}
{% block body %}
<style>
.card:hover{
transform: none !important;
box-shadow: none !important;
}
.title-level-2 {
padding-left: 1.5rem;
}
.title-level-3 {
padding-left: 2.8rem;
}
</style>
<div class="container mt-4">
<h1>Comparaison Wiki OpenStreetMap - {{ key }}</h1>
<p class="lead">Comparaison détaillée des pages wiki en français et en anglais pour la clé OSM "{{ key }}".</p>
{% if fr_page %}
<div class="card mb-4">
<div class="card-header">
<h2>Comparaison des versions</h2>
</div>
<div class="card-body">
<div class="row">
<div class="col-md-6">
<div class="card h-100">
<div class="card-header bg-primary text-white">
<h3>Version anglaise</h3>
<p class="mb-0">
<small>Dernière modification: {{ en_page.last_modified }}</small>
</p>
</div>
<div class="card-body">
<ul class="list-group mb-3">
<li class="list-group-item d-flex justify-content-between align-items-center">
Sections
<span class="badge bg-primary rounded-pill">{{ en_page.sections }}</span>
</li>
<li class="list-group-item d-flex justify-content-between align-items-center">
Mots
<span class="badge bg-primary rounded-pill">{{ en_page.word_count }}</span>
</li>
<li class="list-group-item d-flex justify-content-between align-items-center">
Liens
<span class="badge bg-primary rounded-pill">{{ en_page.link_count }}</span>
</li>
</ul>
<div class="d-grid gap-2">
<a href="{{ en_page.url }}" target="_blank" class="btn btn-outline-primary">
<i class="bi bi-box-arrow-up-right"></i> Voir la page
</a>
<button class="btn btn-outline-secondary copy-btn" data-content="sections-en">
<i class="bi bi-clipboard"></i> Copier la liste des sections
</button>
<button class="btn btn-outline-secondary copy-btn" data-content="links-en">
<i class="bi bi-clipboard"></i> Copier la liste des liens
</button>
</div>
</div>
</div>
</div>
<div class="col-md-6">
<div class="card h-100">
<div class="card-header bg-info text-white">
<h3>Version française</h3>
<p class="mb-0">
<small>Dernière modification: {{ fr_page.last_modified }}</small>
</p>
</div>
<div class="card-body">
<ul class="list-group mb-3">
<li class="list-group-item d-flex justify-content-between align-items-center">
Sections
<span class="badge bg-info rounded-pill">{{ fr_page.sections }}</span>
</li>
<li class="list-group-item d-flex justify-content-between align-items-center">
Mots
<span class="badge bg-info rounded-pill">{{ fr_page.word_count }}</span>
</li>
<li class="list-group-item d-flex justify-content-between align-items-center">
Liens
<span class="badge bg-info rounded-pill">{{ fr_page.link_count }}</span>
</li>
</ul>
<div class="d-grid gap-2">
<a href="{{ fr_page.url }}" target="_blank" class="btn btn-outline-info">
<i class="bi bi-box-arrow-up-right"></i> Voir la page
</a>
<button class="btn btn-outline-secondary copy-btn" data-content="sections-fr">
<i class="bi bi-clipboard"></i> Copier la liste des sections
</button>
<button class="btn btn-outline-secondary copy-btn" data-content="links-fr">
<i class="bi bi-clipboard"></i> Copier la liste des liens
</button>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
{% if detailed_comparison and detailed_comparison.section_comparison %}
<div class="card mb-4">
<div class="card-header">
<h2>Comparaison des sections</h2>
</div>
<div class="card-body">
<div class="row">
<div class="col-md-6">
<div class="card h-100">
<div class="card-header bg-primary text-white">
<h3>Sections en anglais</h3>
<span class="badge bg-light text-dark">{{ en_page.sections }} sections</span>
</div>
<div class="card-body">
{# <h4>Sections communes ({{ detailed_comparison.section_comparison.common|length }})</h4>#}
{# <ul class="list-group mb-3">#}
{# {% for section in detailed_comparison.section_comparison.common %}#}
{# <li class="list-group-item">#}
{# <span class="badge bg-secondary">h{{ section.en.level }}</span>#}
{# {{ section.en.title }}#}
{# </li>#}
{# {% endfor %}#}
{# </ul>#}
<h4>Sections uniquement en anglais ({{ detailed_comparison.section_comparison.en_only|length }})</h4>
<ul class="list-group">
{% for section in detailed_comparison.section_comparison.en_only %}
<li class="list-group-item list-group-item-warning title-level-{{ section.level }}">
<span class="badge bg-secondary">h{{ section.level }}</span>
{{ section.title }}
</li>
{% endfor %}
</ul>
</div>
</div>
</div>
<div class="col-md-6">
<div class="card h-100">
<div class="card-header bg-info text-white">
<h3>Sections en français</h3>
<span class="badge bg-light text-dark">{{ fr_page.sections }} sections</span>
</div>
<div class="card-body">
{# <h4>Sections communes ({{ detailed_comparison.section_comparison.common|length }})</h4>#}
{# <ul class="list-group mb-3">#}
{# {% for section in detailed_comparison.section_comparison.common %}#}
{# <li class="list-group-item">#}
{# <span class="badge bg-secondary">h{{ section.fr.level }}</span>#}
{# {{ section.fr.title }}#}
{# </li>#}
{# {% endfor %}#}
{# </ul>#}
<h4>Sections uniquement en français ({{ detailed_comparison.section_comparison.fr_only|length }})</h4>
<ul class="list-group">
{% for section in detailed_comparison.section_comparison.fr_only %}
<li class="list-group-item list-group-item-info title-level-{{ section.level }}">
<span class="badge bg-secondary">h{{ section.level }}</span>
{{ section.title }}
</li>
{% endfor %}
</ul>
</div>
</div>
</div>
</div>
</div>
</div>
{% endif %}
{% if detailed_comparison and detailed_comparison.media_comparison %}
<div class="card mb-4">
<div class="card-header">
<h2>Comparaison des médias</h2>
</div>
<div class="card-body">
<div class="row">
<div class="col-md-6">
<div class="card h-100">
<div class="card-header bg-primary text-white">
<h3>Images en anglais</h3>
<span class="badge bg-light text-dark">{{ en_page.media_count|default(0) }} images</span>
</div>
<div class="card-body">
{# <h4>Images communes ({{ detailed_comparison.media_comparison.common|length }})</h4>#}
{# <div class="row mb-3">#}
{# {% for media in detailed_comparison.media_comparison.common %}#}
{# <div class="col-md-6 mb-2">#}
{# <div class="card">#}
{# <img src="{{ media.en.src }}" class="card-img-top" alt="{{ media.en.alt }}" style="max-height: 150px; object-fit: contain;">#}
{# <div class="card-body p-2">#}
{# <p class="card-text small">{{ media.en.alt }}</p>#}
{# </div>#}
{# </div>#}
{# </div>#}
{# {% endfor %}#}
{# </div>#}
<h4>Images uniquement en anglais ({{ detailed_comparison.media_comparison.en_only|length }})</h4>
<div class="row">
{% for media in detailed_comparison.media_comparison.en_only %}
<div class="col-md-6 mb-2">
<div class="card border-warning">
<img src="{{ media.src }}" class="card-img-top" alt="{{ media.alt }}" style="max-height: 150px; object-fit: contain;">
<div class="card-body p-2">
<p class="card-text small">{{ media.alt }}</p>
</div>
</div>
</div>
{% endfor %}
</div>
</div>
</div>
</div>
<div class="col-md-6">
<div class="card h-100">
<div class="card-header bg-info text-white">
<h3>Images en français</h3>
<span class="badge bg-light text-dark">{{ fr_page.media_count|default(0) }} images</span>
</div>
<div class="card-body">
{# <h4>Images communes ({{ detailed_comparison.media_comparison.common|length }})</h4>#}
{# <div class="row mb-3">#}
{# {% for media in detailed_comparison.media_comparison.common %}#}
{# <div class="col-md-6 mb-2">#}
{# <div class="card">#}
{# <img src="{{ media.fr.src }}" class="card-img-top" alt="{{ media.fr.alt }}" style="max-height: 150px; object-fit: contain;">#}
{# <div class="card-body p-2">#}
{# <p class="card-text small">{{ media.fr.alt }}</p>#}
{# </div>#}
{# </div>#}
{# </div>#}
{# {% endfor %}#}
{# </div>#}
<h4>Images uniquement en français ({{ detailed_comparison.media_comparison.fr_only|length }})</h4>
<div class="row">
{% for media in detailed_comparison.media_comparison.fr_only %}
<div class="col-md-6 mb-2">
<div class="card border-info">
<img src="{{ media.src }}" class="card-img-top" alt="{{ media.alt }}" style="max-height: 150px; object-fit: contain;">
<div class="card-body p-2">
<p class="card-text small">{{ media.alt }}</p>
</div>
</div>
</div>
{% endfor %}
</div>
</div>
</div>
</div>
</div>
</div>
</div>
{% endif %}
{% if detailed_comparison and detailed_comparison.link_comparison %}
<div class="card mb-4">
<div class="card-header">
<h2>Comparaison des liens</h2>
</div>
<div class="card-body">
<div class="row">
<div class="col-md-6">
<div class="card h-100">
<div class="card-header bg-primary text-white">
<h3>Liens en anglais</h3>
<span class="badge bg-light text-dark">{{ en_page.link_count }} liens</span>
</div>
<div class="card-body">
<h4>Liens communs ({{ detailed_comparison.link_comparison.common|length }})</h4>
<div class="table-responsive mb-3">
<table class="table table-sm">
<thead>
<tr>
<th>Texte</th>
<th>URL</th>
</tr>
</thead>
<tbody>
{% for link in detailed_comparison.link_comparison.common|slice(0, 10) %}
<tr>
<td>{{ link.en.text }}</td>
<td><a href="{{ link.en.href }}" target="_blank" class="small">{{ link.en.href|slice(0, 30) }}...</a></td>
</tr>
{% endfor %}
{% if detailed_comparison.link_comparison.common|length > 10 %}
<tr>
<td colspan="2" class="text-center">
<em>{{ detailed_comparison.link_comparison.common|length - 10 }} liens supplémentaires...</em>
</td>
</tr>
{% endif %}
</tbody>
</table>
</div>
<h4>Liens uniquement en anglais ({{ detailed_comparison.link_comparison.en_only|length }})</h4>
<div class="table-responsive">
<table class="table table-sm table-warning">
<thead>
<tr>
<th>Texte</th>
<th>URL</th>
</tr>
</thead>
<tbody>
{% for link in detailed_comparison.link_comparison.en_only|slice(0, 10) %}
<tr>
<td>{{ link.text }}</td>
<td><a href="{{ link.href }}" target="_blank" class="small">{{ link.href|slice(0, 30) }}...</a></td>
</tr>
{% endfor %}
{% if detailed_comparison.link_comparison.en_only|length > 10 %}
<tr>
<td colspan="2" class="text-center">
<em>{{ detailed_comparison.link_comparison.en_only|length - 10 }} liens supplémentaires...</em>
</td>
</tr>
{% endif %}
</tbody>
</table>
</div>
</div>
</div>
</div>
<div class="col-md-6">
<div class="card h-100">
<div class="card-header bg-info text-white">
<h3>Liens en français</h3>
<span class="badge bg-light text-dark">{{ fr_page.link_count }} liens</span>
</div>
<div class="card-body">
<h4>Liens communs ({{ detailed_comparison.link_comparison.common|length }})</h4>
<div class="table-responsive mb-3">
<table class="table table-sm">
<thead>
<tr>
<th>Texte</th>
<th>URL</th>
</tr>
</thead>
<tbody>
{% for link in detailed_comparison.link_comparison.common|slice(0, 10) %}
<tr>
<td>{{ link.fr.text }}</td>
<td><a href="{{ link.fr.href }}" target="_blank" class="small">{{ link.fr.href|slice(0, 30) }}...</a></td>
</tr>
{% endfor %}
{% if detailed_comparison.link_comparison.common|length > 10 %}
<tr>
<td colspan="2" class="text-center">
<em>{{ detailed_comparison.link_comparison.common|length - 10 }} liens supplémentaires...</em>
</td>
</tr>
{% endif %}
</tbody>
</table>
</div>
<h4>Liens uniquement en français ({{ detailed_comparison.link_comparison.fr_only|length }})</h4>
<div class="table-responsive">
<table class="table table-sm table-info">
<thead>
<tr>
<th>Texte</th>
<th>URL</th>
</tr>
</thead>
<tbody>
{% for link in detailed_comparison.link_comparison.fr_only|slice(0, 10) %}
<tr>
<td>{{ link.text }}</td>
<td><a href="{{ link.href }}" target="_blank" class="small">{{ link.href|slice(0, 30) }}...</a></td>
</tr>
{% endfor %}
{% if detailed_comparison.link_comparison.fr_only|length > 10 %}
<tr>
<td colspan="2" class="text-center">
<em>{{ detailed_comparison.link_comparison.fr_only|length - 10 }} liens supplémentaires...</em>
</td>
</tr>
{% endif %}
</tbody>
</table>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
{% endif %}
{% else %}
<div class="card mb-4">
<div class="card-header bg-warning text-dark">
<h2>Traduction française manquante</h2>
</div>
<div class="card-body">
<div class="alert alert-warning">
<p><i class="bi bi-exclamation-triangle"></i> <strong>La page wiki pour la clé "{{ key }}" n'existe pas en français.</strong></p>
<p>Vous pouvez contribuer en créant cette page sur le wiki OpenStreetMap.</p>
</div>
<div class="row">
<div class="col-md-6">
<div class="card h-100">
<div class="card-header bg-primary text-white">
<h3>Version anglaise</h3>
<p class="mb-0">
<small>Dernière modification: {{ en_page.last_modified }}</small>
</p>
</div>
<div class="card-body">
<ul class="list-group mb-3">
<li class="list-group-item d-flex justify-content-between align-items-center">
Sections
<span class="badge bg-primary rounded-pill">{{ en_page.sections }}</span>
</li>
<li class="list-group-item d-flex justify-content-between align-items-center">
Mots
<span class="badge bg-primary rounded-pill">{{ en_page.word_count }}</span>
</li>
<li class="list-group-item d-flex justify-content-between align-items-center">
Liens
<span class="badge bg-primary rounded-pill">{{ en_page.link_count }}</span>
</li>
</ul>
<div class="d-grid gap-2">
<a href="{{ en_page.url }}" target="_blank" class="btn btn-outline-primary">
<i class="bi bi-box-arrow-up-right"></i> Voir la page anglaise
</a>
<button class="btn btn-outline-secondary copy-btn" data-content="sections-en">
<i class="bi bi-clipboard"></i> Copier la liste des sections
</button>
<button class="btn btn-outline-secondary copy-btn" data-content="links-en">
<i class="bi bi-clipboard"></i> Copier la liste des liens
</button>
</div>
</div>
</div>
</div>
<div class="col-md-6">
<div class="card h-100">
<div class="card-header bg-info text-white">
<h3>Créer la version française</h3>
</div>
<div class="card-body">
<p>Vous pouvez créer la page française en suivant ces étapes :</p>
<ol>
<li>Consultez la version anglaise pour comprendre le contenu</li>
<li>Créez une nouvelle page avec le préfixe "FR:" sur le wiki OSM</li>
<li>Traduisez le contenu en respectant la structure de la page anglaise</li>
<li>Ajoutez des exemples pertinents pour le contexte français</li>
</ol>
<div class="d-grid gap-2 mt-4">
<a href="{{ create_fr_url }}" target="_blank" class="btn btn-success">
<i class="bi bi-plus-circle"></i> Créer la page française
</a>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
{% endif %}
<div class="card mb-4">
<div class="card-header">
<h2>Score de décrépitude</h2>
</div>
<div class="card-body">
<p>Le score de décrépitude est calculé en prenant en compte plusieurs facteurs, avec une pondération plus importante pour la différence de nombre de mots :</p>
<div class="table-responsive">
<table class="table table-striped">
<thead>
<tr>
<th>Facteur</th>
<th>Valeur</th>
<th>Poids</th>
<th>Contribution</th>
</tr>
</thead>
<tbody>
{% for key, component in score_components %}
<tr>
<td>{{ component.description }}</td>
<td>{{ component.value }}</td>
<td>{{ component.weight * 100 }}%</td>
<td>{{ component.component|round(2) }}</td>
</tr>
{% endfor %}
<tr class="table-dark">
<td colspan="3"><strong>Score total</strong></td>
<td>
{% set total_score = 0 %}
{% for key, component in score_components %}
{% set total_score = total_score + component.component %}
{% endfor %}
<strong>{{ total_score|round(2) }}</strong>
</td>
</tr>
</tbody>
</table>
</div>
<div class="alert alert-info">
<p><strong>Comment interpréter ce score :</strong></p>
<ul>
<li>Plus le score est élevé, plus la page française est considérée comme "décrépite" par rapport à la version anglaise.</li>
<li>La différence de nombre de mots compte pour 50% du score, car c'est l'indicateur le plus important de la complétude de la traduction.</li>
<li>Les différences de sections (15%), de liens (15%) et de date de modification (20%) complètent le score.</li>
</ul>
</div>
</div>
</div>
<div class="mt-3">
<a href="{{ path('app_admin_wiki') }}" class="btn btn-secondary">
<i class="bi bi-arrow-left"></i> Retour à la liste des pages wiki
</a>
</div>
</div>
<!-- Hidden content for copy functionality -->
<div id="sections-en" style="display: none;">
{{ en_sections|default('Sections de la page anglaise pour ' ~ key) }}
</div>
<div id="links-en" style="display: none;">
{{ en_links|default('Liens de la page anglaise pour ' ~ key) }}
</div>
{% if fr_page %}
<div id="sections-fr" style="display: none;">
{{ fr_sections|default('Sections de la page française pour ' ~ key) }}
</div>
<div id="links-fr" style="display: none;">
{{ fr_links|default('Liens de la page française pour ' ~ key) }}
</div>
{% endif %}
{% block javascripts %}
<script>
document.addEventListener('DOMContentLoaded', function () {
    // Shared failure path: log the cause and tell the user the copy failed.
    function reportCopyFailure(err) {
        console.error('Erreur lors de la copie :', err);
        alert('Erreur lors de la copie. Veuillez réessayer.');
    }

    // Wire every copy button to the hidden element named by its data-content attribute.
    document.querySelectorAll('.copy-btn').forEach(function (button) {
        // Capture the label once, up front: reading it inside the click handler
        // would pick up the temporary "Copié!" label on a quick second click
        // and the button would never revert.
        const originalLabel = button.innerHTML;
        let resetTimer = null;

        button.addEventListener('click', function () {
            const source = document.getElementById(button.getAttribute('data-content'));
            if (!source) {
                reportCopyFailure(new Error('Missing copy source element'));
                return;
            }

            // navigator.clipboard is undefined outside secure contexts; calling it
            // would throw synchronously and never reach the promise's catch.
            if (!navigator.clipboard || typeof navigator.clipboard.writeText !== 'function') {
                reportCopyFailure(new Error('Clipboard API unavailable'));
                return;
            }

            // Trim the whitespace the template adds around the hidden content.
            const content = source.textContent.trim();

            navigator.clipboard.writeText(content).then(function () {
                // Show a temporary success label, then restore the original one.
                button.innerHTML = '<i class="bi bi-check"></i> Copié!';
                clearTimeout(resetTimer);
                resetTimer = setTimeout(function () {
                    button.innerHTML = originalLabel;
                }, 2000);
            }).catch(reportCopyFailure);
        });
    });
});
</script>
{% endblock %}
{% endblock %}

View file

@ -41,6 +41,7 @@
<i class="bi bi-gear"></i> Admin
</a>
<ul class="dropdown-menu" aria-labelledby="adminDropdown">
<li><a class="dropdown-item" href="{{ path('app_admin_wiki') }}"><i class="bi bi-book-fill"></i> Différence de traduction dans le wiki</a></li>
<li><a class="dropdown-item" href="{{ path('app_admin_demandes') }}"><i class="bi bi-list-ul"></i> Liste des demandes</a></li>
<li><a class="dropdown-item" href="{{ path('app_admin_contacted_places') }}"><i class="bi bi-envelope"></i> Places contactées</a></li>
<li><hr class="dropdown-divider"></li>

File diff suppressed because it is too large Load diff

View file

@ -38,5 +38,45 @@
{
"key": "surface",
"count": 72048796
},
{
"key": "addr:country",
"count": 50511041
},
{
"key": "landuse",
"count": 48098130
},
{
"key": "power",
"count": 44639130
},
{
"key": "waterway",
"count": 37153506
},
{
"key": "building:levels",
"count": 36426521
},
{
"key": "amenity",
"count": 30874207
},
{
"key": "barrier",
"count": 30063102
},
{
"key": "source:date",
"count": 29107386
},
{
"key": "service",
"count": 28326346
},
{
"key": "addr:state",
"count": 25331031
}
]

View file

@ -4,7 +4,7 @@
"""
wiki_compare.py
This script fetches the 10 most used OpenStreetMap keys from TagInfo,
This script fetches the most used OpenStreetMap keys from TagInfo,
compares their English and French wiki pages, and identifies which pages
need updating based on modification dates and content analysis.
@ -12,10 +12,10 @@ Usage:
python wiki_compare.py
Output:
- top_keys.json: JSON file containing the 10 most used OSM keys
- top_keys.json: JSON file containing the most used OSM keys
- wiki_pages.csv: CSV file with information about each wiki page
- outdated_pages.json: JSON file containing pages that need updating
- A console output listing the 10 wiki pages that need updating
- A console output listing the wiki pages that need updating
"""
import json
@ -42,8 +42,10 @@ WIKI_BASE_URL_FR = "https://wiki.openstreetmap.org/wiki/FR:Key:"
TOP_KEYS_FILE = "top_keys.json"
WIKI_PAGES_CSV = "wiki_pages.csv"
OUTDATED_PAGES_FILE = "outdated_pages.json"
# Number of wiki pages to examine
NUM_WIKI_PAGES = 20
def fetch_top_keys(limit=50):
def fetch_top_keys(limit=NUM_WIKI_PAGES):
"""
Fetch the most used OSM keys from TagInfo API
@ -135,8 +137,28 @@ def fetch_wiki_page(key, language='en'):
except ValueError:
logger.warning(f"Could not parse date: {date_str}")
# Count sections (h2, h3, h4)
sections = len(soup.select('h2, h3, h4'))
# Extract sections (h2, h3, h4)
section_elements = soup.select('h2, h3, h4')
sections = len(section_elements)
# Extract section titles
section_titles = []
for section_elem in section_elements:
# Skip sections that are part of the table of contents or navigation
if section_elem.parent and section_elem.parent.get('id') in ['toc', 'mw-navigation']:
continue
# Get the text of the section title, removing any edit links
for edit_link in section_elem.select('.mw-editsection'):
edit_link.extract()
section_title = section_elem.get_text(strip=True)
section_level = int(section_elem.name[1]) # h2 -> 2, h3 -> 3, h4 -> 4
section_titles.append({
'title': section_title,
'level': section_level
})
# Count words in the content
content = soup.select_one('#mw-content-text')
@ -149,12 +171,55 @@ def fetch_wiki_page(key, language='en'):
text = content.get_text(separator=' ', strip=True)
word_count = len(text.split())
# Count links
# Extract links
links = content.select('a')
link_count = len(links)
# Get link details (text and href)
link_details = []
for link in links:
href = link.get('href', '')
# Skip edit section links and other non-content links
if 'action=edit' in href or 'redlink=1' in href or not href:
continue
# Make relative URLs absolute
if href.startswith('/'):
href = 'https://wiki.openstreetmap.org' + href
link_text = link.get_text(strip=True)
if link_text: # Only include links with text
link_details.append({
'text': link_text,
'href': href
})
# Extract media (images)
media_elements = content.select('img')
media_count = len(media_elements)
# Get media details (src and alt text)
media_details = []
for img in media_elements:
src = img.get('src', '')
if src:
# Make relative URLs absolute
if src.startswith('//'):
src = 'https:' + src
elif src.startswith('/'):
src = 'https://wiki.openstreetmap.org' + src
alt_text = img.get('alt', '')
media_details.append({
'src': src,
'alt': alt_text
})
else:
word_count = 0
link_count = 0
link_details = []
media_count = 0
media_details = []
return {
'key': key,
@ -162,8 +227,12 @@ def fetch_wiki_page(key, language='en'):
'url': url,
'last_modified': last_modified,
'sections': sections,
'section_titles': section_titles,
'word_count': word_count,
'link_count': link_count
'link_count': link_count,
'link_details': link_details,
'media_count': media_count,
'media_details': media_details
}
except requests.exceptions.RequestException as e:
@ -202,6 +271,21 @@ def analyze_wiki_pages(pages):
if 'en' not in lang_pages or 'fr' not in lang_pages:
if 'en' in lang_pages:
# French page is missing
# For missing French pages, calculate a high staleness score
# Use word count as the main factor (50% weight)
missing_staleness_score = (
30 * 0.2 + # Assume 30 days outdated (20%)
lang_pages['en']['word_count'] / 100 * 0.5 + # Word count (50%)
lang_pages['en']['sections'] * 0.15 + # Sections (15%)
lang_pages['en']['link_count'] / 10 * 0.15 # Links (15%)
)
# Round to 2 decimal places and ensure it's high
missing_staleness_score = max(100, round(missing_staleness_score, 2))
# Get media count or default to 0
media_count = lang_pages['en'].get('media_count', 0)
needs_update.append({
'key': key,
'reason': 'French page missing',
@ -211,7 +295,12 @@ def analyze_wiki_pages(pages):
'word_diff': lang_pages['en']['word_count'],
'section_diff': lang_pages['en']['sections'],
'link_diff': lang_pages['en']['link_count'],
'priority': 100 # High priority for missing pages
'media_diff': media_count,
'staleness_score': missing_staleness_score,
'priority': missing_staleness_score, # Use staleness score as priority
'section_comparison': None, # No comparison possible
'link_comparison': None, # No comparison possible
'media_comparison': None # No comparison possible
})
continue
@ -231,28 +320,130 @@ def analyze_wiki_pages(pages):
word_diff = en_page['word_count'] - fr_page['word_count']
section_diff = en_page['sections'] - fr_page['sections']
link_diff = en_page['link_count'] - fr_page['link_count']
media_diff = en_page.get('media_count', 0) - fr_page.get('media_count', 0)
# Calculate priority score (higher means needs more urgent update)
# Weight factors can be adjusted
priority = (
abs(date_diff) * 0.4 + # Date difference
abs(word_diff) / 100 * 0.25 + # Word count difference (normalized)
abs(section_diff) * 0.2 + # Section difference
abs(link_diff) / 10 * 0.15 # Link count difference (normalized)
# Calculate staleness score (higher means more outdated/stale)
# Weight factors adjusted to emphasize word count differences
staleness_score = (
abs(date_diff) * 0.2 + # Date difference (20%)
abs(word_diff) / 100 * 0.5 + # Word count difference (normalized) (50%)
abs(section_diff) * 0.15 + # Section difference (15%)
abs(link_diff) / 10 * 0.15 # Link count difference (normalized) (15%)
)
# Round to 2 decimal places for display
staleness_score = round(staleness_score, 2)
# Compare sections between English and French pages
section_comparison = {
'en_only': [],
'fr_only': [],
'common': []
}
# Extract section titles for comparison
en_sections = {section['title'].lower(): section for section in en_page.get('section_titles', [])}
fr_sections = {section['title'].lower(): section for section in fr_page.get('section_titles', [])}
# Find sections only in English
for title, section in en_sections.items():
if title not in fr_sections:
section_comparison['en_only'].append(section)
# Find sections only in French
for title, section in fr_sections.items():
if title not in en_sections:
section_comparison['fr_only'].append(section)
# Find common sections
for title in en_sections.keys():
if title in fr_sections:
section_comparison['common'].append({
'en': en_sections[title],
'fr': fr_sections[title]
})
# Compare links between English and French pages
link_comparison = {
'en_only': [],
'fr_only': [],
'common': []
}
# Extract link texts for comparison (case insensitive)
en_links = {link['text'].lower(): link for link in en_page.get('link_details', [])}
fr_links = {link['text'].lower(): link for link in fr_page.get('link_details', [])}
# Find links only in English
for text, link in en_links.items():
if text not in fr_links:
link_comparison['en_only'].append(link)
# Find links only in French
for text, link in fr_links.items():
if text not in en_links:
link_comparison['fr_only'].append(link)
# Find common links
for text in en_links.keys():
if text in fr_links:
link_comparison['common'].append({
'en': en_links[text],
'fr': fr_links[text]
})
# Compare media between English and French pages
media_comparison = {
'en_only': [],
'fr_only': [],
'common': []
}
# Extract media alt texts for comparison (case insensitive)
en_media = {media['alt'].lower(): media for media in en_page.get('media_details', []) if media['alt']}
fr_media = {media['alt'].lower(): media for media in fr_page.get('media_details', []) if media['alt']}
# Find media only in English
for alt, media in en_media.items():
if alt not in fr_media:
media_comparison['en_only'].append(media)
# Find media only in French
for alt, media in fr_media.items():
if alt not in en_media:
media_comparison['fr_only'].append(media)
# Find common media
for alt in en_media.keys():
if alt in fr_media:
media_comparison['common'].append({
'en': en_media[alt],
'fr': fr_media[alt]
})
# Add media without alt text to their respective language-only lists
for media in en_page.get('media_details', []):
if not media['alt'] or media['alt'].lower() not in en_media:
media_comparison['en_only'].append(media)
for media in fr_page.get('media_details', []):
if not media['alt'] or media['alt'].lower() not in fr_media:
media_comparison['fr_only'].append(media)
if date_diff > 30 or word_diff > 200 or section_diff > 2 or link_diff > 20 or fr_page['word_count'] < en_page['word_count'] * 0.7:
reason = []
if date_diff > 30:
reason.append(f"French page outdated by {date_diff} days")
reason.append(f"La version Française est datée de {date_diff} jours")
if word_diff > 200:
reason.append(f"English page has {word_diff} more words")
reason.append(f"La version Anglaise a {word_diff} plus de mots")
if section_diff > 2:
reason.append(f"English page has {section_diff} more sections")
reason.append(f"La version Anglaise a {section_diff} plus de sections")
if link_diff > 20:
reason.append(f"English page has {link_diff} more links")
reason.append(f"La version Anglaise a {link_diff} plus de liens")
if media_diff > 5:
reason.append(f"La version Anglaise a {media_diff} plus d'images")
if fr_page['word_count'] < en_page['word_count'] * 0.7:
reason.append(f"French page is only {fr_page['word_count'] / en_page['word_count']:.0%} of English content")
reason.append(f"La version Française a seulement {fr_page['word_count'] / en_page['word_count']:.0%} % du contenu en Anglais.")
needs_update.append({
'key': key,
@ -263,7 +454,12 @@ def analyze_wiki_pages(pages):
'word_diff': word_diff,
'section_diff': section_diff,
'link_diff': link_diff,
'priority': priority
'media_diff': media_diff,
'staleness_score': staleness_score,
'priority': staleness_score, # Use staleness score as priority
'section_comparison': section_comparison,
'link_comparison': link_comparison,
'media_comparison': media_comparison
})
# Sort by priority (descending)
@ -279,7 +475,7 @@ def main():
os.makedirs(os.path.dirname(os.path.abspath(__file__)), exist_ok=True)
# Fetch top keys
top_keys = fetch_top_keys(10)
top_keys = fetch_top_keys(NUM_WIKI_PAGES)
if not top_keys:
logger.error("Failed to fetch top keys. Exiting.")
@ -304,16 +500,96 @@ def main():
if fr_page:
wiki_pages.append(fr_page)
# Save wiki pages to CSV
# Process wiki pages to add staleness score
processed_wiki_pages = []
pages_by_key = {}
# Group pages by key
for page in wiki_pages:
if page is None:
continue
key = page['key']
if key not in pages_by_key:
pages_by_key[key] = {}
pages_by_key[key][page['language']] = page
# Calculate staleness score for each pair of pages
for key, lang_pages in pages_by_key.items():
# Add English page with staleness score
if 'en' in lang_pages:
en_page = lang_pages['en'].copy()
# If French page exists, calculate staleness score
if 'fr' in lang_pages:
fr_page = lang_pages['fr']
# Skip if dates are missing
if en_page['last_modified'] and fr_page['last_modified']:
# Calculate date difference in days
en_date = datetime.strptime(en_page['last_modified'], '%Y-%m-%d')
fr_date = datetime.strptime(fr_page['last_modified'], '%Y-%m-%d')
date_diff = (en_date - fr_date).days
# Calculate content differences
word_diff = en_page['word_count'] - fr_page['word_count']
section_diff = en_page['sections'] - fr_page['sections']
link_diff = en_page['link_count'] - fr_page['link_count']
# Calculate staleness score
staleness_score = (
abs(date_diff) * 0.2 +
abs(word_diff) / 100 * 0.5 +
abs(section_diff) * 0.15 +
abs(link_diff) / 10 * 0.15
)
# Round to 2 decimal places
staleness_score = round(staleness_score, 2)
en_page['staleness_score'] = staleness_score
fr_page['staleness_score'] = staleness_score
else:
en_page['staleness_score'] = 0
fr_page['staleness_score'] = 0
processed_wiki_pages.append(en_page)
processed_wiki_pages.append(fr_page)
else:
# French page is missing, calculate a high staleness score
missing_staleness_score = (
30 * 0.2 +
en_page['word_count'] / 100 * 0.5 +
en_page['sections'] * 0.15 +
en_page['link_count'] / 10 * 0.15
)
# Round to 2 decimal places and ensure it's high
missing_staleness_score = max(100, round(missing_staleness_score, 2))
en_page['staleness_score'] = missing_staleness_score
processed_wiki_pages.append(en_page)
# Add French page without English counterpart (rare case)
elif 'fr' in lang_pages:
fr_page = lang_pages['fr'].copy()
fr_page['staleness_score'] = 0
processed_wiki_pages.append(fr_page)
# Save processed wiki pages to CSV
try:
with open(WIKI_PAGES_CSV, 'w', newline='', encoding='utf-8') as f:
fieldnames = ['key', 'language', 'url', 'last_modified', 'sections', 'word_count', 'link_count']
# Basic fields for CSV (detailed content will be in JSON only)
fieldnames = ['key', 'language', 'url', 'last_modified', 'sections', 'word_count', 'link_count', 'media_count', 'staleness_score']
writer = csv.DictWriter(f, fieldnames=fieldnames)
writer.writeheader()
for page in wiki_pages:
for page in processed_wiki_pages:
if page: # Skip None values
writer.writerow(page)
# Create a copy with only the CSV fields
csv_page = {field: page.get(field, '') for field in fieldnames if field in page}
writer.writerow(csv_page)
logger.info(f"Wiki page data saved to {WIKI_PAGES_CSV}")
@ -327,10 +603,10 @@ def main():
# Save pages that need updating to JSON
save_to_json(pages_to_update, OUTDATED_PAGES_FILE)
# Print the top 10 pages needing updates
print("\n===== TOP 10 WIKI PAGES NEEDING UPDATES =====")
# Print the top pages needing updates
print(f"\n===== TOP {min(NUM_WIKI_PAGES, len(pages_to_update))} WIKI PAGES NEEDING UPDATES =====")
for i, page in enumerate(pages_to_update[:10], 1):
for i, page in enumerate(pages_to_update[:NUM_WIKI_PAGES], 1):
key = page['key']
reason = page['reason']
en_url = page['en_page']['url'] if page['en_page'] else "N/A"

View file

@ -1,21 +1,40 @@
key,language,url,last_modified,sections,word_count,link_count
building,en,https://wiki.openstreetmap.org/wiki/Key:building,2025-06-10,31,3873,712
building,fr,https://wiki.openstreetmap.org/wiki/FR:Key:building,2025-05-22,25,3280,629
source,en,https://wiki.openstreetmap.org/wiki/Key:source,2025-08-12,27,2851,399
source,fr,https://wiki.openstreetmap.org/wiki/FR:Key:source,2024-02-07,23,2692,315
highway,en,https://wiki.openstreetmap.org/wiki/Key:highway,2025-04-10,30,4225,865
highway,fr,https://wiki.openstreetmap.org/wiki/FR:Key:highway,2025-01-05,30,4240,780
addr:housenumber,en,https://wiki.openstreetmap.org/wiki/Key:addr:housenumber,2025-07-24,11,429,182
addr:housenumber,fr,https://wiki.openstreetmap.org/wiki/FR:Key:addr:housenumber,2025-03-26,15,1754,236
addr:street,en,https://wiki.openstreetmap.org/wiki/Key:addr:street,2024-10-29,12,701,186
addr:street,fr,https://wiki.openstreetmap.org/wiki/FR:Key:addr:street,2025-03-26,15,1754,236
addr:city,en,https://wiki.openstreetmap.org/wiki/Key:addr:city,2025-07-29,15,901,190
addr:city,fr,https://wiki.openstreetmap.org/wiki/FR:Key:addr:city,2025-03-26,15,1754,236
name,en,https://wiki.openstreetmap.org/wiki/Key:name,2025-07-25,17,2295,366
name,fr,https://wiki.openstreetmap.org/wiki/FR:Key:name,2025-01-16,21,1819,272
addr:postcode,en,https://wiki.openstreetmap.org/wiki/Key:addr:postcode,2024-10-29,14,481,168
addr:postcode,fr,https://wiki.openstreetmap.org/wiki/FR:Key:addr:postcode,2025-03-26,15,1754,236
natural,en,https://wiki.openstreetmap.org/wiki/Key:natural,2025-07-17,17,2169,620
natural,fr,https://wiki.openstreetmap.org/wiki/FR:Key:natural,2025-04-21,13,1598,540
surface,en,https://wiki.openstreetmap.org/wiki/Key:surface,2025-06-29,24,3574,676
surface,fr,https://wiki.openstreetmap.org/wiki/FR:Key:surface,2022-02-22,13,2686,546
key,language,url,last_modified,sections,word_count,link_count,media_count,staleness_score
building,en,https://wiki.openstreetmap.org/wiki/Key:building,2025-06-10,31,3873,712,158,8.91
building,fr,https://wiki.openstreetmap.org/wiki/FR:Key:building,2025-05-22,25,3280,629,155,8.91
source,en,https://wiki.openstreetmap.org/wiki/Key:source,2025-08-12,27,2851,399,42,113.06
source,fr,https://wiki.openstreetmap.org/wiki/FR:Key:source,2024-02-07,23,2692,315,35,113.06
highway,en,https://wiki.openstreetmap.org/wiki/Key:highway,2025-04-10,30,4225,865,314,20.35
highway,fr,https://wiki.openstreetmap.org/wiki/FR:Key:highway,2025-01-05,30,4240,780,313,20.35
addr:housenumber,en,https://wiki.openstreetmap.org/wiki/Key:addr:housenumber,2025-07-24,11,429,182,20,32.04
addr:housenumber,fr,https://wiki.openstreetmap.org/wiki/FR:Key:addr:housenumber,2025-03-26,15,1754,236,78,32.04
addr:street,en,https://wiki.openstreetmap.org/wiki/Key:addr:street,2024-10-29,12,701,186,16,36.07
addr:street,fr,https://wiki.openstreetmap.org/wiki/FR:Key:addr:street,2025-03-26,15,1754,236,78,36.07
addr:city,en,https://wiki.openstreetmap.org/wiki/Key:addr:city,2025-07-29,15,901,190,17,29.96
addr:city,fr,https://wiki.openstreetmap.org/wiki/FR:Key:addr:city,2025-03-26,15,1754,236,78,29.96
name,en,https://wiki.openstreetmap.org/wiki/Key:name,2025-07-25,17,2295,366,82,42.39
name,fr,https://wiki.openstreetmap.org/wiki/FR:Key:name,2025-01-16,21,1819,272,60,42.39
addr:postcode,en,https://wiki.openstreetmap.org/wiki/Key:addr:postcode,2024-10-29,14,481,168,11,37.14
addr:postcode,fr,https://wiki.openstreetmap.org/wiki/FR:Key:addr:postcode,2025-03-26,15,1754,236,78,37.14
natural,en,https://wiki.openstreetmap.org/wiki/Key:natural,2025-07-17,17,2169,620,189,22.06
natural,fr,https://wiki.openstreetmap.org/wiki/FR:Key:natural,2025-04-21,13,1598,540,174,22.06
surface,en,https://wiki.openstreetmap.org/wiki/Key:surface,2025-06-29,24,3574,676,238,252.64
surface,fr,https://wiki.openstreetmap.org/wiki/FR:Key:surface,2022-02-22,13,2686,546,232,252.64
addr:country,en,https://wiki.openstreetmap.org/wiki/Key:addr:country,2024-12-01,9,283,150,11,22.96
addr:country,fr,https://wiki.openstreetmap.org/wiki/FR:Key:addr:country,2025-03-25,8,286,150,11,22.96
landuse,en,https://wiki.openstreetmap.org/wiki/Key:landuse,2025-03-01,17,2170,531,168,39.41
landuse,fr,https://wiki.openstreetmap.org/wiki/FR:Key:landuse,2024-08-20,19,2152,503,182,39.41
power,en,https://wiki.openstreetmap.org/wiki/Key:power,2025-02-28,20,740,212,21,124.89
power,fr,https://wiki.openstreetmap.org/wiki/FR:Key:power,2023-06-27,14,489,190,25,124.89
waterway,en,https://wiki.openstreetmap.org/wiki/Key:waterway,2025-03-10,21,1929,450,118,77.94
waterway,fr,https://wiki.openstreetmap.org/wiki/FR:Key:waterway,2024-03-08,18,1390,357,113,77.94
building:levels,en,https://wiki.openstreetmap.org/wiki/Key:building:levels,2025-08-13,16,1450,289,25,76.11
building:levels,fr,https://wiki.openstreetmap.org/wiki/FR:Key:building:levels,2024-08-01,15,1556,287,26,76.11
amenity,en,https://wiki.openstreetmap.org/wiki/Key:amenity,2025-03-16,29,3139,999,504,128.43
amenity,fr,https://wiki.openstreetmap.org/wiki/FR:Key:amenity,2023-07-19,22,2245,885,487,128.43
barrier,en,https://wiki.openstreetmap.org/wiki/Key:barrier,2025-04-15,17,2236,528,173,207.98
barrier,fr,https://wiki.openstreetmap.org/wiki/FR:Key:barrier,2022-08-16,15,641,188,18,207.98
source:date,en,https://wiki.openstreetmap.org/wiki/Key:source:date,2023-04-01,11,494,160,10,22.47
source:date,fr,https://wiki.openstreetmap.org/wiki/FR:Key:source:date,2023-07-21,10,518,160,11,22.47
service,en,https://wiki.openstreetmap.org/wiki/Key:service,2025-03-16,22,1535,303,17,83.79
service,fr,https://wiki.openstreetmap.org/wiki/FR:Key:service,2024-03-04,11,542,185,10,83.79
addr:state,en,https://wiki.openstreetmap.org/wiki/Key:addr:state,2023-06-23,12,388,159,11,100

1 key language url last_modified sections word_count link_count media_count staleness_score
2 building en https://wiki.openstreetmap.org/wiki/Key:building 2025-06-10 31 3873 712 158 8.91
3 building fr https://wiki.openstreetmap.org/wiki/FR:Key:building 2025-05-22 25 3280 629 155 8.91
4 source en https://wiki.openstreetmap.org/wiki/Key:source 2025-08-12 27 2851 399 42 113.06
5 source fr https://wiki.openstreetmap.org/wiki/FR:Key:source 2024-02-07 23 2692 315 35 113.06
6 highway en https://wiki.openstreetmap.org/wiki/Key:highway 2025-04-10 30 4225 865 314 20.35
7 highway fr https://wiki.openstreetmap.org/wiki/FR:Key:highway 2025-01-05 30 4240 780 313 20.35
8 addr:housenumber en https://wiki.openstreetmap.org/wiki/Key:addr:housenumber 2025-07-24 11 429 182 20 32.04
9 addr:housenumber fr https://wiki.openstreetmap.org/wiki/FR:Key:addr:housenumber 2025-03-26 15 1754 236 78 32.04
10 addr:street en https://wiki.openstreetmap.org/wiki/Key:addr:street 2024-10-29 12 701 186 16 36.07
11 addr:street fr https://wiki.openstreetmap.org/wiki/FR:Key:addr:street 2025-03-26 15 1754 236 78 36.07
12 addr:city en https://wiki.openstreetmap.org/wiki/Key:addr:city 2025-07-29 15 901 190 17 29.96
13 addr:city fr https://wiki.openstreetmap.org/wiki/FR:Key:addr:city 2025-03-26 15 1754 236 78 29.96
14 name en https://wiki.openstreetmap.org/wiki/Key:name 2025-07-25 17 2295 366 82 42.39
15 name fr https://wiki.openstreetmap.org/wiki/FR:Key:name 2025-01-16 21 1819 272 60 42.39
16 addr:postcode en https://wiki.openstreetmap.org/wiki/Key:addr:postcode 2024-10-29 14 481 168 11 37.14
17 addr:postcode fr https://wiki.openstreetmap.org/wiki/FR:Key:addr:postcode 2025-03-26 15 1754 236 78 37.14
18 natural en https://wiki.openstreetmap.org/wiki/Key:natural 2025-07-17 17 2169 620 189 22.06
19 natural fr https://wiki.openstreetmap.org/wiki/FR:Key:natural 2025-04-21 13 1598 540 174 22.06
20 surface en https://wiki.openstreetmap.org/wiki/Key:surface 2025-06-29 24 3574 676 238 252.64
21 surface fr https://wiki.openstreetmap.org/wiki/FR:Key:surface 2022-02-22 13 2686 546 232 252.64
22 addr:country en https://wiki.openstreetmap.org/wiki/Key:addr:country 2024-12-01 9 283 150 11 22.96
23 addr:country fr https://wiki.openstreetmap.org/wiki/FR:Key:addr:country 2025-03-25 8 286 150 11 22.96
24 landuse en https://wiki.openstreetmap.org/wiki/Key:landuse 2025-03-01 17 2170 531 168 39.41
25 landuse fr https://wiki.openstreetmap.org/wiki/FR:Key:landuse 2024-08-20 19 2152 503 182 39.41
26 power en https://wiki.openstreetmap.org/wiki/Key:power 2025-02-28 20 740 212 21 124.89
27 power fr https://wiki.openstreetmap.org/wiki/FR:Key:power 2023-06-27 14 489 190 25 124.89
28 waterway en https://wiki.openstreetmap.org/wiki/Key:waterway 2025-03-10 21 1929 450 118 77.94
29 waterway fr https://wiki.openstreetmap.org/wiki/FR:Key:waterway 2024-03-08 18 1390 357 113 77.94
30 building:levels en https://wiki.openstreetmap.org/wiki/Key:building:levels 2025-08-13 16 1450 289 25 76.11
31 building:levels fr https://wiki.openstreetmap.org/wiki/FR:Key:building:levels 2024-08-01 15 1556 287 26 76.11
32 amenity en https://wiki.openstreetmap.org/wiki/Key:amenity 2025-03-16 29 3139 999 504 128.43
33 amenity fr https://wiki.openstreetmap.org/wiki/FR:Key:amenity 2023-07-19 22 2245 885 487 128.43
34 barrier en https://wiki.openstreetmap.org/wiki/Key:barrier 2025-04-15 17 2236 528 173 207.98
35 barrier fr https://wiki.openstreetmap.org/wiki/FR:Key:barrier 2022-08-16 15 641 188 18 207.98
36 source:date en https://wiki.openstreetmap.org/wiki/Key:source:date 2023-04-01 11 494 160 10 22.47
37 source:date fr https://wiki.openstreetmap.org/wiki/FR:Key:source:date 2023-07-21 10 518 160 11 22.47
38 service en https://wiki.openstreetmap.org/wiki/Key:service 2025-03-16 22 1535 303 17 83.79
39 service fr https://wiki.openstreetmap.org/wiki/FR:Key:service 2024-03-04 11 542 185 10 83.79
40 addr:state en https://wiki.openstreetmap.org/wiki/Key:addr:state 2023-06-23 12 388 159 11 100