up wiki land

This commit is contained in:
Tykayn 2025-08-22 18:19:20 +02:00 committed by tykayn
parent 391a212034
commit e533c273b2
10 changed files with 1116 additions and 182 deletions

View file

@ -0,0 +1,4 @@
{
"last_updated": "2025-08-22T18:13:20.641943",
"recent_changes": []
}

View file

@ -8,6 +8,112 @@ use Symfony\Component\Routing\Annotation\Route;
class WikiController extends AbstractController class WikiController extends AbstractController
{ {
/**
 * Finds sections whose heading level jumps more than one step deeper than
 * the heading that precedes them (for instance an h4 right after an h2).
 *
 * Entries without a 'level', or whose level casts to 0, are ignored and do
 * not influence the comparison. The first entry with a usable level only
 * serves as the baseline and is never reported.
 *
 * @param array $sections List of sections, each optionally holding a 'level' key
 * @return array Keys of $sections whose level exceeds the previous level by more than one
 */
private function detectHeadingHierarchyErrors(array $sections): array
{
    $flagged = [];
    $previousLevel = 0;

    foreach ($sections as $key => $entry) {
        $level = (int)($entry['level'] ?? 0);

        // Entries without a usable level are skipped entirely
        if ($level === 0) {
            continue;
        }

        // A previous level of 0 means no baseline has been recorded yet
        $skipsALevel = $previousLevel !== 0 && $level > $previousLevel + 1;
        if ($skipsALevel) {
            $flagged[] = $key;
        }

        $previousLevel = $level;
    }

    return $flagged;
}
/**
 * Displays the recent changes stored in wiki_compare/recent_changes.json.
 *
 * When the file is missing, or its 'last_updated' timestamp is at least one
 * hour away from now (or cannot be parsed), the fetch script is run once and
 * the file is read again. The page is then rendered with whatever data is
 * available. There is deliberately no redirect back to this route: a failing
 * script or a timestamp that stays stale would otherwise make every request
 * refresh and redirect again, producing a redirect loop.
 *
 * @return Response The rendered admin/wiki_recent_changes.html.twig page
 */
#[Route('/wiki/recent-changes', name: 'app_admin_wiki_recent_changes')]
public function recentChanges(): Response
{
    $recentChangesFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/recent_changes.json';

    $fileExisted = file_exists($recentChangesFile);
    [$recentChanges, $lastUpdated, $isStale] = $this->loadRecentChangesFile($recentChangesFile);

    if (!$fileExisted || $isStale) {
        // Refresh at most once per request, then show what is on disk
        $this->refreshRecentChangesData();
        clearstatcache(true, $recentChangesFile);

        if (file_exists($recentChangesFile)) {
            [$recentChanges, $lastUpdated] = $this->loadRecentChangesFile($recentChangesFile);
        } else {
            $this->addFlash('error', 'Impossible de générer le fichier des changements récents.');
        }
    }

    return $this->render('admin/wiki_recent_changes.html.twig', [
        'recent_changes' => $recentChanges,
        'last_updated' => $lastUpdated
    ]);
}

/**
 * Reads the recent changes JSON file and reports whether its data is stale.
 *
 * A missing or unreadable file, invalid JSON, or a payload without a
 * 'recent_changes' array yields an empty list and a null timestamp. A
 * timestamp that cannot be parsed is reported as stale and returned as null
 * so it is never handed to the template.
 *
 * @param string $file Absolute path of the JSON file
 * @return array Tuple [recent changes (array), last updated (string|null), stale (bool)]
 */
private function loadRecentChangesFile(string $file): array
{
    $recentChanges = [];
    $lastUpdated = null;
    $isStale = false;

    $raw = is_file($file) ? file_get_contents($file) : false;
    $data = $raw === false ? null : json_decode($raw, true);

    if (is_array($data) && isset($data['recent_changes']) && is_array($data['recent_changes'])) {
        $recentChanges = $data['recent_changes'];
        $candidate = $data['last_updated'] ?? null;
        $lastUpdated = is_string($candidate) ? $candidate : null;
    }

    if ($lastUpdated) {
        try {
            $diff = (new \DateTime())->diff(new \DateTime($lastUpdated));
            // 'h' is only the hours component, 'days' covers gaps of 24 hours and more
            $isStale = $diff->h >= 1 || $diff->days > 0;
        } catch (\Exception $e) {
            // Unparseable timestamp: ask for a refresh and do not expose the value
            $isStale = true;
            $lastUpdated = null;
        }
    }

    return [$recentChanges, $lastUpdated, $isStale];
}
/**
 * Refreshes the recent changes data by running wiki_compare/fetch_recent_changes.py.
 *
 * The script is run through python3 with the --force flag, and its stderr is
 * merged into stdout so the captured output can be shown on failure.
 * Problems (missing script, launch failure, non-zero exit code, exception)
 * are reported as flash messages instead of being thrown.
 */
private function refreshRecentChangesData(): void
{
    try {
        $scriptPath = $this->getParameter('kernel.project_dir') . '/wiki_compare/fetch_recent_changes.py';

        if (!file_exists($scriptPath)) {
            $this->addFlash('error', 'Le script fetch_recent_changes.py n\'existe pas.');
            return;
        }

        $output = [];
        // Quote the path so a project directory containing spaces or shell
        // metacharacters cannot break (or alter) the command line
        $lastLine = exec('python3 ' . escapeshellarg($scriptPath) . ' --force 2>&1', $output, $returnCode);

        // exec() returns false when the command could not be run at all
        if ($lastLine === false || $returnCode !== 0) {
            $this->addFlash('warning', 'Impossible de mettre à jour les changements récents. Erreur: ' . implode("\n", $output));
        }
    } catch (\Exception $e) {
        $this->addFlash('error', 'Erreur lors de l\'exécution du script: ' . $e->getMessage());
    }
}
#[Route('/wiki/missing-translations', name: 'app_admin_wiki_missing_translations')] #[Route('/wiki/missing-translations', name: 'app_admin_wiki_missing_translations')]
public function missingTranslations(): Response public function missingTranslations(): Response
{ {
@ -664,11 +770,149 @@ class WikiController extends AbstractController
$mediaComparison['fr_only_count'] = count($frOnlyImages); $mediaComparison['fr_only_count'] = count($frOnlyImages);
} }
// Get link comparison data
$linkComparison = $page['link_comparison'] ?? null;
// Sort links alphabetically by URL if link comparison exists
if ($linkComparison) {
// Sort English-only links
if (isset($linkComparison['en_only']) && is_array($linkComparison['en_only'])) {
usort($linkComparison['en_only'], function($a, $b) {
return strcmp($a['href'], $b['href']);
});
}
// Sort French-only links
if (isset($linkComparison['fr_only']) && is_array($linkComparison['fr_only'])) {
usort($linkComparison['fr_only'], function($a, $b) {
return strcmp($a['href'], $b['href']);
});
}
// Sort common links
if (isset($linkComparison['common']) && is_array($linkComparison['common'])) {
usort($linkComparison['common'], function($a, $b) {
return strcmp($a['en']['href'], $b['en']['href']);
});
}
}
// Get section comparison data and filter out "Contents" sections
$sectionComparison = $page['section_comparison'] ?? null;
// Filter out "Contents" sections if section comparison exists
if ($sectionComparison) {
// Filter common sections
if (isset($sectionComparison['common']) && is_array($sectionComparison['common'])) {
$sectionComparison['common'] = array_filter($sectionComparison['common'], function($section) {
// Skip if either English or French title is "Contents"
return !($section['en']['title'] === 'Contents' || $section['fr']['title'] === 'Sommaire');
});
// Re-index array
$sectionComparison['common'] = array_values($sectionComparison['common']);
}
// Filter English-only sections
if (isset($sectionComparison['en_only']) && is_array($sectionComparison['en_only'])) {
$sectionComparison['en_only'] = array_filter($sectionComparison['en_only'], function($section) {
return $section['title'] !== 'Contents';
});
// Re-index array
$sectionComparison['en_only'] = array_values($sectionComparison['en_only']);
}
// Filter French-only sections
if (isset($sectionComparison['fr_only']) && is_array($sectionComparison['fr_only'])) {
$sectionComparison['fr_only'] = array_filter($sectionComparison['fr_only'], function($section) {
return $section['title'] !== 'Sommaire';
});
// Re-index array
$sectionComparison['fr_only'] = array_values($sectionComparison['fr_only']);
}
}
// Calculate adjusted section counts (excluding "Contents" sections)
$enSectionCount = $enPage['sections'];
$frSectionCount = $frPage['sections'];
// Adjust section counts if we have section comparison data
if ($sectionComparison) {
// Count how many "Contents" sections were filtered out
$contentsFilteredCount = 0;
// Check common sections that were filtered
if (isset($page['section_comparison']['common']) && is_array($page['section_comparison']['common'])) {
foreach ($page['section_comparison']['common'] as $section) {
if ($section['en']['title'] === 'Contents' || $section['fr']['title'] === 'Sommaire') {
$contentsFilteredCount++;
}
}
}
// Check English-only sections that were filtered
if (isset($page['section_comparison']['en_only']) && is_array($page['section_comparison']['en_only'])) {
foreach ($page['section_comparison']['en_only'] as $section) {
if ($section['title'] === 'Contents') {
$contentsFilteredCount++;
}
}
}
// Check French-only sections that were filtered
if (isset($page['section_comparison']['fr_only']) && is_array($page['section_comparison']['fr_only'])) {
foreach ($page['section_comparison']['fr_only'] as $section) {
if ($section['title'] === 'Sommaire') {
$contentsFilteredCount++;
}
}
}
// Adjust section counts
$enSectionCount -= $contentsFilteredCount;
$frSectionCount -= $contentsFilteredCount;
}
// Check for incorrect heading hierarchies
$enHierarchyErrors = [];
$frHierarchyErrors = [];
// Check English sections
if (isset($sectionComparison['en_only']) && is_array($sectionComparison['en_only'])) {
$enHierarchyErrors = $this->detectHeadingHierarchyErrors($sectionComparison['en_only']);
}
// Also check common sections (English side)
if (isset($sectionComparison['common']) && is_array($sectionComparison['common'])) {
$commonEnSections = array_map(function($section) {
return $section['en'];
}, $sectionComparison['common']);
$enHierarchyErrors = array_merge($enHierarchyErrors, $this->detectHeadingHierarchyErrors($commonEnSections));
}
// Check French sections
if (isset($sectionComparison['fr_only']) && is_array($sectionComparison['fr_only'])) {
$frHierarchyErrors = $this->detectHeadingHierarchyErrors($sectionComparison['fr_only']);
}
// Also check common sections (French side)
if (isset($sectionComparison['common']) && is_array($sectionComparison['common'])) {
$commonFrSections = array_map(function($section) {
return $section['fr'];
}, $sectionComparison['common']);
$frHierarchyErrors = array_merge($frHierarchyErrors, $this->detectHeadingHierarchyErrors($commonFrSections));
}
$detailedComparison = [ $detailedComparison = [
'section_comparison' => $page['section_comparison'] ?? null, 'section_comparison' => $sectionComparison,
'link_comparison' => $page['link_comparison'] ?? null, 'link_comparison' => $linkComparison,
'media_comparison' => $mediaComparison, 'media_comparison' => $mediaComparison,
'category_comparison' => $page['category_comparison'] ?? null 'category_comparison' => $page['category_comparison'] ?? null,
'adjusted_en_section_count' => $enSectionCount,
'adjusted_fr_section_count' => $frSectionCount,
'en_hierarchy_errors' => $enHierarchyErrors,
'fr_hierarchy_errors' => $frHierarchyErrors
]; ];
$mediaDiff = $page['media_diff'] ?? 0; $mediaDiff = $page['media_diff'] ?? 0;
@ -841,6 +1085,15 @@ class WikiController extends AbstractController
} }
} }
// Ensure page URLs are strings to prevent array to string conversion errors
if ($frPage && isset($frPage['url']) && is_array($frPage['url'])) {
$frPage['url'] = json_encode($frPage['url']);
}
if ($enPage && isset($enPage['url']) && is_array($enPage['url'])) {
$enPage['url'] = json_encode($enPage['url']);
}
return $this->render('admin/wiki_compare.html.twig', [ return $this->render('admin/wiki_compare.html.twig', [
'key' => $key, 'key' => $key,
'en_page' => $enPage, 'en_page' => $enPage,

View file

@ -41,6 +41,11 @@
<i class="bi bi-people"></i> Groupes OSM-FR <i class="bi bi-people"></i> Groupes OSM-FR
</a> </a>
</li> </li>
<li class="nav-item">
<a class="nav-link {% if app.request.get('_route') == 'app_admin_wiki_recent_changes' %}active{% endif %}" href="{{ path('app_admin_wiki_recent_changes') }}">
<i class="bi bi-clock-history"></i> Changements récents
</a>
</li>
</ul> </ul>
</div> </div>
</div> </div>

View file

@ -58,7 +58,13 @@
<div class="card h-100"> <div class="card h-100">
<div class="card-header bg-primary text-white"> <div class="card-header bg-primary text-white">
<h3>Sections en anglais</h3> <h3>Sections en anglais</h3>
<span class="badge bg-light text-dark">{{ en_page.sections }} sections</span> <span class="badge bg-light text-dark">
{% if detailed_comparison.adjusted_en_section_count is defined %}
{{ detailed_comparison.adjusted_en_section_count }} sections ({{ en_page.sections }} avec "Contents")
{% else %}
{{ en_page.sections }} sections
{% endif %}
</span>
</div> </div>
<div class="card-body"> <div class="card-body">
<h4>Sections alignées par hiérarchie</h4> <h4>Sections alignées par hiérarchie</h4>
@ -67,6 +73,9 @@
<li class="list-group-item title-level-{{ section.en.level }}"> <li class="list-group-item title-level-{{ section.en.level }}">
{# <span class="badge bg-secondary">h{{ section.en.level }}</span> #} {# <span class="badge bg-secondary">h{{ section.en.level }}</span> #}
{{ section.en.title }} {{ section.en.title }}
{% if detailed_comparison.en_hierarchy_errors is defined and loop.index0 in detailed_comparison.en_hierarchy_errors %}
<span class="badge bg-danger ms-2" title="Hiérarchie incorrecte">!</span>
{% endif %}
</li> </li>
{% endfor %} {% endfor %}
@ -74,6 +83,9 @@
<li class="list-group-item list-group-item-warning title-level-{{ section.level }}"> <li class="list-group-item list-group-item-warning title-level-{{ section.level }}">
{# <span class="badge bg-secondary">h{{ section.level }}</span> #} {# <span class="badge bg-secondary">h{{ section.level }}</span> #}
{{ section.title }} {{ section.title }}
{% if detailed_comparison.en_hierarchy_errors is defined and (detailed_comparison.section_comparison.common|length + loop.index0) in detailed_comparison.en_hierarchy_errors %}
<span class="badge bg-danger ms-2" title="Hiérarchie incorrecte">!</span>
{% endif %}
</li> </li>
{% endfor %} {% endfor %}
</ul> </ul>
@ -84,7 +96,13 @@
<div class="card h-100"> <div class="card h-100">
<div class="card-header bg-info text-white"> <div class="card-header bg-info text-white">
<h3>Sections en français</h3> <h3>Sections en français</h3>
<span class="badge bg-light text-dark">{{ fr_page.sections }} sections</span> <span class="badge bg-light text-dark">
{% if detailed_comparison.adjusted_fr_section_count is defined %}
{{ detailed_comparison.adjusted_fr_section_count }} sections ({{ fr_page.sections }} avec "Sommaire")
{% else %}
{{ fr_page.sections }} sections
{% endif %}
</span>
</div> </div>
<div class="card-body"> <div class="card-body">
<h4>Sections alignées par hiérarchie</h4> <h4>Sections alignées par hiérarchie</h4>
@ -93,6 +111,9 @@
<li class="list-group-item title-level-{{ section.fr.level }}"> <li class="list-group-item title-level-{{ section.fr.level }}">
{# <span class="badge bg-secondary">h{{ section.fr.level }}</span> #} {# <span class="badge bg-secondary">h{{ section.fr.level }}</span> #}
{{ section.fr.title }} {{ section.fr.title }}
{% if detailed_comparison.fr_hierarchy_errors is defined and loop.index0 in detailed_comparison.fr_hierarchy_errors %}
<span class="badge bg-danger ms-2" title="Hiérarchie incorrecte">!</span>
{% endif %}
</li> </li>
{% endfor %} {% endfor %}
@ -100,7 +121,9 @@
<li class="list-group-item list-group-item-info title-level-{{ section.level }}"> <li class="list-group-item list-group-item-info title-level-{{ section.level }}">
{# <span class="badge bg-secondary">h{{ section.level }}</span> #} {# <span class="badge bg-secondary">h{{ section.level }}</span> #}
{{ section.title }} {{ section.title }}
{% if detailed_comparison.fr_hierarchy_errors is defined and (detailed_comparison.section_comparison.common|length + loop.index0) in detailed_comparison.fr_hierarchy_errors %}
<span class="badge bg-danger ms-2" title="Hiérarchie incorrecte">!</span>
{% endif %}
</li> </li>
{% endfor %} {% endfor %}
</ul> </ul>
@ -227,59 +250,41 @@
<div class="card-body"> <div class="card-body">
<h4>Liens uniquement en anglais <h4>Comparaison des liens côte à côte</h4>
({{ detailed_comparison.link_comparison.en_only|length }})</h4>
<div class="table-responsive"> <div class="table-responsive">
<table class="table table-sm table-warning"> <table class="table table-sm">
<thead> <thead>
<tr> <tr>
<th>Texte</th> <th class="bg-primary text-white">Texte EN</th>
<th>URL</th> <th class="bg-primary text-white">URL EN</th>
<th class="bg-info text-white">Texte FR</th>
<th class="bg-info text-white">URL FR</th>
</tr> </tr>
</thead> </thead>
<tbody> <tbody>
{% for link in detailed_comparison.link_comparison.en_only %} {% set en_links = detailed_comparison.link_comparison.en_only %}
{% set fr_links = detailed_comparison.link_comparison.fr_only %}
{% set max_links = max(en_links|length, fr_links|length) %}
{% for i in 0..(max_links - 1) %}
<tr> <tr>
<td>{{ link.text }}</td> {% if i < en_links|length %}
<td><a href="{{ link.href }}" target="_blank" <td class="bg-light">{{ en_links[i].text }}</td>
class="small">{{ link.href|slice(0, 30) }}...</a></td> <td class="bg-light"><a href="{{ en_links[i].href }}" target="_blank" class="small">{{ en_links[i].href|slice(0, 30) }}...</a></td>
{% else %}
<td class="bg-light"></td>
<td class="bg-light"></td>
{% endif %}
{% if i < fr_links|length %}
<td>{{ fr_links[i].text }}</td>
<td><a href="{{ fr_links[i].href }}" target="_blank" class="small">{{ fr_links[i].href|slice(0, 30) }}...</a></td>
{% else %}
<td></td>
<td></td>
{% endif %}
</tr> </tr>
{% endfor %} {% endfor %}
</tbody>
</table>
</div>
</div>
</div>
</div>
<div class="col-md-6">
<div class="card h-100">
<div class="card-header bg-info text-white">
<h3>Liens en français</h3>
<span class="badge bg-light text-dark">{{ fr_page.link_count }} liens</span>
</div>
<div class="card-body">
<h4>Liens uniquement en français
({{ detailed_comparison.link_comparison.fr_only|length }})</h4>
<div class="table-responsive">
<table class="table table-sm table-info">
<thead>
<tr>
<th>Texte</th>
<th>URL</th>
</tr>
</thead>
<tbody>
{% for link in detailed_comparison.link_comparison.fr_only %}
<tr>
<td>{{ link.text }}</td>
<td><a href="{{ link.href }}" target="_blank"
class="small">{{ link.href|slice(0, 30) }}...</a></td>
</tr>
{% endfor %}
</tbody> </tbody>
</table> </table>
</div> </div>
@ -377,7 +382,13 @@
<ul class="list-group mb-3"> <ul class="list-group mb-3">
<li class="list-group-item d-flex justify-content-between align-items-center"> <li class="list-group-item d-flex justify-content-between align-items-center">
Sections Sections
<span class="badge bg-primary rounded-pill">{{ en_page.sections }}</span> <span class="badge bg-primary rounded-pill">
{% if detailed_comparison.adjusted_en_section_count is defined %}
{{ detailed_comparison.adjusted_en_section_count }}
{% else %}
{{ en_page.sections }}
{% endif %}
</span>
</li> </li>
<li class="list-group-item d-flex justify-content-between align-items-center"> <li class="list-group-item d-flex justify-content-between align-items-center">
Mots Mots
@ -510,7 +521,13 @@
<ul class="list-group mb-3"> <ul class="list-group mb-3">
<li class="list-group-item d-flex justify-content-between align-items-center"> <li class="list-group-item d-flex justify-content-between align-items-center">
Sections Sections
<span class="badge bg-primary rounded-pill">{{ en_page.sections }}</span> <span class="badge bg-primary rounded-pill">
{% if detailed_comparison.adjusted_en_section_count is defined %}
{{ detailed_comparison.adjusted_en_section_count }}
{% else %}
{{ en_page.sections }}
{% endif %}
</span>
</li> </li>
<li class="list-group-item d-flex justify-content-between align-items-center"> <li class="list-group-item d-flex justify-content-between align-items-center">
Mots Mots
@ -547,7 +564,13 @@
<ul class="list-group mb-3"> <ul class="list-group mb-3">
<li class="list-group-item d-flex justify-content-between align-items-center"> <li class="list-group-item d-flex justify-content-between align-items-center">
Sections Sections
<span class="badge bg-info rounded-pill">{{ fr_page.sections }}</span> <span class="badge bg-info rounded-pill">
{% if detailed_comparison.adjusted_fr_section_count is defined %}
{{ detailed_comparison.adjusted_fr_section_count }}
{% else %}
{{ fr_page.sections }}
{% endif %}
</span>
</li> </li>
<li class="list-group-item d-flex justify-content-between align-items-center"> <li class="list-group-item d-flex justify-content-between align-items-center">
Mots Mots

View file

@ -96,26 +96,103 @@
</div> </div>
<div class="card-body"> <div class="card-body">
{% if local_groups|length > 0 %} {% if local_groups|length > 0 %}
<!-- Filtres -->
<div class="mb-4">
<div class="btn-group" role="group" aria-label="Filtres">
<button type="button" class="btn btn-outline-primary active filter-btn" data-filter="all">Tous</button>
<button type="button" class="btn btn-outline-primary filter-btn" data-filter="wiki">Wiki</button>
<button type="button" class="btn btn-outline-primary filter-btn" data-filter="framacalc">Framacalc</button>
<button type="button" class="btn btn-outline-primary filter-btn" data-filter="has-wiki">Avec page wiki</button>
<button type="button" class="btn btn-outline-primary filter-btn" data-filter="no-wiki">Sans page wiki</button>
</div>
</div>
<div class="row row-cols-1 row-cols-md-2 row-cols-lg-3 g-4"> <div class="row row-cols-1 row-cols-md-2 row-cols-lg-3 g-4">
{% for group in local_groups %} {% for group in local_groups %}
<div class="col"> {% set source = group.source|default('wiki') %}
<div class="card h-100"> {% set has_wiki = group.has_wiki_page|default(true) %}
{% set filter_classes = source ~ ' ' ~ (has_wiki ? 'has-wiki' : 'no-wiki') %}
<div class="col group-item {{ filter_classes }}">
<div class="card h-100 {% if source == 'framacalc' and not has_wiki %}border-danger{% endif %}">
{% if source == 'framacalc' %}
<div class="card-header bg-light">
<span class="badge bg-secondary">Framacalc</span>
{% if has_wiki %}
<span class="badge bg-success">Page wiki</span>
{% else %}
<span class="badge bg-danger">Pas de page wiki</span>
{% endif %}
</div>
{% endif %}
<div class="card-body"> <div class="card-body">
<h5 class="card-title">{{ group.name }}</h5> <h5 class="card-title">{{ group.name }}</h5>
{% if group.description %} {% if group.description %}
<p class="card-text">{{ group.description }}</p> <p class="card-text">{{ group.description }}</p>
{% endif %} {% endif %}
{% if source == 'framacalc' and group.contact %}
<p class="card-text"><small class="text-muted">Contact: {{ group.contact }}</small></p>
{% endif %}
{% if source == 'framacalc' and group.website %}
<p class="card-text">
<a href="{{ group.website }}" target="_blank" class="btn btn-sm btn-outline-secondary">
<i class="bi bi-globe"></i> Site web
</a>
</p>
{% endif %}
</div> </div>
<div class="card-footer"> <div class="card-footer">
<a href="{{ group.url }}" target="_blank" {% if source == 'wiki' or has_wiki %}
class="btn btn-sm btn-outline-primary"> <a href="{{ group.url }}" target="_blank"
<i class="bi bi-box-arrow-up-right"></i> Voir sur le wiki class="btn btn-sm btn-outline-primary">
</a> <i class="bi bi-box-arrow-up-right"></i> Voir sur le wiki
</a>
{% else %}
<a href="https://wiki.openstreetmap.org/wiki/Special:Search?search={{ group.name|url_encode }}"
target="_blank" class="btn btn-sm btn-outline-danger">
<i class="bi bi-search"></i> Rechercher sur le wiki
</a>
{% endif %}
</div> </div>
</div> </div>
</div> </div>
{% endfor %} {% endfor %}
</div> </div>
<!-- JavaScript pour les filtres -->
<script>
document.addEventListener('DOMContentLoaded', () => {
    const buttons = document.querySelectorAll('.filter-btn');
    const cards = document.querySelectorAll('.group-item');

    // Show every card for 'all', otherwise only cards carrying the filter's CSS class
    const applyFilter = (selected) => {
        cards.forEach((card) => {
            const visible = selected === 'all' || card.classList.contains(selected);
            card.style.display = visible ? 'block' : 'none';
        });
    };

    buttons.forEach((button) => {
        button.addEventListener('click', () => {
            // Only the clicked button stays highlighted
            buttons.forEach((other) => other.classList.remove('active'));
            button.classList.add('active');

            applyFilter(button.getAttribute('data-filter'));
        });
    });
});
</script>
{% else %} {% else %}
<div class="alert alert-info"> <div class="alert alert-info">
<p><i class="bi bi-info-circle"></i> Aucun groupe local n'a été trouvé.</p> <p><i class="bi bi-info-circle"></i> Aucun groupe local n'a été trouvé.</p>

View file

@ -3,120 +3,122 @@
{% block title %}Suggestion de page Wiki à améliorer{% endblock %} {% block title %}Suggestion de page Wiki à améliorer{% endblock %}
{% block body %} {% block body %}
<div class="container mt-4"> <div class="container mt-4">
{% include 'admin/_wiki_navigation.html.twig' %} {% include 'admin/_wiki_navigation.html.twig' %}
<h1>Suggestion de page Wiki à améliorer</h1>
<p class="lead">Voici une page wiki qui a besoin d'être améliorée.</p>
<div class="card mb-4"> <h1>Suggestion de page Wiki à améliorer</h1>
<div class="card-header bg-primary text-white"> <p class="lead">Voici une page wiki qui a besoin d'être améliorée.</p>
<h2>{{ page.key }}</h2>
</div> <div class="card mb-4">
<div class="card-body"> <div class="card-header bg-primary text-white">
<div class="alert alert-info"> <h2>{{ page.key }}</h2>
<h3>Raisons d'amélioration</h3>
<p>{{ page.reason }}</p>
</div> </div>
<div class="card-body">
<div class="alert alert-info">
<h3>Raisons d'amélioration</h3>
<p>{{ page.reason }}</p>
</div>
<div class="row"> <div class="row">
<div class="col-md-6"> <div class="col-md-6">
<div class="card h-100"> <div class="card h-100">
<div class="card-header bg-primary text-white"> <div class="card-header bg-primary text-white">
<h3>Version anglaise</h3> <h3>Version anglaise</h3>
<p class="mb-0"> <p class="mb-0">
<small>Dernière modification: {{ page.en_page.last_modified }}</small> <small>Dernière modification: {{ page.en_page.last_modified }}</small>
</p> </p>
</div>
<div class="card-body">
<ul class="list-group mb-3">
<li class="list-group-item d-flex justify-content-between align-items-center">
Sections
<span class="badge bg-primary rounded-pill">{{ page.en_page.sections }}</span>
</li>
<li class="list-group-item d-flex justify-content-between align-items-center">
Mots
<span class="badge bg-primary rounded-pill">{{ page.en_page.word_count|default(0) }}</span>
</li>
<li class="list-group-item d-flex justify-content-between align-items-center">
Liens
<span class="badge bg-primary rounded-pill">{{ page.en_page.link_count|default(0) }}</span>
</li>
</ul>
<div class="d-grid gap-2">
<a href="{{ page.en_page.url }}" target="_blank" class="btn btn-outline-primary">
<i class="bi bi-box-arrow-up-right"></i> Voir la page anglaise
</a>
</div>
</div>
</div> </div>
<div class="card-body"> </div>
<ul class="list-group mb-3"> <div class="col-md-6">
<li class="list-group-item d-flex justify-content-between align-items-center"> <div class="card h-100">
Sections <div class="card-header bg-info text-white">
<span class="badge bg-primary rounded-pill">{{ page.en_page.sections }}</span> <h3>Version française</h3>
</li> {% if page.fr_page %}
<li class="list-group-item d-flex justify-content-between align-items-center"> <p class="mb-0">
Mots <small>Dernière modification: {{ page.fr_page.last_modified }}</small>
<span class="badge bg-primary rounded-pill">{{ page.en_page.word_count|default(0) }}</span> </p>
</li> {% else %}
<li class="list-group-item d-flex justify-content-between align-items-center"> <p class="mb-0">
Liens <small>Page non existante</small>
<span class="badge bg-primary rounded-pill">{{ page.en_page.link_count|default(0) }}</span> </p>
</li> {% endif %}
</ul> </div>
<div class="d-grid gap-2"> <div class="card-body">
<a href="{{ page.en_page.url }}" target="_blank" class="btn btn-outline-primary"> {% if page.fr_page %}
<i class="bi bi-box-arrow-up-right"></i> Voir la page anglaise <ul class="list-group mb-3">
</a> <li class="list-group-item d-flex justify-content-between align-items-center">
Sections
<span class="badge bg-info rounded-pill">{{ page.fr_page.sections }}</span>
</li>
<li class="list-group-item d-flex justify-content-between align-items-center">
Mots
<span class="badge bg-info rounded-pill">{{ page.fr_page.word_count|default(0) }}</span>
</li>
<li class="list-group-item d-flex justify-content-between align-items-center">
Liens
<span class="badge bg-info rounded-pill">{{ page.fr_page.link_count|default(0) }}</span>
</li>
</ul>
<div class="d-grid gap-2">
<a href="{{ page.fr_page.url }}" target="_blank" class="btn btn-outline-info">
<i class="bi bi-box-arrow-up-right"></i> Voir la page française
</a>
</div>
{% else %}
<div class="alert alert-warning">
<p><i class="bi bi-exclamation-triangle"></i> <strong>La page wiki pour la clé
"{{ page.key }}" n'existe pas en français.</strong></p>
<p>Vous pouvez contribuer en créant cette page sur le wiki OpenStreetMap.</p>
</div>
<div class="d-grid gap-2">
<a href="https://wiki.openstreetmap.org/w/index.php?title=FR:Key:{{ page.key }}&action=edit"
target="_blank" class="btn btn-success">
<i class="bi bi-plus-circle"></i> Créer la page française
</a>
</div>
{% endif %}
</div> </div>
</div> </div>
</div> </div>
</div> </div>
<div class="col-md-6">
<div class="card h-100"> <div class="mt-4 d-grid gap-2">
<div class="card-header bg-info text-white"> <a href="{{ path('app_admin_wiki_compare', {'key': page.key}) }}" class="btn btn-primary">
<h3>Version française</h3> <i class="bi bi-arrows-angle-expand"></i> Voir la comparaison détaillée
{% if page.fr_page %} </a>
<p class="mb-0"> <a href="{{ path('app_admin_wiki_random_suggestion') }}" class="btn btn-secondary">
<small>Dernière modification: {{ page.fr_page.last_modified }}</small> <i class="bi bi-shuffle"></i> Autre suggestion aléatoire
</p> </a>
{% else %}
<p class="mb-0">
<small>Page non existante</small>
</p>
{% endif %}
</div>
<div class="card-body">
{% if page.fr_page %}
<ul class="list-group mb-3">
<li class="list-group-item d-flex justify-content-between align-items-center">
Sections
<span class="badge bg-info rounded-pill">{{ page.fr_page.sections }}</span>
</li>
<li class="list-group-item d-flex justify-content-between align-items-center">
Mots
<span class="badge bg-info rounded-pill">{{ page.fr_page.word_count|default(0) }}</span>
</li>
<li class="list-group-item d-flex justify-content-between align-items-center">
Liens
<span class="badge bg-info rounded-pill">{{ page.fr_page.link_count|default(0) }}</span>
</li>
</ul>
<div class="d-grid gap-2">
<a href="{{ page.fr_page.url }}" target="_blank" class="btn btn-outline-info">
<i class="bi bi-box-arrow-up-right"></i> Voir la page française
</a>
</div>
{% else %}
<div class="alert alert-warning">
<p><i class="bi bi-exclamation-triangle"></i> <strong>La page wiki pour la clé "{{ page.key }}" n'existe pas en français.</strong></p>
<p>Vous pouvez contribuer en créant cette page sur le wiki OpenStreetMap.</p>
</div>
<div class="d-grid gap-2">
<a href="https://wiki.openstreetmap.org/wiki/FR:Key:{{ page.key }}" target="_blank" class="btn btn-success">
<i class="bi bi-plus-circle"></i> Créer la page française
</a>
</div>
{% endif %}
</div>
</div>
</div> </div>
</div> </div>
</div>
<div class="mt-4 d-grid gap-2"> <div class="mt-3">
<a href="{{ path('app_admin_wiki_compare', {'key': page.key}) }}" class="btn btn-primary"> <a href="{{ path('app_admin_wiki') }}" class="btn btn-secondary">
<i class="bi bi-arrows-angle-expand"></i> Voir la comparaison détaillée <i class="bi bi-arrow-left"></i> Retour à la liste des pages wiki
</a> </a>
<a href="{{ path('app_admin_wiki_random_suggestion') }}" class="btn btn-secondary">
<i class="bi bi-shuffle"></i> Autre suggestion aléatoire
</a>
</div>
</div> </div>
</div> </div>
<div class="mt-3">
<a href="{{ path('app_admin_wiki') }}" class="btn btn-secondary">
<i class="bi bi-arrow-left"></i> Retour à la liste des pages wiki
</a>
</div>
</div>
{% endblock %} {% endblock %}

View file

@ -0,0 +1,87 @@
{% extends 'base.html.twig' %}
{% block title %}Changements récents Wiki OSM{% endblock %}
{% block body %}
<div class="container mt-4">
{% include 'admin/_wiki_navigation.html.twig' %}
<h1>Changements récents Wiki OpenStreetMap</h1>
<p class="lead">Liste des changements récents dans l'espace de noms français du wiki OpenStreetMap.</p>
{% if last_updated %}
<div class="alert alert-info">
<i class="bi bi-info-circle"></i> Dernière mise à jour : {{ last_updated|date('d/m/Y H:i') }}
</div>
{% endif %}
<div class="card mb-4">
<div class="card-header bg-primary text-white">
<h2>Changements récents</h2>
<p class="mb-0">
<a href="https://wiki.openstreetmap.org/w/index.php?hidebots=1&hidepreviousrevisions=1&hidecategorization=1&hideWikibase=1&hidelog=1&hidenewuserlog=1&namespace=202&limit=500&days=30&enhanced=1&title=Special:RecentChanges&urlversion=2"
target="_blank" class="text-white">
<i class="bi bi-box-arrow-up-right"></i> Voir sur le wiki OSM
</a>
</p>
</div>
<div class="card-body">
{% if recent_changes|length > 0 %}
<div class="table-responsive">
<table class="table table-striped table-hover">
<thead class="thead-dark">
<tr>
<th>Page</th>
<th>Date</th>
<th>Utilisateur</th>
<th>Commentaire</th>
<th>Taille</th>
<th>Actions</th>
</tr>
</thead>
<tbody>
{% for change in recent_changes %}
<tr>
<td>
<strong>{{ change.page_name }}</strong>
</td>
<td>{{ change.timestamp }}</td>
<td>{{ change.user }}</td>
<td>{{ change.comment }}</td>
<td>{{ change.change_size }}</td>
<td>
<a href="{{ change.page_url }}" target="_blank" class="btn btn-sm btn-outline-primary">
<i class="bi bi-box-arrow-up-right"></i> Voir
</a>
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
{% else %}
<div class="alert alert-info">
<p><i class="bi bi-info-circle"></i> Aucun changement récent n'a été trouvé.</p>
</div>
{% endif %}
</div>
</div>
<div class="card mb-4">
<div class="card-header">
<h2>À propos des changements récents</h2>
</div>
<div class="card-body">
<p>Cette page affiche les changements récents dans l'espace de noms français (FR:) du wiki OpenStreetMap.</p>
<p>Ces informations sont utiles pour suivre les traductions manquantes et les mises à jour des pages wiki.</p>
<p>Les données sont mises à jour automatiquement toutes les heures.</p>
</div>
</div>
<div class="mt-3">
<a href="{{ path('app_admin_wiki') }}" class="btn btn-secondary">
<i class="bi bi-arrow-left"></i> Retour à la liste des pages wiki
</a>
</div>
</div>
{% endblock %}

66
wiki_compare/CHANGES.md Normal file
View file

@ -0,0 +1,66 @@
# Changements implémentés
Ce document résume les changements et nouvelles fonctionnalités implémentés dans le cadre de la mise à jour du système de gestion des pages wiki OSM.
## 1. Suivi des changements récents du wiki OSM
### Fonctionnalités ajoutées
- Création d'un script `fetch_recent_changes.py` qui récupère les changements récents dans l'espace de noms français du wiki OSM
- Ajout d'une nouvelle route `/wiki/recent-changes` dans le contrôleur WikiController
- Création d'un template `wiki_recent_changes.html.twig` pour afficher les changements récents
- Mise à jour de la navigation pour inclure un lien vers la page des changements récents
### Utilisation
- Les changements récents sont automatiquement récupérés toutes les heures
- La page affiche la liste des pages modifiées récemment avec des liens vers ces pages
## 2. Validation de la hiérarchie des titres
### Fonctionnalités ajoutées
- Implémentation d'une logique de détection des hiérarchies de titres incorrectes (par exemple, h4 directement sous h2 sans h3 intermédiaire)
- Ajout d'indicateurs visuels (badges) pour signaler les hiérarchies incorrectes dans les listes de sections
- Mise à jour du template `wiki_compare.html.twig` pour afficher ces indicateurs
### Utilisation
- Les hiérarchies incorrectes sont automatiquement détectées lors de la comparaison des pages wiki
- Un badge rouge avec un point d'exclamation est affiché à côté des titres ayant une hiérarchie incorrecte
## 3. Vérification des groupes locaux
### Fonctionnalités ajoutées
- Mise à jour du script `fetch_osm_fr_groups.py` pour récupérer les données des groupes locaux depuis Framacalc
- Ajout d'une fonctionnalité de vérification de l'existence d'une page wiki pour chaque groupe
- Mise à jour du template `wiki_osm_fr_groups.html.twig` pour afficher les résultats de vérification
- Ajout de filtres pour faciliter la navigation parmi les groupes
### Utilisation
- Les groupes sont affichés avec des badges indiquant leur source (wiki ou Framacalc)
- Les groupes sans page wiki sont mis en évidence avec un badge rouge
- Les filtres permettent de voir uniquement les groupes d'une certaine catégorie (tous, wiki, Framacalc, avec page wiki, sans page wiki)
## Limitations connues
1. **Accès aux données externes** : Les scripts peuvent rencontrer des difficultés pour accéder aux données externes (wiki OSM, Framacalc) en fonction de l'environnement d'exécution.
2. **Détection des hiérarchies** : La détection des hiérarchies incorrectes se base uniquement sur les niveaux des titres et ne prend pas en compte le contenu ou la sémantique.
3. **Correspondance des groupes** : La correspondance entre les groupes Framacalc et les pages wiki se fait par une comparaison approximative des noms, ce qui peut parfois donner des résultats imprécis.
## Maintenance future
### Scripts Python
- Les scripts Python sont situés dans le répertoire `wiki_compare/`
- Ils peuvent être exécutés manuellement ou via des tâches cron
- L'option `--dry-run` permet de tester les scripts sans modifier les fichiers
- L'option `--force` permet de forcer la mise à jour même si le cache est récent
### Templates Twig
- Les templates sont situés dans le répertoire `templates/admin/`
- `wiki_recent_changes.html.twig` : Affichage des changements récents
- `wiki_compare.html.twig` : Comparaison des pages wiki avec validation de hiérarchie
- `wiki_osm_fr_groups.html.twig` : Affichage des groupes locaux avec vérification des pages wiki
### Contrôleur
- Le contrôleur `WikiController.php` contient toutes les routes et la logique de traitement
- La méthode `detectHeadingHierarchyErrors()` peut être ajustée pour modifier les règles de validation des hiérarchies
- Les méthodes de rafraîchissement des données (`refreshRecentChangesData()`, etc.) peuvent être modifiées pour ajuster la fréquence de mise à jour

View file

@ -4,9 +4,11 @@
""" """
fetch_osm_fr_groups.py fetch_osm_fr_groups.py
This script scrapes the OpenStreetMap wiki page for France/OSM-FR to extract This script fetches information about OSM-FR local groups from two sources:
information about local working groups. It specifically targets links in the 1. The OpenStreetMap wiki page for France/OSM-FR (specifically the #Pages_des_groupes_locaux section)
#Pages_des_groupes_locaux section. 2. The Framacalc spreadsheet at https://framacalc.org/osm-groupes-locaux
It then verifies that each group from the Framacalc has a corresponding wiki page.
Usage: Usage:
python fetch_osm_fr_groups.py [--dry-run] [--force] python fetch_osm_fr_groups.py [--dry-run] [--force]
@ -24,6 +26,8 @@ import json
import argparse import argparse
import logging import logging
import os import os
import csv
import io
from datetime import datetime, timedelta from datetime import datetime, timedelta
import requests import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
@ -40,6 +44,8 @@ logger = logging.getLogger(__name__)
OUTPUT_FILE = "osm_fr_groups.json" OUTPUT_FILE = "osm_fr_groups.json"
BASE_URL = "https://wiki.openstreetmap.org/wiki/France/OSM-FR" BASE_URL = "https://wiki.openstreetmap.org/wiki/France/OSM-FR"
WIKI_BASE_URL = "https://wiki.openstreetmap.org" WIKI_BASE_URL = "https://wiki.openstreetmap.org"
FRAMACALC_URL = "https://framacalc.org/osm-groupes-locaux/export/csv"
WIKI_GROUPS_URL = "https://wiki.openstreetmap.org/wiki/France/OSM-FR#Groupes_locaux"
CACHE_DURATION = timedelta(hours=1) # Cache duration of 1 hour CACHE_DURATION = timedelta(hours=1) # Cache duration of 1 hour
def is_cache_fresh(): def is_cache_fresh():
@ -141,7 +147,7 @@ def extract_working_groups(html_content):
logger.info(f"Found {len(working_groups)} working groups") logger.info(f"Found {len(working_groups)} working groups")
return working_groups return working_groups
def extract_local_groups(html_content): def extract_local_groups_from_wiki(html_content):
""" """
Extract local groups from the wiki page HTML Extract local groups from the wiki page HTML
@ -193,13 +199,170 @@ def extract_local_groups(html_content):
"name": name, "name": name,
"url": url, "url": url,
"description": description, "description": description,
"type": "local_group" "type": "local_group",
"source": "wiki"
}) })
current = current.next_sibling current = current.next_sibling
logger.info(f"Found {len(local_groups)} local groups") logger.info(f"Found {len(local_groups)} local groups from wiki")
return local_groups return local_groups
def fetch_framacalc_data(timeout=30):
    """
    Fetch local groups data from the Framacalc CSV export (FRAMACALC_URL).

    The first CSV row is treated as the header. Columns are located by
    substring matching on the lower-cased header text:
      - name:    contains 'nom' or 'groupe'
      - contact: contains 'contact' or 'email'
      - website: contains 'site' or 'web'
    When several headers match the same kind, the last matching column wins.

    Args:
        timeout (float): Maximum number of seconds to wait for the HTTP
            request. Without a timeout a stalled server would block the
            script forever.

    Returns:
        list: List of local group dictionaries from Framacalc. Empty when the
            request fails, the CSV has no data rows, or no name column is found.
    """
    try:
        # Bound the request so an unresponsive Framacalc cannot hang the run.
        # A timeout raises requests.exceptions.Timeout, which is a
        # RequestException and is therefore handled below.
        response = requests.get(FRAMACALC_URL, timeout=timeout)
        response.raise_for_status()

        # Parse CSV data
        csv_data = csv.reader(io.StringIO(response.text))
        rows = list(csv_data)

        # A usable export needs a header row plus at least one data row
        if len(rows) < 2:
            logger.warning("No data found in Framacalc CSV")
            return []

        # Extract headers (first row)
        headers = rows[0]

        # Find the indices of important columns (-1 means "not found")
        name_idx = -1
        contact_idx = -1
        website_idx = -1

        for i, header in enumerate(headers):
            header_lower = header.lower()
            if 'nom' in header_lower or 'groupe' in header_lower:
                name_idx = i
            elif 'contact' in header_lower or 'email' in header_lower:
                contact_idx = i
            elif 'site' in header_lower or 'web' in header_lower:
                website_idx = i

        if name_idx == -1:
            logger.warning("Could not find name column in Framacalc CSV")
            return []

        # Process data rows
        local_groups = []
        for row in rows[1:]:  # Skip header row
            if len(row) <= name_idx or not row[name_idx].strip():
                continue  # Skip rows that are too short or have no name

            name = row[name_idx].strip()
            contact = row[contact_idx].strip() if contact_idx != -1 and contact_idx < len(row) else ""
            website = row[website_idx].strip() if website_idx != -1 and website_idx < len(row) else ""

            local_groups.append({
                "name": name,
                "contact": contact,
                "website": website,
                "type": "local_group",
                "source": "framacalc",
                "has_wiki_page": False,  # Will be updated later
                "wiki_url": ""  # Will be updated later
            })

        logger.info(f"Found {len(local_groups)} local groups from Framacalc")
        return local_groups

    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching Framacalc data: {e}")
        return []
    except Exception as e:
        # Best effort: any parsing problem yields an empty list rather than a crash
        logger.error(f"Error processing Framacalc data: {e}")
        return []
def extract_wiki_group_links(timeout=30):
    """
    Extract links to local group wiki pages from the OSM-FR wiki page.

    The page at WIKI_GROUPS_URL is downloaded, the first h2/h3/h4 heading whose
    text contains 'Pages des groupes locaux' is located, and every direct
    <li> of the <ul> siblings that follow (up to the next h2/h3/h4) is read.
    The group name is the list item text before the first comma.

    Args:
        timeout (float): Maximum number of seconds to wait for the HTTP request.

    Returns:
        dict: Dictionary mapping group names to wiki URLs. Empty when the
            request fails or the section cannot be found.
    """
    try:
        # Bound the request so an unresponsive wiki cannot hang the run.
        response = requests.get(WIKI_GROUPS_URL, timeout=timeout)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')
        wiki_links = {}

        # Find the "Pages des groupes locaux" section
        pages_section = None
        for heading in soup.find_all(['h2', 'h3', 'h4']):
            if 'Pages des groupes locaux' in heading.get_text():
                pages_section = heading
                break

        if not pages_section:
            logger.warning("Could not find 'Pages des groupes locaux' section")
            return {}

        # Walk the siblings following the heading until the next heading
        current = pages_section.next_sibling
        while current and current.name not in ['h2', 'h3', 'h4']:
            if current.name == 'ul':
                # Process list items
                for li in current.find_all('li', recursive=False):
                    text = li.get_text().strip()
                    link = li.find('a')
                    href = link.get('href') if link else None

                    # An anchor without href (or an item without text) carries no
                    # usable link; skip it instead of raising and losing every
                    # link collected so far.
                    if not href or not text:
                        continue

                    # Extract group name (before the comma)
                    group_name = text.split(',', 1)[0].strip()
                    if not group_name:
                        # A blank name is useless as a lookup key
                        continue

                    # Site-relative links are made absolute
                    url = WIKI_BASE_URL + href if href.startswith('/') else href
                    wiki_links[group_name] = url

            current = current.next_sibling

        logger.info(f"Found {len(wiki_links)} wiki links for local groups")
        return wiki_links

    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching wiki group links: {e}")
        return {}
    except Exception as e:
        # Best effort: unexpected markup yields an empty mapping rather than a crash
        logger.error(f"Error processing wiki group links: {e}")
        return {}
def verify_framacalc_groups_have_wiki(framacalc_groups, wiki_links):
    """
    Verify that each group from Framacalc has a corresponding wiki page.

    Matching is case-insensitive. An exact name match is preferred; otherwise
    the first wiki name that contains the group name, or is contained in it,
    is used. Blank names never match: the empty string is a substring of every
    string, so a blank wiki name would otherwise match every group.

    Each group dictionary is updated in place with:
      - 'has_wiki_page' (bool)
      - 'wiki_url' (str, "" when no page was found)

    Args:
        framacalc_groups (list): List of local group dictionaries from Framacalc
        wiki_links (dict): Dictionary mapping group names to wiki URLs

    Returns:
        list: The same list of local group dictionaries, with wiki verification
    """
    # Normalise wiki names once instead of on every comparison, dropping blanks
    normalized_links = [
        (wiki_name.strip().lower(), wiki_url)
        for wiki_name, wiki_url in wiki_links.items()
        if wiki_name and wiki_name.strip()
    ]

    # Exact-name lookup; the first occurrence wins to keep the mapping order
    exact_links = {}
    for normalized_name, wiki_url in normalized_links:
        exact_links.setdefault(normalized_name, wiki_url)

    for group in framacalc_groups:
        group_name = (group.get('name') or '').strip().lower()

        matched = False
        matched_url = ""

        if group_name:
            if group_name in exact_links:
                # Prefer the page whose name is exactly the group name
                matched = True
                matched_url = exact_links[group_name]
            else:
                # Fall back to an approximate (substring) comparison
                for normalized_name, wiki_url in normalized_links:
                    if group_name in normalized_name or normalized_name in group_name:
                        matched = True
                        matched_url = wiki_url
                        break

        group['has_wiki_page'] = matched
        group['wiki_url'] = matched_url if matched else ""

    return framacalc_groups
def extract_umap_url(html_content): def extract_umap_url(html_content):
""" """
Extract the uMap URL for OSM-FR local groups Extract the uMap URL for OSM-FR local groups
@ -223,14 +386,16 @@ def extract_umap_url(html_content):
return None return None
def save_results(local_groups, working_groups, umap_url, dry_run=False): def save_results(wiki_local_groups, framacalc_groups, working_groups, umap_url, wiki_links, dry_run=False):
""" """
Save the results to a JSON file Save the results to a JSON file
Args: Args:
local_groups (list): List of local group dictionaries wiki_local_groups (list): List of local group dictionaries from wiki
framacalc_groups (list): List of local group dictionaries from Framacalc
working_groups (list): List of working group dictionaries working_groups (list): List of working group dictionaries
umap_url (str): URL to the uMap for local groups umap_url (str): URL to the uMap for local groups
wiki_links (dict): Dictionary mapping group names to wiki URLs
dry_run (bool): If True, don't actually save to file dry_run (bool): If True, don't actually save to file
Returns: Returns:
@ -238,28 +403,41 @@ def save_results(local_groups, working_groups, umap_url, dry_run=False):
""" """
if dry_run: if dry_run:
logger.info("DRY RUN: Would have saved results to file") logger.info("DRY RUN: Would have saved results to file")
logger.info(f"Local groups: {len(local_groups)}") logger.info(f"Wiki local groups: {len(wiki_local_groups)}")
for group in local_groups: for group in wiki_local_groups[:5]: # Show only first 5 for brevity
logger.info(f" - {group['name']}: {group['url']}") logger.info(f" - {group['name']}: {group['url']}")
logger.info(f"Framacalc groups: {len(framacalc_groups)}")
for group in framacalc_groups[:5]: # Show only first 5 for brevity
wiki_status = "Has wiki page" if group.get('has_wiki_page') else "No wiki page"
logger.info(f" - {group['name']}: {wiki_status}")
logger.info(f"Working groups: {len(working_groups)}") logger.info(f"Working groups: {len(working_groups)}")
for group in working_groups: for group in working_groups[:5]: # Show only first 5 for brevity
logger.info(f" - {group['name']}: {group['url']}") logger.info(f" - {group['name']}: {group['url']}")
if umap_url: if umap_url:
logger.info(f"uMap URL: {umap_url}") logger.info(f"uMap URL: {umap_url}")
logger.info(f"Wiki links: {len(wiki_links)}")
return True return True
# Combine all local groups
all_local_groups = wiki_local_groups + framacalc_groups
# Prepare the data structure # Prepare the data structure
data = { data = {
"last_updated": datetime.now().isoformat(), "last_updated": datetime.now().isoformat(),
"local_groups": local_groups, "local_groups": all_local_groups,
"working_groups": working_groups, "working_groups": working_groups,
"umap_url": umap_url "umap_url": umap_url,
"wiki_links": wiki_links
} }
try: try:
with open(OUTPUT_FILE, 'w', encoding='utf-8') as f: with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
json.dump(data, f, indent=2, ensure_ascii=False) json.dump(data, f, indent=2, ensure_ascii=False)
logger.info(f"Successfully saved {len(local_groups)} local groups and {len(working_groups)} working groups to {OUTPUT_FILE}") logger.info(f"Successfully saved {len(all_local_groups)} local groups and {len(working_groups)} working groups to {OUTPUT_FILE}")
return True return True
except IOError as e: except IOError as e:
logger.error(f"Error saving results to {OUTPUT_FILE}: {e}") logger.error(f"Error saving results to {OUTPUT_FILE}: {e}")
@ -267,7 +445,7 @@ def save_results(local_groups, working_groups, umap_url, dry_run=False):
def main(): def main():
"""Main function to execute the script""" """Main function to execute the script"""
parser = argparse.ArgumentParser(description="Scrape OSM-FR local groups from the wiki") parser = argparse.ArgumentParser(description="Fetch OSM-FR local groups from wiki and Framacalc")
parser.add_argument("--dry-run", action="store_true", help="Run without saving results to file") parser.add_argument("--dry-run", action="store_true", help="Run without saving results to file")
parser.add_argument("--force", action="store_true", help="Force update even if cache is fresh") parser.add_argument("--force", action="store_true", help="Force update even if cache is fresh")
args = parser.parse_args() args = parser.parse_args()
@ -287,11 +465,11 @@ def main():
logger.error("Failed to get wiki page content") logger.error("Failed to get wiki page content")
return return
# Extract local groups # Extract local groups from wiki
local_groups = extract_local_groups(html_content) wiki_local_groups = extract_local_groups_from_wiki(html_content)
if not local_groups: if not wiki_local_groups:
logger.warning("No local groups found") logger.warning("No local groups found in wiki")
# Extract working groups # Extract working groups
working_groups = extract_working_groups(html_content) working_groups = extract_working_groups(html_content)
@ -304,8 +482,31 @@ def main():
# Extract uMap URL # Extract uMap URL
umap_url = extract_umap_url(html_content) umap_url = extract_umap_url(html_content)
# Fetch local groups from Framacalc
framacalc_groups = fetch_framacalc_data()
if not framacalc_groups:
logger.warning("No local groups found in Framacalc")
# Extract wiki group links
wiki_links = extract_wiki_group_links()
if not wiki_links:
logger.warning("No wiki links found for local groups")
# Verify Framacalc groups have wiki pages
if framacalc_groups and wiki_links:
framacalc_groups = verify_framacalc_groups_have_wiki(framacalc_groups, wiki_links)
# Count groups with and without wiki pages
groups_with_wiki = sum(1 for group in framacalc_groups if group.get('has_wiki_page'))
groups_without_wiki = sum(1 for group in framacalc_groups if not group.get('has_wiki_page'))
logger.info(f"Framacalc groups with wiki pages: {groups_with_wiki}")
logger.info(f"Framacalc groups without wiki pages: {groups_without_wiki}")
# Save results # Save results
success = save_results(local_groups, working_groups, umap_url, args.dry_run) success = save_results(wiki_local_groups, framacalc_groups, working_groups, umap_url, wiki_links, args.dry_run)
if success: if success:
logger.info("Script completed successfully") logger.info("Script completed successfully")

View file

@ -0,0 +1,216 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
fetch_recent_changes.py
This script fetches recent changes from the OpenStreetMap wiki for the French namespace
and stores the URLs of these pages. It specifically targets the recent changes page:
https://wiki.openstreetmap.org/w/index.php?hidebots=1&hidepreviousrevisions=1&hidecategorization=1&hideWikibase=1&hidelog=1&hidenewuserlog=1&namespace=202&limit=500&days=30&enhanced=1&title=Special:RecentChanges&urlversion=2
Usage:
python fetch_recent_changes.py [--dry-run] [--force]
Options:
--dry-run Run the script without saving the results to a file
--force Force update even if the cache is still fresh (less than 1 hour old)
Output:
- recent_changes.json: JSON file with information about recent changes in the French namespace
- Log messages about the scraping process and results
"""
import json
import argparse
import logging
import os
from datetime import datetime, timedelta
import requests
from bs4 import BeautifulSoup
# Configure logging
# Messages at INFO level and above are emitted as "<timestamp> - <LEVEL> - <message>".
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)
# Constants
# OUTPUT_FILE is a bare file name, so it is resolved against the current
# working directory of the process that runs this script.
# NOTE(review): the web side appears to read wiki_compare/recent_changes.json;
# confirm the script is always launched from that directory.
OUTPUT_FILE = "recent_changes.json"
# Special:RecentChanges query: namespace 202, up to 500 entries over the last
# 30 days, with bots, logs, categorization and Wikibase entries hidden.
RECENT_CHANGES_URL = "https://wiki.openstreetmap.org/w/index.php?hidebots=1&hidepreviousrevisions=1&hidecategorization=1&hideWikibase=1&hidelog=1&hidenewuserlog=1&namespace=202&limit=500&days=30&enhanced=1&title=Special:RecentChanges&urlversion=2"
# Prefix prepended to the href values scraped from the page to build full URLs.
WIKI_BASE_URL = "https://wiki.openstreetmap.org"
CACHE_DURATION = timedelta(hours=1) # Cache duration of 1 hour
def is_cache_fresh():
    """
    Check if the cache file exists and is less than CACHE_DURATION old.

    The age is computed from the 'last_updated' ISO timestamp stored in
    OUTPUT_FILE. A missing, unreadable or malformed file is treated as stale,
    as is a timestamp lying in the future.

    Returns:
        bool: True if cache is fresh, False otherwise
    """
    if not os.path.exists(OUTPUT_FILE):
        return False

    try:
        with open(OUTPUT_FILE, 'r', encoding='utf-8') as f:
            data = json.load(f)

        last_updated = datetime.fromisoformat(data.get('last_updated', '2000-01-01T00:00:00'))

        # Naive and aware datetimes cannot be subtracted from each other, so
        # take "now" in the same flavour as the stored timestamp.
        if last_updated.tzinfo is not None:
            now = datetime.now(last_updated.tzinfo)
        else:
            now = datetime.now()

        age = now - last_updated
        # A negative age means a future timestamp (clock change or hand-edited
        # file); do not let it keep the cache "fresh" indefinitely.
        return timedelta(0) <= age < CACHE_DURATION
    except (IOError, json.JSONDecodeError, ValueError, TypeError, AttributeError) as e:
        # TypeError: 'last_updated' is not a string (e.g. null).
        # AttributeError: the JSON document is not an object (no .get).
        logger.error(f"Error checking cache freshness: {e}")
        return False
def get_page_content(url, timeout=30):
    """
    Get the HTML content of a page.

    Args:
        url (str): URL to fetch
        timeout (float): Maximum number of seconds to wait for the server.
            Without a timeout a stalled connection would block the script
            forever.

    Returns:
        str: HTML content of the page or None if request failed
            (network error, timeout or HTTP error status)
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx responses into exceptions so they are reported below
        response.raise_for_status()
        return response.text
    except requests.exceptions.RequestException as e:
        # Timeouts are RequestException subclasses and end up here as well
        logger.error(f"Error fetching {url}: {e}")
        return None
def extract_recent_changes(html_content):
    """
    Extract recent changes from the wiki page HTML.

    Looks for the first <ul class="special"> element and reads one change per
    <li> that contains a link with class 'mw-changeslist-title'.

    Args:
        html_content (str): HTML content of the recent changes page

    Returns:
        list: List of recent change dictionaries with the keys 'page_name',
            'page_url', 'timestamp', 'user', 'comment' and 'change_size'.
            Empty when there is no content or no changes list.
    """
    if not html_content:
        return []

    soup = BeautifulSoup(html_content, 'html.parser')
    recent_changes = []

    # Find the changes list
    changes_list = soup.find('ul', class_='special')
    if not changes_list:
        logger.warning("Could not find recent changes list")
        return []

    # Process each list item (each change)
    for li in changes_list.find_all('li'):
        # Extract the page link; items without one are not page changes
        page_link = li.find('a', class_='mw-changeslist-title')
        if not page_link:
            continue

        href = page_link.get('href')
        if not href:
            # Without a target there is no URL to store
            continue

        page_name = page_link.get_text().strip()
        page_url = WIKI_BASE_URL + href

        # Extract the timestamp
        timestamp_span = li.find('span', class_='mw-changeslist-date')
        timestamp = timestamp_span.get_text().strip() if timestamp_span else "Unknown"

        # Extract the user
        user_link = li.find('a', class_='mw-userlink')
        user = user_link.get_text().strip() if user_link else "Unknown"

        # Extract the comment
        comment_span = li.find('span', class_='comment')
        comment = comment_span.get_text().strip() if comment_span else ""

        # Extract the change size.
        # NOTE(review): MediaWiki is expected to render the byte delta in an
        # element carrying one of the mw-plusminus-* classes - confirm against
        # the live markup. The node right after the separator span is kept as
        # a fallback.
        change_size = ""
        size_element = li.find(class_=['mw-plusminus-pos', 'mw-plusminus-neg', 'mw-plusminus-null'])
        if size_element is not None:
            change_size = size_element.get_text().strip()
        else:
            separator = li.find('span', class_='mw-changeslist-separator')
            sibling = separator.next_sibling if separator is not None else None
            if sibling is not None:
                # The sibling may be a plain text node rather than a tag
                sibling_text = sibling.get_text() if hasattr(sibling, 'get_text') else str(sibling)
                change_size = sibling_text.strip()
        if not change_size:
            change_size = "0"

        recent_changes.append({
            "page_name": page_name,
            "page_url": page_url,
            "timestamp": timestamp,
            "user": user,
            "comment": comment,
            "change_size": change_size
        })

    logger.info(f"Found {len(recent_changes)} recent changes")
    return recent_changes
def save_results(recent_changes, dry_run=False):
    """
    Persist the collected changes to OUTPUT_FILE as JSON.

    In dry-run mode nothing is written; a short preview (at most five
    entries) is logged instead.

    Args:
        recent_changes (list): List of recent change dictionaries
        dry_run (bool): If True, don't actually save to file

    Returns:
        bool: True if saving was successful or dry run, False otherwise
    """
    total = len(recent_changes)

    if not dry_run:
        # Payload written to disk: generation time plus the list of changes
        payload = {
            "last_updated": datetime.now().isoformat(),
            "recent_changes": recent_changes
        }
        try:
            with open(OUTPUT_FILE, 'w', encoding='utf-8') as out:
                json.dump(payload, out, indent=2, ensure_ascii=False)
            logger.info(f"Successfully saved {total} recent changes to {OUTPUT_FILE}")
            return True
        except IOError as err:
            logger.error(f"Error saving results to {OUTPUT_FILE}: {err}")
            return False

    # Dry run: report what would have been written
    preview_limit = 5
    logger.info("DRY RUN: Would have saved results to file")
    logger.info(f"Recent changes: {total}")
    for entry in recent_changes[:preview_limit]:
        logger.info(f" - {entry['page_name']}: {entry['page_url']} ({entry['timestamp']})")
    if total > preview_limit:
        logger.info(f" ... and {total - 5} more")
    return True
def main():
    """
    Command-line entry point: parse the options, honour the cache, then
    download, parse and store the recent changes.
    """
    cli = argparse.ArgumentParser(description="Fetch recent changes from the OSM wiki French namespace")
    cli.add_argument("--dry-run", action="store_true", help="Run without saving results to file")
    cli.add_argument("--force", action="store_true", help="Force update even if cache is fresh")
    options = cli.parse_args()

    logger.info("Starting fetch_recent_changes.py")

    # The cache is always inspected; --force only overrides the outcome
    cache_fresh = is_cache_fresh()
    if cache_fresh and not options.force:
        logger.info(f"Cache is still fresh (less than {CACHE_DURATION.total_seconds()/3600} hours old)")
        logger.info("Use --force to update anyway")
        return

    # Download the recent changes page
    page_html = get_page_content(RECENT_CHANGES_URL)
    if not page_html:
        logger.error("Failed to get recent changes page content")
        return

    # Parse it; an empty result is still saved, only a warning is logged
    changes = extract_recent_changes(page_html)
    if not changes:
        logger.warning("No recent changes found")

    # Store (or preview, in dry-run mode) the result
    if save_results(changes, options.dry_run):
        logger.info("Script completed successfully")
    else:
        logger.error("Script completed with errors")


if __name__ == "__main__":
    main()