diff --git a/public/recent_changes.json b/public/recent_changes.json new file mode 100644 index 0000000..953c842 --- /dev/null +++ b/public/recent_changes.json @@ -0,0 +1,4 @@ +{ + "last_updated": "2025-08-22T18:13:20.641943", + "recent_changes": [] +} \ No newline at end of file diff --git a/src/Controller/WikiController.php b/src/Controller/WikiController.php index a54c25e..f243cce 100644 --- a/src/Controller/WikiController.php +++ b/src/Controller/WikiController.php @@ -8,6 +8,112 @@ use Symfony\Component\Routing\Annotation\Route; class WikiController extends AbstractController { + /** + * Detects incorrect heading hierarchies in a list of sections + * For example, h4 directly under h2 without h3 in between + * + * @param array $sections List of sections with 'level' and 'title' keys + * @return array List of section indices with hierarchy errors + */ + private function detectHeadingHierarchyErrors(array $sections): array + { + $errors = []; + $lastLevel = 0; + + foreach ($sections as $index => $section) { + $currentLevel = isset($section['level']) ? (int)$section['level'] : 0; + + // Skip if level is not set or is 0 + if ($currentLevel === 0) { + continue; + } + + // If this is the first section, just record its level + if ($lastLevel === 0) { + $lastLevel = $currentLevel; + continue; + } + + // Check if the level jump is more than 1 + // For example, h2 -> h4 (skipping h3) + if ($currentLevel > $lastLevel + 1) { + $errors[] = $index; + } + + $lastLevel = $currentLevel; + } + + return $errors; + } + #[Route('/wiki/recent-changes', name: 'app_admin_wiki_recent_changes')] + public function recentChanges(): Response + { + $recentChangesFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/recent_changes.json'; + + // Initialize arrays + $recentChanges = []; + $lastUpdated = null; + + // Check if the recent changes file exists and load it + if (file_exists($recentChangesFile)) { + $recentChangesData = json_decode(file_get_contents($recentChangesFile), true); + + if (isset($recentChangesData['recent_changes']) && is_array($recentChangesData['recent_changes'])) { + $recentChanges = $recentChangesData['recent_changes']; + $lastUpdated = isset($recentChangesData['last_updated']) ? $recentChangesData['last_updated'] : null; + } + + // Check if the data is older than 1 hour + if ($lastUpdated) { + $lastUpdatedTime = new \DateTime($lastUpdated); + $now = new \DateTime(); + $diff = $now->diff($lastUpdatedTime); + + // If older than 1 hour, refresh the data + if ($diff->h >= 1 || $diff->days > 0) { + $this->refreshRecentChangesData(); + return $this->redirectToRoute('app_admin_wiki_recent_changes'); + } + } + } else { + // If the file doesn't exist, try to create it by running the script + $this->refreshRecentChangesData(); + + // Check if the file was created + if (file_exists($recentChangesFile)) { + return $this->redirectToRoute('app_admin_wiki_recent_changes'); + } else { + $this->addFlash('error', 'Impossible de générer le fichier des changements récents.'); + } + } + + return $this->render('admin/wiki_recent_changes.html.twig', [ + 'recent_changes' => $recentChanges, + 'last_updated' => $lastUpdated + ]); + } + + /** + * Refresh the recent changes data by running the fetch_recent_changes.py script + */ + private function refreshRecentChangesData(): void + { + try { + $scriptPath = $this->getParameter('kernel.project_dir') . '/wiki_compare/fetch_recent_changes.py'; + if (file_exists($scriptPath)) { + exec('python3 ' . $scriptPath . ' --force 2>&1', $output, $returnCode); + + if ($returnCode !== 0) { + $this->addFlash('warning', 'Impossible de mettre à jour les changements récents. Erreur: ' . implode("\n", $output)); + } + } else { + $this->addFlash('error', 'Le script fetch_recent_changes.py n\'existe pas.'); + } + } catch (\Exception $e) { + $this->addFlash('error', 'Erreur lors de l\'exécution du script: ' . $e->getMessage()); + } + } + #[Route('/wiki/missing-translations', name: 'app_admin_wiki_missing_translations')] public function missingTranslations(): Response { @@ -664,11 +770,149 @@ class WikiController extends AbstractController $mediaComparison['fr_only_count'] = count($frOnlyImages); } + // Get link comparison data + $linkComparison = $page['link_comparison'] ?? null; + + // Sort links alphabetically by URL if link comparison exists + if ($linkComparison) { + // Sort English-only links + if (isset($linkComparison['en_only']) && is_array($linkComparison['en_only'])) { + usort($linkComparison['en_only'], function($a, $b) { + return strcmp($a['href'], $b['href']); + }); + } + + // Sort French-only links + if (isset($linkComparison['fr_only']) && is_array($linkComparison['fr_only'])) { + usort($linkComparison['fr_only'], function($a, $b) { + return strcmp($a['href'], $b['href']); + }); + } + + // Sort common links + if (isset($linkComparison['common']) && is_array($linkComparison['common'])) { + usort($linkComparison['common'], function($a, $b) { + return strcmp($a['en']['href'], $b['en']['href']); + }); + } + } + + // Get section comparison data and filter out "Contents" sections + $sectionComparison = $page['section_comparison'] ?? null; + + // Filter out "Contents" sections if section comparison exists + if ($sectionComparison) { + // Filter common sections + if (isset($sectionComparison['common']) && is_array($sectionComparison['common'])) { + $sectionComparison['common'] = array_filter($sectionComparison['common'], function($section) { + // Skip if either English or French title is "Contents" + return !($section['en']['title'] === 'Contents' || $section['fr']['title'] === 'Sommaire'); + }); + // Re-index array + $sectionComparison['common'] = array_values($sectionComparison['common']); + } + + // Filter English-only sections + if (isset($sectionComparison['en_only']) && is_array($sectionComparison['en_only'])) { + $sectionComparison['en_only'] = array_filter($sectionComparison['en_only'], function($section) { + return $section['title'] !== 'Contents'; + }); + // Re-index array + $sectionComparison['en_only'] = array_values($sectionComparison['en_only']); + } + + // Filter French-only sections + if (isset($sectionComparison['fr_only']) && is_array($sectionComparison['fr_only'])) { + $sectionComparison['fr_only'] = array_filter($sectionComparison['fr_only'], function($section) { + return $section['title'] !== 'Sommaire'; + }); + // Re-index array + $sectionComparison['fr_only'] = array_values($sectionComparison['fr_only']); + } + } + + // Calculate adjusted section counts (excluding "Contents" sections) + $enSectionCount = $enPage['sections']; + $frSectionCount = $frPage['sections']; + + // Adjust section counts if we have section comparison data + if ($sectionComparison) { + // Count how many "Contents" sections were filtered out + $contentsFilteredCount = 0; + + // Check common sections that were filtered + if (isset($page['section_comparison']['common']) && is_array($page['section_comparison']['common'])) { + foreach ($page['section_comparison']['common'] as $section) { + if ($section['en']['title'] === 'Contents' || $section['fr']['title'] === 'Sommaire') { + $contentsFilteredCount++; + } + } + } + + // Check English-only sections that were filtered + if (isset($page['section_comparison']['en_only']) && is_array($page['section_comparison']['en_only'])) { + foreach ($page['section_comparison']['en_only'] as $section) { + if ($section['title'] === 'Contents') { + $contentsFilteredCount++; + } + } + } + + // Check French-only sections that were filtered + if (isset($page['section_comparison']['fr_only']) && is_array($page['section_comparison']['fr_only'])) { + foreach ($page['section_comparison']['fr_only'] as $section) { + if ($section['title'] === 'Sommaire') { + $contentsFilteredCount++; + } + } + } + + // Adjust section counts + $enSectionCount -= $contentsFilteredCount; + $frSectionCount -= $contentsFilteredCount; + } + + // Check for incorrect heading hierarchies + $enHierarchyErrors = []; + $frHierarchyErrors = []; + + // Check English sections + if (isset($sectionComparison['en_only']) && is_array($sectionComparison['en_only'])) { + $enHierarchyErrors = $this->detectHeadingHierarchyErrors($sectionComparison['en_only']); + } + + // Also check common sections (English side) + if (isset($sectionComparison['common']) && is_array($sectionComparison['common'])) { + $commonEnSections = array_map(function($section) { + return $section['en']; + }, $sectionComparison['common']); + + $enHierarchyErrors = array_merge($enHierarchyErrors, $this->detectHeadingHierarchyErrors($commonEnSections)); + } + + // Check French sections + if (isset($sectionComparison['fr_only']) && is_array($sectionComparison['fr_only'])) { + $frHierarchyErrors = $this->detectHeadingHierarchyErrors($sectionComparison['fr_only']); + } + + // Also check common sections (French side) + if (isset($sectionComparison['common']) && is_array($sectionComparison['common'])) { + $commonFrSections = array_map(function($section) { + return $section['fr']; + }, $sectionComparison['common']); + + $frHierarchyErrors = array_merge($frHierarchyErrors, $this->detectHeadingHierarchyErrors($commonFrSections)); + } + $detailedComparison = [ - 'section_comparison' => $page['section_comparison'] ?? null, - 'link_comparison' => $page['link_comparison'] ?? null, + 'section_comparison' => $sectionComparison, + 'link_comparison' => $linkComparison, 'media_comparison' => $mediaComparison, - 'category_comparison' => $page['category_comparison'] ?? null + 'category_comparison' => $page['category_comparison'] ?? null, + 'adjusted_en_section_count' => $enSectionCount, + 'adjusted_fr_section_count' => $frSectionCount, + 'en_hierarchy_errors' => $enHierarchyErrors, + 'fr_hierarchy_errors' => $frHierarchyErrors ]; $mediaDiff = $page['media_diff'] ?? 0; @@ -841,6 +1085,15 @@ class WikiController extends AbstractController } } + // Ensure page URLs are strings to prevent array to string conversion errors + if ($frPage && isset($frPage['url']) && is_array($frPage['url'])) { + $frPage['url'] = json_encode($frPage['url']); + } + + if ($enPage && isset($enPage['url']) && is_array($enPage['url'])) { + $enPage['url'] = json_encode($enPage['url']); + } + return $this->render('admin/wiki_compare.html.twig', [ 'key' => $key, 'en_page' => $enPage, diff --git a/templates/admin/_wiki_navigation.html.twig b/templates/admin/_wiki_navigation.html.twig index 502691b..1d0daeb 100644 --- a/templates/admin/_wiki_navigation.html.twig +++ b/templates/admin/_wiki_navigation.html.twig @@ -41,6 +41,11 @@ Groupes OSM-FR + diff --git a/templates/admin/wiki_compare.html.twig b/templates/admin/wiki_compare.html.twig index dfa047b..13f54df 100644 --- a/templates/admin/wiki_compare.html.twig +++ b/templates/admin/wiki_compare.html.twig @@ -58,7 +58,13 @@

Sections en anglais

- {{ en_page.sections }} sections + + {% if detailed_comparison.adjusted_en_section_count is defined %} + {{ detailed_comparison.adjusted_en_section_count }} sections ({{ en_page.sections }} avec "Contents") + {% else %} + {{ en_page.sections }} sections + {% endif %} +

Sections alignées par hiérarchie

@@ -67,6 +73,9 @@
  • {# h{{ section.en.level }} #} {{ section.en.title }} + {% if detailed_comparison.en_hierarchy_errors is defined and loop.index0 in detailed_comparison.en_hierarchy_errors %} + ! + {% endif %}
  • {% endfor %} @@ -74,6 +83,9 @@
  • {# h{{ section.level }} #} {{ section.title }} + {% if detailed_comparison.en_hierarchy_errors is defined and (detailed_comparison.section_comparison.common|length + loop.index0) in detailed_comparison.en_hierarchy_errors %} + ! + {% endif %}
  • {% endfor %} @@ -84,7 +96,13 @@

    Sections en français

    - {{ fr_page.sections }} sections + + {% if detailed_comparison.adjusted_fr_section_count is defined %} + {{ detailed_comparison.adjusted_fr_section_count }} sections ({{ fr_page.sections }} avec "Sommaire") + {% else %} + {{ fr_page.sections }} sections + {% endif %} +

    Sections alignées par hiérarchie

    @@ -93,6 +111,9 @@
  • {# h{{ section.fr.level }} #} {{ section.fr.title }} + {% if detailed_comparison.fr_hierarchy_errors is defined and loop.index0 in detailed_comparison.fr_hierarchy_errors %} + ! + {% endif %}
  • {% endfor %} @@ -100,7 +121,9 @@
  • {# h{{ section.level }} #} {{ section.title }} - + {% if detailed_comparison.fr_hierarchy_errors is defined and (detailed_comparison.section_comparison.common|length + loop.index0) in detailed_comparison.fr_hierarchy_errors %} + ! + {% endif %}
  • {% endfor %} @@ -227,59 +250,41 @@
    -

    Liens uniquement en anglais - ({{ detailed_comparison.link_comparison.en_only|length }})

    +

    Comparaison des liens côte à côte

    - +
    - - + + + + - {% for link in detailed_comparison.link_comparison.en_only %} + {% set en_links = detailed_comparison.link_comparison.en_only %} + {% set fr_links = detailed_comparison.link_comparison.fr_only %} + {% set max_links = max(en_links|length, fr_links|length) %} + + {% for i in 0..(max_links - 1) %} - - + {% if i < en_links|length %} + + + {% else %} + + + {% endif %} + + {% if i < fr_links|length %} + + + {% else %} + + + {% endif %} {% endfor %} - - -
    TexteURLTexte ENURL ENTexte FRURL FR
    {{ link.text }}{{ link.href|slice(0, 30) }}...{{ en_links[i].text }}{{ en_links[i].href|slice(0, 30) }}...{{ fr_links[i].text }}{{ fr_links[i].href|slice(0, 30) }}...
    -
    -
    -
    -
    -
    -
    -
    -

    Liens en français

    - {{ fr_page.link_count }} liens -
    -
    - - -

    Liens uniquement en français - ({{ detailed_comparison.link_comparison.fr_only|length }})

    -
    - - - - - - - - - {% for link in detailed_comparison.link_comparison.fr_only %} - - - - - {% endfor %} -
    TexteURL
    {{ link.text }}{{ link.href|slice(0, 30) }}...
    @@ -377,7 +382,13 @@
    • Sections - {{ en_page.sections }} + + {% if detailed_comparison.adjusted_en_section_count is defined %} + {{ detailed_comparison.adjusted_en_section_count }} + {% else %} + {{ en_page.sections }} + {% endif %} +
    • Mots @@ -510,7 +521,13 @@
      • Sections - {{ en_page.sections }} + + {% if detailed_comparison.adjusted_en_section_count is defined %} + {{ detailed_comparison.adjusted_en_section_count }} + {% else %} + {{ en_page.sections }} + {% endif %} +
      • Mots @@ -547,7 +564,13 @@
        • Sections - {{ fr_page.sections }} + + {% if detailed_comparison.adjusted_fr_section_count is defined %} + {{ detailed_comparison.adjusted_fr_section_count }} + {% else %} + {{ fr_page.sections }} + {% endif %} +
        • Mots diff --git a/templates/admin/wiki_osm_fr_groups.html.twig b/templates/admin/wiki_osm_fr_groups.html.twig index 57064f4..b314eb3 100644 --- a/templates/admin/wiki_osm_fr_groups.html.twig +++ b/templates/admin/wiki_osm_fr_groups.html.twig @@ -96,26 +96,103 @@
    {% if local_groups|length > 0 %} + +
    +
    + + + + + +
    +
    +
    {% for group in local_groups %} -
    -
    + {% set source = group.source|default('wiki') %} + {% set has_wiki = group.has_wiki_page|default(true) %} + {% set filter_classes = source ~ ' ' ~ (has_wiki ? 'has-wiki' : 'no-wiki') %} + +
    +
    + {% if source == 'framacalc' %} +
    + Framacalc + {% if has_wiki %} + Page wiki + {% else %} + Pas de page wiki + {% endif %} +
    + {% endif %}
    {{ group.name }}
    {% if group.description %}

    {{ group.description }}

    {% endif %} + + {% if source == 'framacalc' and group.contact %} +

    Contact: {{ group.contact }}

    + {% endif %} + + {% if source == 'framacalc' and group.website %} +

    + + Site web + +

    + {% endif %}
    {% endfor %}
    + + + {% else %}

    Aucun groupe local n'a été trouvé.

    diff --git a/templates/admin/wiki_random_suggestion.html.twig b/templates/admin/wiki_random_suggestion.html.twig index 3699d7c..d079448 100644 --- a/templates/admin/wiki_random_suggestion.html.twig +++ b/templates/admin/wiki_random_suggestion.html.twig @@ -3,120 +3,122 @@ {% block title %}Suggestion de page Wiki à améliorer{% endblock %} {% block body %} -
    - {% include 'admin/_wiki_navigation.html.twig' %} - -

    Suggestion de page Wiki à améliorer

    -

    Voici une page wiki qui a besoin d'être améliorée.

    +
    + {% include 'admin/_wiki_navigation.html.twig' %} -
    -
    -

    {{ page.key }}

    -
    -
    -
    -

    Raisons d'amélioration

    -

    {{ page.reason }}

    +

    Suggestion de page Wiki à améliorer

    +

    Voici une page wiki qui a besoin d'être améliorée.

    + +
    +
    +

    {{ page.key }}

    +
    +
    +

    Raisons d'amélioration

    +

    {{ page.reason }}

    +
    -
    -
    -
    -
    -

    Version anglaise

    -

    - Dernière modification: {{ page.en_page.last_modified }} -

    +
    +
    +
    +
    +

    Version anglaise

    +

    + Dernière modification: {{ page.en_page.last_modified }} +

    +
    +
    +
      +
    • + Sections + {{ page.en_page.sections }} +
    • +
    • + Mots + {{ page.en_page.word_count|default(0) }} +
    • +
    • + Liens + {{ page.en_page.link_count|default(0) }} +
    • +
    + +
    -
    -
      -
    • - Sections - {{ page.en_page.sections }} -
    • -
    • - Mots - {{ page.en_page.word_count|default(0) }} -
    • -
    • - Liens - {{ page.en_page.link_count|default(0) }} -
    • -
    - +
    +
    +
    +

    Version française

    + {% if page.fr_page %} +

    + Dernière modification: {{ page.fr_page.last_modified }} +

    + {% else %} +

    + Page non existante +

    + {% endif %} +
    +
    + {% if page.fr_page %} +
      +
    • + Sections + {{ page.fr_page.sections }} +
    • +
    • + Mots + {{ page.fr_page.word_count|default(0) }} +
    • +
    • + Liens + {{ page.fr_page.link_count|default(0) }} +
    • +
    + + {% else %} +
    +

    La page wiki pour la clé + "{{ page.key }}" n'existe pas en français.

    +

    Vous pouvez contribuer en créant cette page sur le wiki OpenStreetMap.

    +
    + + {% endif %}
    -
    -
    -
    -

    Version française

    - {% if page.fr_page %} -

    - Dernière modification: {{ page.fr_page.last_modified }} -

    - {% else %} -

    - Page non existante -

    - {% endif %} -
    -
    - {% if page.fr_page %} -
      -
    • - Sections - {{ page.fr_page.sections }} -
    • -
    • - Mots - {{ page.fr_page.word_count|default(0) }} -
    • -
    • - Liens - {{ page.fr_page.link_count|default(0) }} -
    • -
    - - {% else %} -
    -

    La page wiki pour la clé "{{ page.key }}" n'existe pas en français.

    -

    Vous pouvez contribuer en créant cette page sur le wiki OpenStreetMap.

    -
    - - {% endif %} -
    -
    + +
    +
    - +
    - - -
    {% endblock %} \ No newline at end of file diff --git a/templates/admin/wiki_recent_changes.html.twig b/templates/admin/wiki_recent_changes.html.twig new file mode 100644 index 0000000..96772b1 --- /dev/null +++ b/templates/admin/wiki_recent_changes.html.twig @@ -0,0 +1,87 @@ +{% extends 'base.html.twig' %} + +{% block title %}Changements récents Wiki OSM{% endblock %} + +{% block body %} +
    + {% include 'admin/_wiki_navigation.html.twig' %} + +

    Changements récents Wiki OpenStreetMap

    +

    Liste des changements récents dans l'espace de noms français du wiki OpenStreetMap.

    + + {% if last_updated %} +
    + Dernière mise à jour : {{ last_updated|date('d/m/Y H:i') }} +
    + {% endif %} + +
    +
    +

    Changements récents

    +

    + + Voir sur le wiki OSM + +

    +
    +
    + {% if recent_changes|length > 0 %} +
    + + + + + + + + + + + + + {% for change in recent_changes %} + + + + + + + + + {% endfor %} + +
    PageDateUtilisateurCommentaireTailleActions
    + {{ change.page_name }} + {{ change.timestamp }}{{ change.user }}{{ change.comment }}{{ change.change_size }} + + Voir + +
    +
    + {% else %} +
    +

    Aucun changement récent n'a été trouvé.

    +
    + {% endif %} +
    +
    + +
    +
    +

    À propos des changements récents

    +
    +
    +

    Cette page affiche les changements récents dans l'espace de noms français (FR:) du wiki OpenStreetMap.

    +

    Ces informations sont utiles pour suivre les traductions manquantes et les mises à jour des pages wiki.

    +

    Les données sont mises à jour automatiquement toutes les heures.

    +
    +
    + + +
    +{% endblock %} \ No newline at end of file diff --git a/wiki_compare/CHANGES.md b/wiki_compare/CHANGES.md new file mode 100644 index 0000000..065c6fd --- /dev/null +++ b/wiki_compare/CHANGES.md @@ -0,0 +1,66 @@ +# Changements implémentés + +Ce document résume les changements et nouvelles fonctionnalités implémentés dans le cadre de la mise à jour du système de gestion des pages wiki OSM. + +## 1. Suivi des changements récents du wiki OSM + +### Fonctionnalités ajoutées +- Création d'un script `fetch_recent_changes.py` qui récupère les changements récents dans l'espace de noms français du wiki OSM +- Ajout d'une nouvelle route `/wiki/recent-changes` dans le contrôleur WikiController +- Création d'un template `wiki_recent_changes.html.twig` pour afficher les changements récents +- Mise à jour de la navigation pour inclure un lien vers la page des changements récents + +### Utilisation +- Les changements récents sont automatiquement récupérés toutes les heures +- La page affiche la liste des pages modifiées récemment avec des liens vers ces pages + +## 2. Validation de la hiérarchie des titres + +### Fonctionnalités ajoutées +- Implémentation d'une logique de détection des hiérarchies de titres incorrectes (par exemple, h4 directement sous h2 sans h3 intermédiaire) +- Ajout d'indicateurs visuels (badges) pour signaler les hiérarchies incorrectes dans les listes de sections +- Mise à jour du template `wiki_compare.html.twig` pour afficher ces indicateurs + +### Utilisation +- Les hiérarchies incorrectes sont automatiquement détectées lors de la comparaison des pages wiki +- Un badge rouge avec un point d'exclamation est affiché à côté des titres ayant une hiérarchie incorrecte + +## 3. Vérification des groupes locaux + +### Fonctionnalités ajoutées +- Mise à jour du script `fetch_osm_fr_groups.py` pour récupérer les données des groupes locaux depuis Framacalc +- Ajout d'une fonctionnalité de vérification de l'existence d'une page wiki pour chaque groupe +- Mise à jour du template `wiki_osm_fr_groups.html.twig` pour afficher les résultats de vérification +- Ajout de filtres pour faciliter la navigation parmi les groupes + +### Utilisation +- Les groupes sont affichés avec des badges indiquant leur source (wiki ou Framacalc) +- Les groupes sans page wiki sont mis en évidence avec un badge rouge +- Les filtres permettent de voir uniquement les groupes d'une certaine catégorie (tous, wiki, Framacalc, avec page wiki, sans page wiki) + +## Limitations connues + +1. **Accès aux données externes** : Les scripts peuvent rencontrer des difficultés pour accéder aux données externes (wiki OSM, Framacalc) en fonction de l'environnement d'exécution. + +2. **Détection des hiérarchies** : La détection des hiérarchies incorrectes se base uniquement sur les niveaux des titres et ne prend pas en compte le contenu ou la sémantique. + +3. **Correspondance des groupes** : La correspondance entre les groupes Framacalc et les pages wiki se fait par une comparaison approximative des noms, ce qui peut parfois donner des résultats imprécis. + +## Maintenance future + +### Scripts Python +- Les scripts Python sont situés dans le répertoire `wiki_compare/` +- Ils peuvent être exécutés manuellement ou via des tâches cron +- L'option `--dry-run` permet de tester les scripts sans modifier les fichiers +- L'option `--force` permet de forcer la mise à jour même si le cache est récent + +### Templates Twig +- Les templates sont situés dans le répertoire `templates/admin/` +- `wiki_recent_changes.html.twig` : Affichage des changements récents +- `wiki_compare.html.twig` : Comparaison des pages wiki avec validation de hiérarchie +- `wiki_osm_fr_groups.html.twig` : Affichage des groupes locaux avec vérification des pages wiki + +### Contrôleur +- Le contrôleur `WikiController.php` contient toutes les routes et la logique de traitement +- La méthode `detectHeadingHierarchyErrors()` peut être ajustée pour modifier les règles de validation des hiérarchies +- Les méthodes de rafraîchissement des données (`refreshRecentChangesData()`, etc.) peuvent être modifiées pour ajuster la fréquence de mise à jour \ No newline at end of file diff --git a/wiki_compare/fetch_osm_fr_groups.py b/wiki_compare/fetch_osm_fr_groups.py index 0189730..565cdd3 100755 --- a/wiki_compare/fetch_osm_fr_groups.py +++ b/wiki_compare/fetch_osm_fr_groups.py @@ -4,9 +4,11 @@ """ fetch_osm_fr_groups.py -This script scrapes the OpenStreetMap wiki page for France/OSM-FR to extract -information about local working groups. It specifically targets links in the -#Pages_des_groupes_locaux section. +This script fetches information about OSM-FR local groups from two sources: +1. The OpenStreetMap wiki page for France/OSM-FR (specifically the #Pages_des_groupes_locaux section) +2. The Framacalc spreadsheet at https://framacalc.org/osm-groupes-locaux + +It then verifies that each group from the Framacalc has a corresponding wiki page. Usage: python fetch_osm_fr_groups.py [--dry-run] [--force] @@ -24,6 +26,8 @@ import json import argparse import logging import os +import csv +import io from datetime import datetime, timedelta import requests from bs4 import BeautifulSoup @@ -40,6 +44,8 @@ logger = logging.getLogger(__name__) OUTPUT_FILE = "osm_fr_groups.json" BASE_URL = "https://wiki.openstreetmap.org/wiki/France/OSM-FR" WIKI_BASE_URL = "https://wiki.openstreetmap.org" +FRAMACALC_URL = "https://framacalc.org/osm-groupes-locaux/export/csv" +WIKI_GROUPS_URL = "https://wiki.openstreetmap.org/wiki/France/OSM-FR#Groupes_locaux" CACHE_DURATION = timedelta(hours=1) # Cache duration of 1 hour def is_cache_fresh(): @@ -141,7 +147,7 @@ def extract_working_groups(html_content): logger.info(f"Found {len(working_groups)} working groups") return working_groups -def extract_local_groups(html_content): +def extract_local_groups_from_wiki(html_content): """ Extract local groups from the wiki page HTML @@ -193,13 +199,170 @@ def extract_local_groups(html_content): "name": name, "url": url, "description": description, - "type": "local_group" + "type": "local_group", + "source": "wiki" }) current = current.next_sibling - logger.info(f"Found {len(local_groups)} local groups") + logger.info(f"Found {len(local_groups)} local groups from wiki") return local_groups +def fetch_framacalc_data(): + """ + Fetch local groups data from Framacalc + + Returns: + list: List of local group dictionaries from Framacalc + """ + try: + response = requests.get(FRAMACALC_URL) + response.raise_for_status() + + # Parse CSV data + csv_data = csv.reader(io.StringIO(response.text)) + rows = list(csv_data) + + # Check if we have data + if len(rows) < 2: + logger.warning("No data found in Framacalc CSV") + return [] + + # Extract headers (first row) + headers = rows[0] + + # Find the indices of important columns + name_idx = -1 + contact_idx = -1 + website_idx = -1 + + for i, header in enumerate(headers): + header_lower = header.lower() + if 'nom' in header_lower or 'groupe' in header_lower: + name_idx = i + elif 'contact' in header_lower or 'email' in header_lower: + contact_idx = i + elif 'site' in header_lower or 'web' in header_lower: + website_idx = i + + if name_idx == -1: + logger.warning("Could not find name column in Framacalc CSV") + return [] + + # Process data rows + local_groups = [] + for row in rows[1:]: # Skip header row + if len(row) <= name_idx or not row[name_idx].strip(): + continue # Skip empty rows + + name = row[name_idx].strip() + contact = row[contact_idx].strip() if contact_idx != -1 and contact_idx < len(row) else "" + website = row[website_idx].strip() if website_idx != -1 and website_idx < len(row) else "" + + local_groups.append({ + "name": name, + "contact": contact, + "website": website, + "type": "local_group", + "source": "framacalc", + "has_wiki_page": False, # Will be updated later + "wiki_url": "" # Will be updated later + }) + + logger.info(f"Found {len(local_groups)} local groups from Framacalc") + return local_groups + + except requests.exceptions.RequestException as e: + logger.error(f"Error fetching Framacalc data: {e}") + return [] + except Exception as e: + logger.error(f"Error processing Framacalc data: {e}") + return [] + +def extract_wiki_group_links(): + """ + Extract links to local group wiki pages from the OSM-FR wiki page + + Returns: + dict: Dictionary mapping group names to wiki URLs + """ + try: + # Get the wiki page content + response = requests.get(WIKI_GROUPS_URL) + response.raise_for_status() + + soup = BeautifulSoup(response.text, 'html.parser') + wiki_links = {} + + # Find the "Pages des groupes locaux" section + pages_section = None + for heading in soup.find_all(['h2', 'h3', 'h4']): + if 'Pages des groupes locaux' in heading.get_text(): + pages_section = heading + break + + if not pages_section: + logger.warning("Could not find 'Pages des groupes locaux' section") + return {} + + # Get the content following the heading until the next heading + current = pages_section.next_sibling + while current and not current.name in ['h2', 'h3', 'h4']: + if current.name == 'ul': + # Process list items + for li in current.find_all('li', recursive=False): + text = li.get_text().strip() + link = li.find('a') + + if link and text: + # Extract group name (before the comma) + parts = text.split(',', 1) + group_name = parts[0].strip() + + url = WIKI_BASE_URL + link.get('href') if link.get('href').startswith('/') else link.get('href') + wiki_links[group_name] = url + + current = current.next_sibling + + logger.info(f"Found {len(wiki_links)} wiki links for local groups") + return wiki_links + + except requests.exceptions.RequestException as e: + logger.error(f"Error fetching wiki group links: {e}") + return {} + except Exception as e: + logger.error(f"Error processing wiki group links: {e}") + return {} + +def verify_framacalc_groups_have_wiki(framacalc_groups, wiki_links): + """ + Verify that each group from Framacalc has a corresponding wiki page + + Args: + framacalc_groups (list): List of local group dictionaries from Framacalc + wiki_links (dict): Dictionary mapping group names to wiki URLs + + Returns: + list: Updated list of local group dictionaries with wiki verification + """ + for group in framacalc_groups: + group_name = group['name'] + + # Try to find a matching wiki link + found = False + for wiki_name, wiki_url in wiki_links.items(): + # Check if the group name is similar to the wiki name + if group_name.lower() in wiki_name.lower() or wiki_name.lower() in group_name.lower(): + group['has_wiki_page'] = True + group['wiki_url'] = wiki_url + found = True + break + + if not found: + group['has_wiki_page'] = False + group['wiki_url'] = "" + + return framacalc_groups + def extract_umap_url(html_content): """ Extract the uMap URL for OSM-FR local groups @@ -223,14 +386,16 @@ def extract_umap_url(html_content): return None -def save_results(local_groups, working_groups, umap_url, dry_run=False): +def save_results(wiki_local_groups, framacalc_groups, working_groups, umap_url, wiki_links, dry_run=False): """ Save the results to a JSON file Args: - local_groups (list): List of local group dictionaries + wiki_local_groups (list): List of local group dictionaries from wiki + framacalc_groups (list): List of local group dictionaries from Framacalc working_groups (list): List of working group dictionaries umap_url (str): URL to the uMap for local groups + wiki_links (dict): Dictionary mapping group names to wiki URLs dry_run (bool): If True, don't actually save to file Returns: @@ -238,28 +403,41 @@ def save_results(local_groups, working_groups, umap_url, dry_run=False): """ if dry_run: logger.info("DRY RUN: Would have saved results to file") - logger.info(f"Local groups: {len(local_groups)}") - for group in local_groups: + logger.info(f"Wiki local groups: {len(wiki_local_groups)}") + for group in wiki_local_groups[:5]: # Show only first 5 for brevity logger.info(f" - {group['name']}: {group['url']}") + + logger.info(f"Framacalc groups: {len(framacalc_groups)}") + for group in framacalc_groups[:5]: # Show only first 5 for brevity + wiki_status = "Has wiki page" if group.get('has_wiki_page') else "No wiki page" + logger.info(f" - {group['name']}: {wiki_status}") + logger.info(f"Working groups: {len(working_groups)}") - for group in working_groups: + for group in working_groups[:5]: # Show only first 5 for brevity logger.info(f" - {group['name']}: {group['url']}") + if umap_url: logger.info(f"uMap URL: {umap_url}") + + logger.info(f"Wiki links: {len(wiki_links)}") return True + # Combine all local groups + all_local_groups = wiki_local_groups + framacalc_groups + # Prepare the data structure data = { "last_updated": datetime.now().isoformat(), - "local_groups": local_groups, + "local_groups": all_local_groups, "working_groups": working_groups, - "umap_url": umap_url + "umap_url": umap_url, + "wiki_links": wiki_links } try: with open(OUTPUT_FILE, 'w', encoding='utf-8') as f: json.dump(data, f, indent=2, ensure_ascii=False) - logger.info(f"Successfully saved {len(local_groups)} local groups and {len(working_groups)} working groups to {OUTPUT_FILE}") + logger.info(f"Successfully saved {len(all_local_groups)} local groups and {len(working_groups)} working groups to {OUTPUT_FILE}") return True except IOError as e: logger.error(f"Error saving results to {OUTPUT_FILE}: {e}") @@ -267,7 +445,7 @@ def save_results(local_groups, working_groups, umap_url, dry_run=False): def main(): """Main function to execute the script""" - parser = argparse.ArgumentParser(description="Scrape OSM-FR local groups from the wiki") + parser = argparse.ArgumentParser(description="Fetch OSM-FR local groups from wiki and Framacalc") parser.add_argument("--dry-run", action="store_true", help="Run without saving results to file") parser.add_argument("--force", action="store_true", help="Force update even if cache is fresh") args = parser.parse_args() @@ -287,11 +465,11 @@ def main(): logger.error("Failed to get wiki page content") return - # Extract local groups - local_groups = extract_local_groups(html_content) + # Extract local groups from wiki + wiki_local_groups = extract_local_groups_from_wiki(html_content) - if not local_groups: - logger.warning("No local groups found") + if not wiki_local_groups: + logger.warning("No local groups found in wiki") # Extract working groups working_groups = extract_working_groups(html_content) @@ -304,8 +482,31 @@ def main(): # Extract uMap URL umap_url = extract_umap_url(html_content) + # Fetch local groups from Framacalc + framacalc_groups = fetch_framacalc_data() + + if not framacalc_groups: + logger.warning("No local groups found in Framacalc") + + # Extract wiki group links + wiki_links = extract_wiki_group_links() + + if not wiki_links: + logger.warning("No wiki links found for local groups") + + # Verify Framacalc groups have wiki pages + if framacalc_groups and wiki_links: + framacalc_groups = verify_framacalc_groups_have_wiki(framacalc_groups, wiki_links) + + # Count groups with and without wiki pages + groups_with_wiki = sum(1 for group in framacalc_groups if group.get('has_wiki_page')) + groups_without_wiki = sum(1 for group in framacalc_groups if not group.get('has_wiki_page')) + + logger.info(f"Framacalc groups with wiki pages: {groups_with_wiki}") + logger.info(f"Framacalc groups without wiki pages: {groups_without_wiki}") + # Save results - success = save_results(local_groups, working_groups, umap_url, args.dry_run) + success = save_results(wiki_local_groups, framacalc_groups, working_groups, umap_url, wiki_links, args.dry_run) if success: logger.info("Script completed successfully") diff --git a/wiki_compare/fetch_recent_changes.py b/wiki_compare/fetch_recent_changes.py new file mode 100644 index 0000000..4147ad7 --- /dev/null +++ b/wiki_compare/fetch_recent_changes.py @@ -0,0 +1,216 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" +fetch_recent_changes.py + +This script fetches recent changes from the OpenStreetMap wiki for the French namespace +and stores the URLs of these pages. It specifically targets the recent changes page: +https://wiki.openstreetmap.org/w/index.php?hidebots=1&hidepreviousrevisions=1&hidecategorization=1&hideWikibase=1&hidelog=1&hidenewuserlog=1&namespace=202&limit=500&days=30&enhanced=1&title=Special:RecentChanges&urlversion=2 + +Usage: + python fetch_recent_changes.py [--dry-run] [--force] + +Options: + --dry-run Run the script without saving the results to a file + --force Force update even if the cache is still fresh (less than 1 hour old) + +Output: + - recent_changes.json: JSON file with information about recent changes in the French namespace + - Log messages about the scraping process and results +""" + +import json +import argparse +import logging +import os +from datetime import datetime, timedelta +import requests +from bs4 import BeautifulSoup + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s', + datefmt='%Y-%m-%d %H:%M:%S' +) +logger = logging.getLogger(__name__) + +# Constants +OUTPUT_FILE = "recent_changes.json" +RECENT_CHANGES_URL = "https://wiki.openstreetmap.org/w/index.php?hidebots=1&hidepreviousrevisions=1&hidecategorization=1&hideWikibase=1&hidelog=1&hidenewuserlog=1&namespace=202&limit=500&days=30&enhanced=1&title=Special:RecentChanges&urlversion=2" +WIKI_BASE_URL = "https://wiki.openstreetmap.org" +CACHE_DURATION = timedelta(hours=1) # Cache duration of 1 hour + +def is_cache_fresh(): + """ + Check if the cache file exists and is less than CACHE_DURATION old + + Returns: + bool: True if cache is fresh, False otherwise + """ + if not os.path.exists(OUTPUT_FILE): + return False + + try: + with open(OUTPUT_FILE, 'r', encoding='utf-8') as f: + data = json.load(f) + last_updated = datetime.fromisoformat(data.get('last_updated', '2000-01-01T00:00:00')) + now = datetime.now() + return (now - last_updated) < CACHE_DURATION + except (IOError, json.JSONDecodeError, ValueError) as e: + logger.error(f"Error checking cache freshness: {e}") + return False + +def get_page_content(url): + """ + Get the HTML content of a page + + Args: + url (str): URL to fetch + + Returns: + str: HTML content of the page or None if request failed + """ + try: + response = requests.get(url) + response.raise_for_status() + return response.text + except requests.exceptions.RequestException as e: + logger.error(f"Error fetching {url}: {e}") + return None + +def extract_recent_changes(html_content): + """ + Extract recent changes from the wiki page HTML + + Args: + html_content (str): HTML content of the recent changes page + + Returns: + list: List of recent change dictionaries + """ + if not html_content: + return [] + + soup = BeautifulSoup(html_content, 'html.parser') + recent_changes = [] + + # Find the changes list + changes_list = soup.find('ul', class_='special') + + if not changes_list: + logger.warning("Could not find recent changes list") + return [] + + # Process each list item (each change) + for li in changes_list.find_all('li'): + # Extract the page link + page_link = li.find('a', class_='mw-changeslist-title') + if not page_link: + continue + + page_name = page_link.get_text().strip() + page_url = WIKI_BASE_URL + page_link.get('href') + + # Extract the timestamp + timestamp_span = li.find('span', class_='mw-changeslist-date') + timestamp = timestamp_span.get_text().strip() if timestamp_span else "Unknown" + + # Extract the user + user_link = li.find('a', class_='mw-userlink') + user = user_link.get_text().strip() if user_link else "Unknown" + + # Extract the comment + comment_span = li.find('span', class_='comment') + comment = comment_span.get_text().strip() if comment_span else "" + + # Extract the change size + change_size_span = li.find('span', class_='mw-changeslist-separator').next_sibling + change_size = change_size_span.get_text().strip() if change_size_span else "0" + + recent_changes.append({ + "page_name": page_name, + "page_url": page_url, + "timestamp": timestamp, + "user": user, + "comment": comment, + "change_size": change_size + }) + + logger.info(f"Found {len(recent_changes)} recent changes") + return recent_changes + +def save_results(recent_changes, dry_run=False): + """ + Save the results to a JSON file + + Args: + recent_changes (list): List of recent change dictionaries + dry_run (bool): If True, don't actually save to file + + Returns: + bool: True if saving was successful or dry run, False otherwise + """ + if dry_run: + logger.info("DRY RUN: Would have saved results to file") + logger.info(f"Recent changes: {len(recent_changes)}") + for change in recent_changes[:5]: # Show only first 5 for brevity + logger.info(f" - {change['page_name']}: {change['page_url']} ({change['timestamp']})") + if len(recent_changes) > 5: + logger.info(f" ... and {len(recent_changes) - 5} more") + return True + + # Prepare the data structure + data = { + "last_updated": datetime.now().isoformat(), + "recent_changes": recent_changes + } + + try: + with open(OUTPUT_FILE, 'w', encoding='utf-8') as f: + json.dump(data, f, indent=2, ensure_ascii=False) + logger.info(f"Successfully saved {len(recent_changes)} recent changes to {OUTPUT_FILE}") + return True + except IOError as e: + logger.error(f"Error saving results to {OUTPUT_FILE}: {e}") + return False + +def main(): + """Main function to execute the script""" + parser = argparse.ArgumentParser(description="Fetch recent changes from the OSM wiki French namespace") + parser.add_argument("--dry-run", action="store_true", help="Run without saving results to file") + parser.add_argument("--force", action="store_true", help="Force update even if cache is fresh") + args = parser.parse_args() + + logger.info("Starting fetch_recent_changes.py") + + # Check if cache is fresh + if is_cache_fresh() and not args.force: + logger.info(f"Cache is still fresh (less than {CACHE_DURATION.total_seconds()/3600} hours old)") + logger.info(f"Use --force to update anyway") + return + + # Get the recent changes page content + html_content = get_page_content(RECENT_CHANGES_URL) + + if not html_content: + logger.error("Failed to get recent changes page content") + return + + # Extract recent changes + recent_changes = extract_recent_changes(html_content) + + if not recent_changes: + logger.warning("No recent changes found") + + # Save results + success = save_results(recent_changes, args.dry_run) + + if success: + logger.info("Script completed successfully") + else: + logger.error("Script completed with errors") + +if __name__ == "__main__": + main() \ No newline at end of file