diff --git a/README.md b/README.md index 9b1b08d9..ea5b2847 100644 --- a/README.md +++ b/README.md @@ -259,3 +259,22 @@ Chaque exécution traite une ville si les ressources le permettent. En adaptant ### Remarques - Les CityFollowUp ne sont plus supprimés lors des labourages. - Le système garantit que les villes sont mises à jour dès que possible sans surcharger le serveur. + +# Analyses complémentaires +## Analyse de l'historique des objets dans les villes + +Voir le dossier `counting_osm_objects`. + +## Inspection de la fraîcheur des traductions de wiki + +Le dossier `wiki_compare` contient des scripts pour analyser les pages wiki d'OpenStreetMap, identifier celles qui ont besoin de mises à jour ou de traductions, et publier des suggestions sur Mastodon. + +### Scripts disponibles + +- **wiki_compare.py** : Récupère les 10 clés OSM les plus utilisées, compare leurs pages wiki en anglais et en français, et identifie celles qui ont besoin de mises à jour. +- **post_outdated_page.py** : Sélectionne aléatoirement une page wiki française qui n'est pas à jour et publie un message sur Mastodon pour suggérer sa mise à jour. +- **suggest_translation.py** : Identifie les pages wiki anglaises qui n'ont pas de traduction française et publie une suggestion de traduction sur Mastodon. + +### Utilisation + +Consultez le [README du dossier wiki_compare](wiki_compare/README.md) pour plus de détails sur l'installation, la configuration et l'utilisation de ces scripts. 
\ No newline at end of file diff --git a/counting_osm_objects/.gitignore b/counting_osm_objects/.gitignore index ae2a1d0c..d1d07c1e 100644 --- a/counting_osm_objects/.gitignore +++ b/counting_osm_objects/.gitignore @@ -12,3 +12,4 @@ secrets.sh cookie.txt bin/venv activate +city_analysis \ No newline at end of file diff --git a/src/Controller/WikiController.php b/src/Controller/WikiController.php new file mode 100644 index 00000000..660b2218 --- /dev/null +++ b/src/Controller/WikiController.php @@ -0,0 +1,35 @@ +getParameter('kernel.project_dir') . '/wiki_compare/wiki_pages.csv'; + + if (!file_exists($csvFile)) { + $this->addFlash('error', 'Le fichier wiki_pages.csv n\'existe pas.'); + return $this->redirectToRoute('app_admin_index'); + } + + $csvData = array_map('str_getcsv', file($csvFile)); + $headers = array_shift($csvData); + + $wikiPages = []; + foreach ($csvData as $row) { + $page = array_combine($headers, $row); + $wikiPages[$page['key']][$page['language']] = $page; + } + + return $this->render('admin/wiki.html.twig', [ + 'wiki_pages' => $wikiPages, + ]); + } +} \ No newline at end of file diff --git a/templates/admin/index.html.twig b/templates/admin/index.html.twig index 073f8d1b..262b35a3 100644 --- a/templates/admin/index.html.twig +++ b/templates/admin/index.html.twig @@ -41,6 +41,9 @@ Podium des contributeurs OSM + + Pages Wiki OSM + Retour à l'accueil diff --git a/templates/admin/wiki.html.twig b/templates/admin/wiki.html.twig new file mode 100644 index 00000000..5efb6fd9 --- /dev/null +++ b/templates/admin/wiki.html.twig @@ -0,0 +1,66 @@ +{% extends 'base.html.twig' %} + +{% block title %}Pages Wiki OSM{% endblock %} + +{% block body %} +
+

Pages Wiki OpenStreetMap

+

Comparaison des pages wiki en français et en anglais pour les clés OSM les plus utilisées.

+ +
+
+

Liste des pages wiki

+
+
+
+ + + + + + + + + + + + + + + + + + {% for key, languages in wiki_pages %} + + + + {% if languages['en'] is defined %} + + + + {% else %} + + {% endif %} + + {% if languages['fr'] is defined %} + + + + {% else %} + + {% endif %} + + {% endfor %} + +
CléVersion anglaiseVersion française
SectionsMotsLiensSectionsMotsLiens
+ {{ key }} + {{ languages['en'].sections }}{{ languages['en'].word_count }}{{ languages['en'].link_count }}Page non disponible{{ languages['fr'].sections }}{{ languages['fr'].word_count }}{{ languages['fr'].link_count }}Page non disponible
+
+
+
+ +
+ +
+
+{% endblock %} \ No newline at end of file diff --git a/wiki_compare/README.md b/wiki_compare/README.md new file mode 100644 index 00000000..a248051c --- /dev/null +++ b/wiki_compare/README.md @@ -0,0 +1,184 @@ +# OSM Wiki Compare + +Ce projet contient des scripts pour analyser les pages wiki d'OpenStreetMap, identifier celles qui ont besoin de mises à jour ou de traductions, et publier des suggestions sur Mastodon pour encourager la communauté à contribuer. + +## Vue d'ensemble + +Le projet comprend trois scripts principaux : + +1. **wiki_compare.py** : Récupère les 10 clés OSM les plus utilisées, compare leurs pages wiki en anglais et en français, et identifie celles qui ont besoin de mises à jour. +2. **post_outdated_page.py** : Sélectionne aléatoirement une page wiki française qui n'est pas à jour et publie un message sur Mastodon pour suggérer sa mise à jour. +3. **suggest_translation.py** : Identifie les pages wiki anglaises qui n'ont pas de traduction française et publie une suggestion de traduction sur Mastodon. + +## Installation + +### Prérequis + +- Python 3.6 ou supérieur +- Pip (gestionnaire de paquets Python) + +### Dépendances + +Installez les dépendances requises : + +```bash +pip install requests beautifulsoup4 +``` + +## Configuration + +### Mastodon API + +Pour publier sur Mastodon, vous devez : + +1. Créer un compte sur une instance Mastodon +2. Créer une application dans les paramètres de votre compte pour obtenir un jeton d'accès +3. 
Configurer les scripts avec votre instance et votre jeton d'accès + +Modifiez les constantes suivantes dans les scripts `post_outdated_page.py` et `suggest_translation.py` : + +```python +MASTODON_API_URL = "https://mastodon.instance/api/v1/statuses" # Remplacez par votre instance +``` + +### Variables d'environnement + +Définissez la variable d'environnement suivante pour l'authentification Mastodon : + +```bash +export MASTODON_ACCESS_TOKEN="votre_jeton_d_acces" +``` + +## Utilisation + +### Analyser les pages wiki + +Pour analyser les pages wiki et générer les fichiers de données : + +```bash +./wiki_compare.py +``` + +Cela produira : +- `top_keys.json` : Les 10 clés OSM les plus utilisées +- `wiki_pages.csv` : Informations sur chaque page wiki +- `outdated_pages.json` : Pages qui ont besoin de mises à jour +- Une sortie console listant les 10 pages wiki qui ont besoin de mises à jour + +### Publier une suggestion de mise à jour + +Pour sélectionner aléatoirement une page française qui n'est pas à jour et publier une suggestion sur Mastodon : + +```bash +./post_outdated_page.py +``` + +Pour simuler la publication sans réellement poster sur Mastodon (mode test) : + +```bash +./post_outdated_page.py --dry-run +``` + +### Suggérer une traduction + +Pour identifier une page anglaise sans traduction française et publier une suggestion sur Mastodon : + +```bash +./suggest_translation.py +``` + +Pour simuler la publication sans réellement poster sur Mastodon (mode test) : + +```bash +./suggest_translation.py --dry-run +``` + +## Automatisation + +Vous pouvez automatiser l'exécution de ces scripts à l'aide de cron pour publier régulièrement des suggestions de mises à jour et de traductions. 
+ +Exemple de configuration cron pour publier une suggestion de mise à jour chaque lundi et une suggestion de traduction chaque jeudi : + +``` +0 10 * * 1 cd /chemin/vers/wiki_compare && ./wiki_compare.py && ./post_outdated_page.py +0 10 * * 4 cd /chemin/vers/wiki_compare && ./wiki_compare.py && ./suggest_translation.py +``` + +## Structure des données + +### top_keys.json + +Contient les 10 clés OSM les plus utilisées avec leur nombre d'utilisations : + +```json +[ + { + "key": "building", + "count": 459876543 + }, + ... +] +``` + +### wiki_pages.csv + +Contient des informations sur chaque page wiki : + +``` +key,language,url,last_modified,sections,word_count,link_count +building,en,https://wiki.openstreetmap.org/wiki/Key:building,2023-05-15,12,3500,450 +building,fr,https://wiki.openstreetmap.org/wiki/FR:Key:building,2022-01-10,10,2800,435 +... +``` + +### outdated_pages.json + +Contient des informations détaillées sur les pages qui ont besoin de mises à jour : + +```json +[ + { + "key": "building", + "reason": "French page outdated by 491 days", + "en_page": { ... }, + "fr_page": { ... }, + "date_diff": 491, + "word_diff": 700, + "section_diff": 2, + "link_diff": 15, + "priority": 250.5 + }, + { + "key": "amenity", + "reason": "French page missing", + "en_page": { ... }, + "fr_page": null, + "date_diff": 0, + "word_diff": 4200, + "section_diff": 15, + "link_diff": 350, + "priority": 100 + }, + ... +] +``` + +## Dépannage + +### Problèmes courants + +1. **Erreur d'authentification Mastodon** : Vérifiez que la variable d'environnement `MASTODON_ACCESS_TOKEN` est correctement définie et que le jeton est valide. + +2. **Erreur de chargement des fichiers JSON** : Assurez-vous d'exécuter `wiki_compare.py` avant les autres scripts pour générer les fichiers de données nécessaires. + +3. **Aucune page à mettre à jour ou à traduire** : Il est possible que toutes les pages soient à jour ou traduites. 
Essayez d'augmenter le nombre de clés analysées en modifiant la valeur `limit` dans la fonction `fetch_top_keys` de `wiki_compare.py`. + +### Journalisation + +Tous les scripts utilisent le module `logging` pour enregistrer les informations d'exécution. Par défaut, les logs sont affichés dans la console. Pour les rediriger vers un fichier, modifiez la configuration de logging dans chaque script. + +## Contribution + +Les contributions sont les bienvenues ! N'hésitez pas à ouvrir une issue ou une pull request pour améliorer ces scripts. + +## Licence + +Ce projet est sous licence MIT. Voir le fichier LICENSE pour plus de détails. \ No newline at end of file diff --git a/wiki_compare/outdated_pages.json b/wiki_compare/outdated_pages.json new file mode 100644 index 00000000..62babab8 --- /dev/null +++ b/wiki_compare/outdated_pages.json @@ -0,0 +1,218 @@ +[ + { + "key": "surface", + "reason": "French page outdated by 1223 days, English page has 888 more words, English page has 11 more sections, English page has 130 more links", + "en_page": { + "key": "surface", + "language": "en", + "url": "https://wiki.openstreetmap.org/wiki/Key:surface", + "last_modified": "2025-06-29", + "sections": 24, + "word_count": 3574, + "link_count": 676 + }, + "fr_page": { + "key": "surface", + "language": "fr", + "url": "https://wiki.openstreetmap.org/wiki/FR:Key:surface", + "last_modified": "2022-02-22", + "sections": 13, + "word_count": 2686, + "link_count": 546 + }, + "date_diff": 1223, + "word_diff": 888, + "section_diff": 11, + "link_diff": 130, + "priority": 495.57000000000005 + }, + { + "key": "source", + "reason": "French page outdated by 552 days, English page has 4 more sections, English page has 84 more links", + "en_page": { + "key": "source", + "language": "en", + "url": "https://wiki.openstreetmap.org/wiki/Key:source", + "last_modified": "2025-08-12", + "sections": 27, + "word_count": 2851, + "link_count": 399 + }, + "fr_page": { + "key": "source", + "language": "fr", + 
"url": "https://wiki.openstreetmap.org/wiki/FR:Key:source", + "last_modified": "2024-02-07", + "sections": 23, + "word_count": 2692, + "link_count": 315 + }, + "date_diff": 552, + "word_diff": 159, + "section_diff": 4, + "link_diff": 84, + "priority": 223.25750000000002 + }, + { + "key": "name", + "reason": "French page outdated by 190 days, English page has 476 more words, English page has 94 more links", + "en_page": { + "key": "name", + "language": "en", + "url": "https://wiki.openstreetmap.org/wiki/Key:name", + "last_modified": "2025-07-25", + "sections": 17, + "word_count": 2295, + "link_count": 366 + }, + "fr_page": { + "key": "name", + "language": "fr", + "url": "https://wiki.openstreetmap.org/wiki/FR:Key:name", + "last_modified": "2025-01-16", + "sections": 21, + "word_count": 1819, + "link_count": 272 + }, + "date_diff": 190, + "word_diff": 476, + "section_diff": -4, + "link_diff": 94, + "priority": 79.39999999999999 + }, + { + "key": "addr:housenumber", + "reason": "French page outdated by 120 days", + "en_page": { + "key": "addr:housenumber", + "language": "en", + "url": "https://wiki.openstreetmap.org/wiki/Key:addr:housenumber", + "last_modified": "2025-07-24", + "sections": 11, + "word_count": 429, + "link_count": 182 + }, + "fr_page": { + "key": "addr:housenumber", + "language": "fr", + "url": "https://wiki.openstreetmap.org/wiki/FR:Key:addr:housenumber", + "last_modified": "2025-03-26", + "sections": 15, + "word_count": 1754, + "link_count": 236 + }, + "date_diff": 120, + "word_diff": -1325, + "section_diff": -4, + "link_diff": -54, + "priority": 52.9225 + }, + { + "key": "addr:city", + "reason": "French page outdated by 125 days", + "en_page": { + "key": "addr:city", + "language": "en", + "url": "https://wiki.openstreetmap.org/wiki/Key:addr:city", + "last_modified": "2025-07-29", + "sections": 15, + "word_count": 901, + "link_count": 190 + }, + "fr_page": { + "key": "addr:city", + "language": "fr", + "url": 
"https://wiki.openstreetmap.org/wiki/FR:Key:addr:city", + "last_modified": "2025-03-26", + "sections": 15, + "word_count": 1754, + "link_count": 236 + }, + "date_diff": 125, + "word_diff": -853, + "section_diff": 0, + "link_diff": -46, + "priority": 52.8225 + }, + { + "key": "highway", + "reason": "French page outdated by 95 days, English page has 85 more links", + "en_page": { + "key": "highway", + "language": "en", + "url": "https://wiki.openstreetmap.org/wiki/Key:highway", + "last_modified": "2025-04-10", + "sections": 30, + "word_count": 4225, + "link_count": 865 + }, + "fr_page": { + "key": "highway", + "language": "fr", + "url": "https://wiki.openstreetmap.org/wiki/FR:Key:highway", + "last_modified": "2025-01-05", + "sections": 30, + "word_count": 4240, + "link_count": 780 + }, + "date_diff": 95, + "word_diff": -15, + "section_diff": 0, + "link_diff": 85, + "priority": 39.3125 + }, + { + "key": "natural", + "reason": "French page outdated by 87 days, English page has 571 more words, English page has 4 more sections, English page has 80 more links", + "en_page": { + "key": "natural", + "language": "en", + "url": "https://wiki.openstreetmap.org/wiki/Key:natural", + "last_modified": "2025-07-17", + "sections": 17, + "word_count": 2169, + "link_count": 620 + }, + "fr_page": { + "key": "natural", + "language": "fr", + "url": "https://wiki.openstreetmap.org/wiki/FR:Key:natural", + "last_modified": "2025-04-21", + "sections": 13, + "word_count": 1598, + "link_count": 540 + }, + "date_diff": 87, + "word_diff": 571, + "section_diff": 4, + "link_diff": 80, + "priority": 38.227500000000006 + }, + { + "key": "building", + "reason": "English page has 593 more words, English page has 6 more sections, English page has 83 more links", + "en_page": { + "key": "building", + "language": "en", + "url": "https://wiki.openstreetmap.org/wiki/Key:building", + "last_modified": "2025-06-10", + "sections": 31, + "word_count": 3873, + "link_count": 712 + }, + "fr_page": { + "key": 
"building", + "language": "fr", + "url": "https://wiki.openstreetmap.org/wiki/FR:Key:building", + "last_modified": "2025-05-22", + "sections": 25, + "word_count": 3280, + "link_count": 629 + }, + "date_diff": 19, + "word_diff": 593, + "section_diff": 6, + "link_diff": 83, + "priority": 11.5275 + } +] \ No newline at end of file diff --git a/wiki_compare/post_outdated_page.py b/wiki_compare/post_outdated_page.py new file mode 100755 index 00000000..b3443edc --- /dev/null +++ b/wiki_compare/post_outdated_page.py @@ -0,0 +1,183 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" +post_outdated_page.py + +This script reads the outdated_pages.json file generated by wiki_compare.py, +randomly selects an outdated French wiki page, and posts a message on Mastodon +suggesting that the page needs updating. + +Usage: + python post_outdated_page.py [--dry-run] + +Options: + --dry-run Run the script without actually posting to Mastodon + +Output: + - A post on Mastodon about an outdated French wiki page + - Log messages about the selected page and posting status +""" + +import json +import random +import argparse +import logging +import os +from datetime import datetime +import requests + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s', + datefmt='%Y-%m-%d %H:%M:%S' +) +logger = logging.getLogger(__name__) + +# Constants +OUTDATED_PAGES_FILE = "outdated_pages.json" +MASTODON_API_URL = "https://mastodon.instance/api/v1/statuses" # Replace with actual instance +MASTODON_ACCESS_TOKEN = os.environ.get("MASTODON_ACCESS_TOKEN") + +def load_outdated_pages(): + """ + Load the outdated pages from the JSON file + + Returns: + list: List of dictionaries containing outdated page information + """ + try: + with open(OUTDATED_PAGES_FILE, 'r', encoding='utf-8') as f: + pages = json.load(f) + logger.info(f"Successfully loaded {len(pages)} outdated pages from {OUTDATED_PAGES_FILE}") + return pages + except (IOError, 
json.JSONDecodeError) as e: + logger.error(f"Error loading outdated pages from {OUTDATED_PAGES_FILE}: {e}") + return [] + +def select_random_outdated_page(pages): + """ + Randomly select an outdated French page from the list + + Args: + pages (list): List of dictionaries containing outdated page information + + Returns: + dict: Randomly selected outdated page or None if no suitable pages found + """ + # Filter pages to include only those with a French page (not missing) + pages_with_fr = [page for page in pages if page.get('fr_page') is not None] + + if not pages_with_fr: + logger.warning("No outdated French pages found") + return None + + # Randomly select a page + selected_page = random.choice(pages_with_fr) + logger.info(f"Randomly selected page for key '{selected_page['key']}'") + + return selected_page + +def create_mastodon_post(page): + """ + Create a Mastodon post about the outdated wiki page + + Args: + page (dict): Dictionary containing outdated page information + + Returns: + str: Formatted Mastodon post text + """ + key = page['key'] + reason = page['reason'] + fr_url = page['fr_page']['url'] + en_url = page['en_page']['url'] + + # Format the post + post = f"""📝 La page wiki OSM pour la clé #{key} a besoin d'une mise à jour ! 
+ +Raison : {reason} + +Vous pouvez aider en mettant à jour la page française : +{fr_url} + +Page anglaise de référence : +{en_url} + +#OpenStreetMap #OSM #Wiki #Contribution #Traduction""" + + return post + +def post_to_mastodon(post_text, dry_run=False): + """ + Post the message to Mastodon + + Args: + post_text (str): Text to post + dry_run (bool): If True, don't actually post to Mastodon + + Returns: + bool: True if posting was successful or dry run, False otherwise + """ + if dry_run: + logger.info("DRY RUN: Would have posted to Mastodon:") + logger.info(post_text) + return True + + if not MASTODON_ACCESS_TOKEN: + logger.error("MASTODON_ACCESS_TOKEN environment variable not set") + return False + + headers = { + "Authorization": f"Bearer {MASTODON_ACCESS_TOKEN}", + "Content-Type": "application/json" + } + + data = { + "status": post_text, + "visibility": "public" + } + + try: + response = requests.post(MASTODON_API_URL, headers=headers, json=data) + response.raise_for_status() + logger.info("Successfully posted to Mastodon") + return True + except requests.exceptions.RequestException as e: + logger.error(f"Error posting to Mastodon: {e}") + return False + +def main(): + """Main function to execute the script""" + parser = argparse.ArgumentParser(description="Post about an outdated OSM wiki page on Mastodon") + parser.add_argument("--dry-run", action="store_true", help="Run without actually posting to Mastodon") + args = parser.parse_args() + + logger.info("Starting post_outdated_page.py") + + # Load outdated pages + outdated_pages = load_outdated_pages() + if not outdated_pages: + logger.error("No outdated pages found. 
Run wiki_compare.py first.") + return + + # Select a random outdated page + selected_page = select_random_outdated_page(outdated_pages) + if not selected_page: + logger.error("Could not select an outdated page.") + return + + # Create the post text + post_text = create_mastodon_post(selected_page) + + # Post to Mastodon + success = post_to_mastodon(post_text, args.dry_run) + + if success: + logger.info("Script completed successfully") + else: + logger.error("Script completed with errors") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/wiki_compare/suggest_translation.py b/wiki_compare/suggest_translation.py new file mode 100755 index 00000000..1c662c10 --- /dev/null +++ b/wiki_compare/suggest_translation.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" +suggest_translation.py + +This script reads the outdated_pages.json file generated by wiki_compare.py, +identifies English wiki pages that don't have a French translation, +and posts a message on Mastodon suggesting that the page needs translation. 
+ +Usage: + python suggest_translation.py [--dry-run] + +Options: + --dry-run Run the script without actually posting to Mastodon + +Output: + - A post on Mastodon suggesting a wiki page for translation + - Log messages about the selected page and posting status +""" + +import json +import random +import argparse +import logging +import os +from datetime import datetime +import requests + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s', + datefmt='%Y-%m-%d %H:%M:%S' +) +logger = logging.getLogger(__name__) + +# Constants +OUTDATED_PAGES_FILE = "outdated_pages.json" +MASTODON_API_URL = "https://mastodon.instance/api/v1/statuses" # Replace with actual instance +MASTODON_ACCESS_TOKEN = os.environ.get("MASTODON_ACCESS_TOKEN") + +def load_outdated_pages(): + """ + Load the outdated pages from the JSON file + + Returns: + list: List of dictionaries containing outdated page information + """ + try: + with open(OUTDATED_PAGES_FILE, 'r', encoding='utf-8') as f: + pages = json.load(f) + logger.info(f"Successfully loaded {len(pages)} pages from {OUTDATED_PAGES_FILE}") + return pages + except (IOError, json.JSONDecodeError) as e: + logger.error(f"Error loading pages from {OUTDATED_PAGES_FILE}: {e}") + return [] + +def find_missing_translations(pages): + """ + Find English pages that don't have a French translation + + Args: + pages (list): List of dictionaries containing page information + + Returns: + list: List of pages that need translation + """ + # Filter pages to include only those with a missing French page + missing_translations = [page for page in pages if + page.get('reason') == 'French page missing' and + page.get('en_page') is not None and + page.get('fr_page') is None] + + logger.info(f"Found {len(missing_translations)} pages without French translation") + return missing_translations + +def select_random_page_for_translation(pages): + """ + Randomly select a page for translation from the list + 
+ Args: + pages (list): List of dictionaries containing page information + + Returns: + dict: Randomly selected page or None if no suitable pages found + """ + if not pages: + logger.warning("No pages found that need translation") + return None + + # Randomly select a page + selected_page = random.choice(pages) + logger.info(f"Randomly selected page for key '{selected_page['key']}' for translation") + + return selected_page + +def create_mastodon_post(page): + """ + Create a Mastodon post suggesting a page for translation + + Args: + page (dict): Dictionary containing page information + + Returns: + str: Formatted Mastodon post text + """ + key = page['key'] + en_url = page['en_page']['url'] + fr_url = en_url.replace('/wiki/Key:', '/wiki/FR:Key:') + + # Get word count and sections from English page + word_count = page['en_page']['word_count'] + sections = page['en_page']['sections'] + + # Format the post + post = f"""🔍 Clé OSM sans traduction française : #{key} + +Cette page wiki importante n'a pas encore de traduction française ! 
+ +📊 Statistiques de la page anglaise : +• {word_count} mots +• {sections} sections + +Vous pouvez aider en créant la traduction française ici : +{fr_url} + +Page anglaise à traduire : +{en_url} + +#OpenStreetMap #OSM #Wiki #Traduction #Contribution""" + + return post + +def post_to_mastodon(post_text, dry_run=False): + """ + Post the message to Mastodon + + Args: + post_text (str): Text to post + dry_run (bool): If True, don't actually post to Mastodon + + Returns: + bool: True if posting was successful or dry run, False otherwise + """ + if dry_run: + logger.info("DRY RUN: Would have posted to Mastodon:") + logger.info(post_text) + return True + + if not MASTODON_ACCESS_TOKEN: + logger.error("MASTODON_ACCESS_TOKEN environment variable not set") + return False + + headers = { + "Authorization": f"Bearer {MASTODON_ACCESS_TOKEN}", + "Content-Type": "application/json" + } + + data = { + "status": post_text, + "visibility": "public" + } + + try: + response = requests.post(MASTODON_API_URL, headers=headers, json=data) + response.raise_for_status() + logger.info("Successfully posted to Mastodon") + return True + except requests.exceptions.RequestException as e: + logger.error(f"Error posting to Mastodon: {e}") + return False + +def main(): + """Main function to execute the script""" + parser = argparse.ArgumentParser(description="Suggest an OSM wiki page for translation on Mastodon") + parser.add_argument("--dry-run", action="store_true", help="Run without actually posting to Mastodon") + args = parser.parse_args() + + logger.info("Starting suggest_translation.py") + + # Load pages + pages = load_outdated_pages() + if not pages: + logger.error("No pages found. 
Run wiki_compare.py first.") + return + + # Find pages that need translation + pages_for_translation = find_missing_translations(pages) + if not pages_for_translation: + logger.error("No pages found that need translation.") + return + + # Select a random page for translation + selected_page = select_random_page_for_translation(pages_for_translation) + if not selected_page: + logger.error("Could not select a page for translation.") + return + + # Create the post text + post_text = create_mastodon_post(selected_page) + + # Post to Mastodon + success = post_to_mastodon(post_text, args.dry_run) + + if success: + logger.info("Script completed successfully") + else: + logger.error("Script completed with errors") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/wiki_compare/top_keys.json b/wiki_compare/top_keys.json new file mode 100644 index 00000000..989bab9b --- /dev/null +++ b/wiki_compare/top_keys.json @@ -0,0 +1,42 @@ +[ + { + "key": "building", + "count": 656287377 + }, + { + "key": "source", + "count": 299623433 + }, + { + "key": "highway", + "count": 280087725 + }, + { + "key": "addr:housenumber", + "count": 171973287 + }, + { + "key": "addr:street", + "count": 160834345 + }, + { + "key": "addr:city", + "count": 123283625 + }, + { + "key": "name", + "count": 109176151 + }, + { + "key": "addr:postcode", + "count": 106943837 + }, + { + "key": "natural", + "count": 84435807 + }, + { + "key": "surface", + "count": 72048796 + } +] \ No newline at end of file diff --git a/wiki_compare/wiki_compare.py b/wiki_compare/wiki_compare.py new file mode 100755 index 00000000..13614d9b --- /dev/null +++ b/wiki_compare/wiki_compare.py @@ -0,0 +1,348 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" +wiki_compare.py + +This script fetches the 10 most used OpenStreetMap keys from TagInfo, +compares their English and French wiki pages, and identifies which pages +need updating based on modification dates and content analysis. 
+ +Usage: + python wiki_compare.py + +Output: + - top_keys.json: JSON file containing the 10 most used OSM keys + - wiki_pages.csv: CSV file with information about each wiki page + - outdated_pages.json: JSON file containing pages that need updating + - A console output listing the 10 wiki pages that need updating +""" + +import json +import csv +import requests +import re +import os +from datetime import datetime +from bs4 import BeautifulSoup +import logging + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s', + datefmt='%Y-%m-%d %H:%M:%S' +) +logger = logging.getLogger(__name__) + +# Constants +TAGINFO_API_URL = "https://taginfo.openstreetmap.org/api/4/keys/all" +WIKI_BASE_URL_EN = "https://wiki.openstreetmap.org/wiki/Key:" +WIKI_BASE_URL_FR = "https://wiki.openstreetmap.org/wiki/FR:Key:" +TOP_KEYS_FILE = "top_keys.json" +WIKI_PAGES_CSV = "wiki_pages.csv" +OUTDATED_PAGES_FILE = "outdated_pages.json" + +def fetch_top_keys(limit=50): + """ + Fetch the most used OSM keys from TagInfo API + + Args: + limit (int): Number of keys to fetch + + Returns: + list: List of dictionaries containing key information + """ + logger.info(f"Fetching top {limit} OSM keys from TagInfo API...") + + params = { + 'page': 1, + 'rp': limit, + 'sortname': 'count_all', + 'sortorder': 'desc' + } + + try: + response = requests.get(TAGINFO_API_URL, params=params) + response.raise_for_status() + data = response.json() + + # Extract just the key names and counts + top_keys = [{'key': item['key'], 'count': item['count_all']} for item in data['data']] + + logger.info(f"Successfully fetched {len(top_keys)} keys") + return top_keys + + except requests.exceptions.RequestException as e: + logger.error(f"Error fetching data from TagInfo API: {e}") + return [] + +def save_to_json(data, filename): + """ + Save data to a JSON file + + Args: + data: Data to save + filename (str): Name of the file + """ + try: + with open(filename, 'w', 
encoding='utf-8') as f: + json.dump(data, f, indent=2, ensure_ascii=False) + logger.info(f"Data saved to {filename}") + except IOError as e: + logger.error(f"Error saving data to {filename}: {e}") + +def fetch_wiki_page(key, language='en'): + """ + Fetch wiki page for a given key + + Args: + key (str): OSM key + language (str): Language code ('en' or 'fr') + + Returns: + dict: Dictionary with page information or None if page doesn't exist + """ + base_url = WIKI_BASE_URL_EN if language == 'en' else WIKI_BASE_URL_FR + url = f"{base_url}{key}" + + logger.info(f"Fetching {language} wiki page for key '{key}': {url}") + + try: + response = requests.get(url) + + # Check if page exists + if response.status_code == 404: + logger.warning(f"Wiki page for key '{key}' in {language} does not exist") + return None + + response.raise_for_status() + + soup = BeautifulSoup(response.text, 'html.parser') + + # Get last modification date + last_modified = None + footer_info = soup.select_one('#footer-info-lastmod') + if footer_info: + date_text = footer_info.text + # Extract date using regex + date_match = re.search(r'(\d{1,2} \w+ \d{4})', date_text) + if date_match: + date_str = date_match.group(1) + try: + # Parse date (format may vary based on wiki language) + last_modified = datetime.strptime(date_str, '%d %B %Y').strftime('%Y-%m-%d') + except ValueError: + logger.warning(f"Could not parse date: {date_str}") + + # Count sections (h2, h3, h4) + sections = len(soup.select('h2, h3, h4')) + + # Count words in the content + content = soup.select_one('#mw-content-text') + if content: + # Remove script and style elements + for script in content.select('script, style'): + script.extract() + + # Get text and count words + text = content.get_text(separator=' ', strip=True) + word_count = len(text.split()) + + # Count links + links = content.select('a') + link_count = len(links) + else: + word_count = 0 + link_count = 0 + + return { + 'key': key, + 'language': language, + 'url': url, + 
'last_modified': last_modified, + 'sections': sections, + 'word_count': word_count, + 'link_count': link_count + } + + except requests.exceptions.RequestException as e: + logger.error(f"Error fetching wiki page for key '{key}' in {language}: {e}") + return None + +def analyze_wiki_pages(pages): + """ + Analyze wiki pages to determine which ones need updating + + Args: + pages (list): List of dictionaries containing page information + + Returns: + list: List of pages that need updating, sorted by priority + """ + logger.info("Analyzing wiki pages to identify those needing updates...") + + # Group pages by key + pages_by_key = {} + for page in pages: + if page is None: + continue + + key = page['key'] + if key not in pages_by_key: + pages_by_key[key] = {} + + pages_by_key[key][page['language']] = page + + # Analyze each key's pages + needs_update = [] + + for key, lang_pages in pages_by_key.items(): + # Skip if either language is missing + if 'en' not in lang_pages or 'fr' not in lang_pages: + if 'en' in lang_pages: + # French page is missing + needs_update.append({ + 'key': key, + 'reason': 'French page missing', + 'en_page': lang_pages['en'], + 'fr_page': None, + 'date_diff': 0, + 'word_diff': lang_pages['en']['word_count'], + 'section_diff': lang_pages['en']['sections'], + 'link_diff': lang_pages['en']['link_count'], + 'priority': 100 # High priority for missing pages + }) + continue + + en_page = lang_pages['en'] + fr_page = lang_pages['fr'] + + # Skip if dates are missing + if not en_page['last_modified'] or not fr_page['last_modified']: + continue + + # Calculate date difference in days + en_date = datetime.strptime(en_page['last_modified'], '%Y-%m-%d') + fr_date = datetime.strptime(fr_page['last_modified'], '%Y-%m-%d') + date_diff = (en_date - fr_date).days + + # Calculate content differences + word_diff = en_page['word_count'] - fr_page['word_count'] + section_diff = en_page['sections'] - fr_page['sections'] + link_diff = en_page['link_count'] - 
fr_page['link_count'] + + # Calculate priority score (higher means needs more urgent update) + # Weight factors can be adjusted + priority = ( + abs(date_diff) * 0.4 + # Date difference + abs(word_diff) / 100 * 0.25 + # Word count difference (normalized) + abs(section_diff) * 0.2 + # Section difference + abs(link_diff) / 10 * 0.15 # Link count difference (normalized) + ) + + if date_diff > 30 or word_diff > 200 or section_diff > 2 or link_diff > 20 or fr_page['word_count'] < en_page['word_count'] * 0.7: + reason = [] + if date_diff > 30: + reason.append(f"French page outdated by {date_diff} days") + if word_diff > 200: + reason.append(f"English page has {word_diff} more words") + if section_diff > 2: + reason.append(f"English page has {section_diff} more sections") + if link_diff > 20: + reason.append(f"English page has {link_diff} more links") + if fr_page['word_count'] < en_page['word_count'] * 0.7: + reason.append(f"French page is only {fr_page['word_count'] / en_page['word_count']:.0%} of English content") + + needs_update.append({ + 'key': key, + 'reason': ', '.join(reason), + 'en_page': en_page, + 'fr_page': fr_page, + 'date_diff': date_diff, + 'word_diff': word_diff, + 'section_diff': section_diff, + 'link_diff': link_diff, + 'priority': priority + }) + + # Sort by priority (descending) + needs_update.sort(key=lambda x: x['priority'], reverse=True) + + return needs_update + +def main(): + """Main function to execute the script""" + logger.info("Starting wiki_compare.py") + + # Create output directory if it doesn't exist + os.makedirs(os.path.dirname(os.path.abspath(__file__)), exist_ok=True) + + # Fetch top keys + top_keys = fetch_top_keys(10) + + if not top_keys: + logger.error("Failed to fetch top keys. 
Exiting.") + return + + # Save top keys to JSON + save_to_json(top_keys, TOP_KEYS_FILE) + + # Fetch wiki pages for each key + wiki_pages = [] + + for key_info in top_keys: + key = key_info['key'] + + # Fetch English page + en_page = fetch_wiki_page(key, 'en') + if en_page: + wiki_pages.append(en_page) + + # Fetch French page + fr_page = fetch_wiki_page(key, 'fr') + if fr_page: + wiki_pages.append(fr_page) + + # Save wiki pages to CSV + try: + with open(WIKI_PAGES_CSV, 'w', newline='', encoding='utf-8') as f: + fieldnames = ['key', 'language', 'url', 'last_modified', 'sections', 'word_count', 'link_count'] + writer = csv.DictWriter(f, fieldnames=fieldnames) + + writer.writeheader() + for page in wiki_pages: + if page: # Skip None values + writer.writerow(page) + + logger.info(f"Wiki page data saved to {WIKI_PAGES_CSV}") + + except IOError as e: + logger.error(f"Error saving data to {WIKI_PAGES_CSV}: {e}") + return + + # Analyze pages to find those needing updates + pages_to_update = analyze_wiki_pages(wiki_pages) + + # Save pages that need updating to JSON + save_to_json(pages_to_update, OUTDATED_PAGES_FILE) + + # Print the top 10 pages needing updates + print("\n===== TOP 10 WIKI PAGES NEEDING UPDATES =====") + + for i, page in enumerate(pages_to_update[:10], 1): + key = page['key'] + reason = page['reason'] + en_url = page['en_page']['url'] if page['en_page'] else "N/A" + fr_url = page['fr_page']['url'] if page['fr_page'] else "N/A" + + print(f"{i}. 
Key: {key}") + print(f" Reason: {reason}") + print(f" English: {en_url}") + print(f" French: {fr_url}") + print() + + logger.info("Script completed successfully") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/wiki_compare/wiki_pages.csv b/wiki_compare/wiki_pages.csv new file mode 100644 index 00000000..14ec9677 --- /dev/null +++ b/wiki_compare/wiki_pages.csv @@ -0,0 +1,21 @@ +key,language,url,last_modified,sections,word_count,link_count +building,en,https://wiki.openstreetmap.org/wiki/Key:building,2025-06-10,31,3873,712 +building,fr,https://wiki.openstreetmap.org/wiki/FR:Key:building,2025-05-22,25,3280,629 +source,en,https://wiki.openstreetmap.org/wiki/Key:source,2025-08-12,27,2851,399 +source,fr,https://wiki.openstreetmap.org/wiki/FR:Key:source,2024-02-07,23,2692,315 +highway,en,https://wiki.openstreetmap.org/wiki/Key:highway,2025-04-10,30,4225,865 +highway,fr,https://wiki.openstreetmap.org/wiki/FR:Key:highway,2025-01-05,30,4240,780 +addr:housenumber,en,https://wiki.openstreetmap.org/wiki/Key:addr:housenumber,2025-07-24,11,429,182 +addr:housenumber,fr,https://wiki.openstreetmap.org/wiki/FR:Key:addr:housenumber,2025-03-26,15,1754,236 +addr:street,en,https://wiki.openstreetmap.org/wiki/Key:addr:street,2024-10-29,12,701,186 +addr:street,fr,https://wiki.openstreetmap.org/wiki/FR:Key:addr:street,2025-03-26,15,1754,236 +addr:city,en,https://wiki.openstreetmap.org/wiki/Key:addr:city,2025-07-29,15,901,190 +addr:city,fr,https://wiki.openstreetmap.org/wiki/FR:Key:addr:city,2025-03-26,15,1754,236 +name,en,https://wiki.openstreetmap.org/wiki/Key:name,2025-07-25,17,2295,366 +name,fr,https://wiki.openstreetmap.org/wiki/FR:Key:name,2025-01-16,21,1819,272 +addr:postcode,en,https://wiki.openstreetmap.org/wiki/Key:addr:postcode,2024-10-29,14,481,168 +addr:postcode,fr,https://wiki.openstreetmap.org/wiki/FR:Key:addr:postcode,2025-03-26,15,1754,236 +natural,en,https://wiki.openstreetmap.org/wiki/Key:natural,2025-07-17,17,2169,620 
+natural,fr,https://wiki.openstreetmap.org/wiki/FR:Key:natural,2025-04-21,13,1598,540 +surface,en,https://wiki.openstreetmap.org/wiki/Key:surface,2025-06-29,24,3574,676 +surface,fr,https://wiki.openstreetmap.org/wiki/FR:Key:surface,2022-02-22,13,2686,546