ajout grammalecte

This commit is contained in:
Tykayn 2025-09-01 11:38:19 +02:00 committed by tykayn
parent e61d932565
commit 471eab4cd0
8 changed files with 45296 additions and 283 deletions

View file

@ -0,0 +1 @@
sudo apt install aspell aspell-fr grammalecte-cli

File diff suppressed because it is too large Load diff

Binary file not shown.

Before

Width:  |  Height:  |  Size: 43 KiB

After

Width:  |  Height:  |  Size: 43 KiB

Before After
Before After

View file

@ -1,6 +1,6 @@
[
{
"key": "building",
"count": 657147429
"count": 657211643
}
]

View file

@ -1,5 +1,5 @@
{
"last_updated": "2025-08-31T23:48:47.574109",
"last_updated": "2025-09-01T10:50:55.122263",
"untranslated_pages": [
{
"title": "FR:2017 Ouragans Irma et Maria",

View file

@ -28,6 +28,8 @@ import csv
import requests
import re
import os
import subprocess
import tempfile
from datetime import datetime
from bs4 import BeautifulSoup
import logging
@ -61,12 +63,14 @@ NUM_WIKI_PAGES = 1
# 3. Pages with FR: prefix (e.g., "FR:Tag:leisure%3Dchildren_club")
SPECIFIC_PAGES = [
"Anatomie_des_étiquettes_osm",
"FR:Tag:leisure%3Dchildren_club",
"FR:Tag:harassment_prevention%3Dask_angela",
"FR:Tag:leisure=children_club",
# "%3D" is the URL-encoded form of "=", so the decoded title is "...=ask_angela"
# (the "D" belongs to the escape sequence, not to the tag value).
"FR:Tag:harassment_prevention=ask_angela",
"Key:harassment_prevention",
"Proposal process",
"Automated_Edits_code_of_conduct",
"Key:cuisine"
"Key:cuisine",
"Libre_Charge_Map",
"OSM_Mon_Commerce"
]
def fetch_top_keys(limit=NUM_WIKI_PAGES):
@ -118,6 +122,90 @@ def save_to_json(data, filename):
except IOError as e:
logger.error(f"Error saving data to {filename}: {e}")
def _grammalecte_error_to_suggestion(paragraph_index, error, error_type, message):
    """Flatten one grammalecte error entry into the suggestion dict used by this module."""
    return {
        'paragraph': paragraph_index,
        'start': error.get('nStart', 0),
        'end': error.get('nEnd', 0),
        'type': error_type,
        'message': message,
        'suggestions': error.get('aSuggestions', []),
        'text': error.get('sUnderlined', ''),
        'before': error.get('sBefore', ''),
        'after': error.get('sAfter', ''),
    }


def check_grammar_with_grammalecte(text):
    """
    Check grammar in French text using grammalecte-cli

    The text is written to a temporary file, grammalecte-cli is run on that
    file, and its JSON output is flattened into a list of suggestion dicts.
    The temporary file is removed whether or not the check succeeds.

    Args:
        text (str): French text to check

    Returns:
        list: List of suggestion dicts with the keys 'paragraph', 'start',
            'end', 'type', 'message', 'suggestions', 'text', 'before' and
            'after'. For each paragraph, grammar errors come first, followed
            by spelling errors. An empty list is returned when the text is
            empty/blank or when any error occurs (the error is logged, never
            raised).
    """
    if not text or not text.strip():
        logger.warning("Empty text provided for grammar checking")
        return []

    logger.info("Checking grammar with grammalecte-cli...")

    temp_file_path = None
    try:
        # delete=False: the file must outlive the `with` block so that the
        # external process can read it; it is removed in `finally` below.
        with tempfile.NamedTemporaryFile(mode='w', encoding='utf-8', suffix='.txt', delete=False) as temp_file:
            # Record the path before writing so a failed write is cleaned up too.
            temp_file_path = temp_file.name
            temp_file.write(text)

        # Run grammalecte-cli on the temporary file
        cmd = ['grammalecte-cli', '-f', temp_file_path, '-j', '-ctx', '-wss']
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)

        # Parse the JSON output
        grammar_data = json.loads(result.stdout)

        # Extract grammar and spelling errors from all paragraphs
        grammar_suggestions = []
        for paragraph in grammar_data.get('data', []):
            paragraph_index = paragraph.get('iParagraph', 0)

            # Grammar errors carry their own type and message
            for error in paragraph.get('lGrammarErrors', []):
                grammar_suggestions.append(
                    _grammalecte_error_to_suggestion(
                        paragraph_index,
                        error,
                        error.get('sType', ''),
                        error.get('sMessage', ''),
                    )
                )

            # Spelling errors get a fixed type and message
            for error in paragraph.get('lSpellingErrors', []):
                grammar_suggestions.append(
                    _grammalecte_error_to_suggestion(
                        paragraph_index,
                        error,
                        'spelling',
                        "Erreur d'orthographe",
                    )
                )

        logger.info(f"Found {len(grammar_suggestions)} grammar/spelling suggestions")
        return grammar_suggestions

    except subprocess.CalledProcessError as e:
        logger.error(f"Error running grammalecte-cli: {e}")
        logger.error(f"stdout: {e.stdout}")
        logger.error(f"stderr: {e.stderr}")
        return []

    except json.JSONDecodeError as e:
        logger.error(f"Error parsing grammalecte-cli output: {e}")
        return []

    except Exception as e:
        # Best-effort feature: a grammar-check failure (e.g. grammalecte-cli
        # not installed) must not abort the caller.
        logger.error(f"Unexpected error during grammar checking: {e}")
        return []

    finally:
        # Clean up the temporary file on every path, not only on success.
        if temp_file_path is not None:
            try:
                os.unlink(temp_file_path)
            except OSError as e:
                logger.warning(f"Could not remove temporary file {temp_file_path}: {e}")
def fetch_wiki_page(key, language='en', is_specific_page=False):
"""
Fetch wiki page for a given key or specific page
@ -225,6 +313,7 @@ def fetch_wiki_page(key, language='en', is_specific_page=False):
# Count words in the content
content = soup.select_one('#mw-content-text')
clean_text = ""
if content:
# Remove script and style elements
for script in content.select('script, style'):
@ -235,8 +324,14 @@ def fetch_wiki_page(key, language='en', is_specific_page=False):
languages_elem.extract()
# Get text and count words
text = content.get_text(separator=' ', strip=True)
word_count = len(text.split())
clean_text = content.get_text(separator=' ', strip=True)
word_count = len(clean_text.split())
# Check grammar for French pages
grammar_suggestions = []
if language == 'fr':
logger.info(f"Checking grammar for French page: {key}")
grammar_suggestions = check_grammar_with_grammalecte(clean_text)
# Extract links
links = content.select('a')
@ -433,6 +528,7 @@ def fetch_wiki_page(key, language='en', is_specific_page=False):
media_count = 0
media_details = []
categories = []
grammar_suggestions = []
return {
'key': key,
@ -449,7 +545,8 @@ def fetch_wiki_page(key, language='en', is_specific_page=False):
'media_details': media_details,
'categories': categories,
'description_img_url': description_img_url,
'is_specific_page': is_specific_page
'is_specific_page': is_specific_page,
'grammar_suggestions': grammar_suggestions
}
except requests.exceptions.RequestException as e:

View file

@ -2,15 +2,15 @@ key,language,url,last_modified,sections,word_count,link_count,media_count,stalen
building,en,https://wiki.openstreetmap.org/wiki/Key:building,2025-06-10,31,3774,627,158,8.91,https://wiki.openstreetmap.org/w/images/thumb/6/61/Emptyhouse.jpg/200px-Emptyhouse.jpg
building,fr,https://wiki.openstreetmap.org/wiki/FR:Key:building,2025-05-22,25,3181,544,155,8.91,https://wiki.openstreetmap.org/w/images/thumb/6/61/Emptyhouse.jpg/200px-Emptyhouse.jpg
Anatomie_des_étiquettes_osm,en,https://wiki.openstreetmap.org/wiki/Anatomie_des_étiquettes_osm,2025-06-08,22,963,53,0,100,
FR:Tag:leisure%3Dchildren_club,fr,https://wiki.openstreetmap.org/wiki/FR:Tag:leisure%3Dchildren_club,2024-05-02,8,294,67,10,0,https://upload.wikimedia.org/wikipedia/commons/thumb/7/74/Dave_%26_Buster%27s_video_arcade_in_Columbus%2C_OH_-_17910.JPG/200px-Dave_%26_Buster%27s_video_arcade_in_Columbus%2C_OH_-_17910.JPG
https://wiki.openstreetmap.org/wiki/Tag:leisure%3Dchildren_club,en,https://wiki.openstreetmap.org/wiki/Tag:leisure%3Dchildren_club,2025-02-02,9,163,69,9,100,https://wiki.openstreetmap.org/w/images/thumb/7/76/Osm_element_node.svg/30px-Osm_element_node.svg.png
FR:Tag:harassment_prevention%3Dask_angela,fr,https://wiki.openstreetmap.org/wiki/FR:Tag:harassment_prevention%3Dask_angela,2025-07-10,20,873,166,15,0,https://wiki.openstreetmap.org/w/images/thumb/1/15/2024-06-27T08.40.50_ask_angela_lyon.jpg/200px-2024-06-27T08.40.50_ask_angela_lyon.jpg
https://wiki.openstreetmap.org/wiki/Tag:harassment_prevention%3Dask_angela,en,https://wiki.openstreetmap.org/wiki/Tag:harassment_prevention%3Dask_angela,2025-02-22,14,463,72,9,100,https://wiki.openstreetmap.org/w/images/thumb/7/76/Osm_element_node.svg/30px-Osm_element_node.svg.png
FR:Tag:leisure=children_club,fr,https://wiki.openstreetmap.org/wiki/FR:Tag:leisure=children_club,2024-05-02,8,294,67,10,0,https://upload.wikimedia.org/wikipedia/commons/thumb/7/74/Dave_%26_Buster%27s_video_arcade_in_Columbus%2C_OH_-_17910.JPG/200px-Dave_%26_Buster%27s_video_arcade_in_Columbus%2C_OH_-_17910.JPG
https://wiki.openstreetmap.org/wiki/Tag:leisure=children_club,en,https://wiki.openstreetmap.org/wiki/Tag:leisure=children_club,2025-02-02,9,163,69,9,100,https://wiki.openstreetmap.org/w/images/thumb/7/76/Osm_element_node.svg/30px-Osm_element_node.svg.png
Key:harassment_prevention,en,https://wiki.openstreetmap.org/wiki/Key:harassment_prevention,2024-08-10,12,196,69,14,66.72,https://wiki.openstreetmap.org/w/images/thumb/7/76/Osm_element_node.svg/30px-Osm_element_node.svg.png
Key:harassment_prevention,fr,https://wiki.openstreetmap.org/wiki/FR:Key:harassment_prevention,2025-07-03,15,328,83,14,66.72,https://wiki.openstreetmap.org/w/images/thumb/7/76/Osm_element_node.svg/30px-Osm_element_node.svg.png
Proposal process,en,https://wiki.openstreetmap.org/wiki/Proposal process,2025-08-13,44,5253,187,4,165.53,https://wiki.openstreetmap.org/w/images/thumb/c/c2/Save_proposal_first.png/761px-Save_proposal_first.png
Proposal process,fr,https://wiki.openstreetmap.org/wiki/FR:Proposal process,2023-09-22,15,1146,24,0,165.53,
Proposal process,en,https://wiki.openstreetmap.org/wiki/Proposal process,2025-08-13,46,5292,202,4,166.25,https://wiki.openstreetmap.org/w/images/thumb/c/c2/Save_proposal_first.png/761px-Save_proposal_first.png
Proposal process,fr,https://wiki.openstreetmap.org/wiki/FR:Proposal process,2023-09-22,15,1146,24,0,166.25,
Automated_Edits_code_of_conduct,en,https://wiki.openstreetmap.org/wiki/Automated_Edits_code_of_conduct,2025-07-26,19,2062,69,0,26.35,
Automated_Edits_code_of_conduct,fr,https://wiki.openstreetmap.org/wiki/FR:Automated_Edits_code_of_conduct,2025-04-03,17,1571,16,0,26.35,
Key:cuisine,en,https://wiki.openstreetmap.org/wiki/Key:cuisine,2025-07-23,17,3422,693,303,107.73,https://upload.wikimedia.org/wikipedia/commons/thumb/f/f0/Food_montage.jpg/200px-Food_montage.jpg
Key:cuisine,fr,https://wiki.openstreetmap.org/wiki/FR:Key:cuisine,2024-02-16,15,2866,690,316,107.73,https://upload.wikimedia.org/wikipedia/commons/thumb/f/f0/Food_montage.jpg/200px-Food_montage.jpg
Libre_Charge_Map,en,https://wiki.openstreetmap.org/wiki/Libre_Charge_Map,2025-07-28,11,328,10,2,100,https://wiki.openstreetmap.org/w/images/thumb/8/8e/Screenshot_2025-07-28_at_14-40-11_LibreChargeMap_-_OSM_Bliss.png/300px-Screenshot_2025-07-28_at_14-40-11_LibreChargeMap_-_OSM_Bliss.png
OSM_Mon_Commerce,en,https://wiki.openstreetmap.org/wiki/OSM_Mon_Commerce,2025-07-29,17,418,34,3,100,https://wiki.openstreetmap.org/w/images/thumb/6/67/Villes_OSM_Mon_Commerce.png/500px-Villes_OSM_Mon_Commerce.png

1 key language url last_modified sections word_count link_count media_count staleness_score description_img_url
2 building en https://wiki.openstreetmap.org/wiki/Key:building 2025-06-10 31 3774 627 158 8.91 https://wiki.openstreetmap.org/w/images/thumb/6/61/Emptyhouse.jpg/200px-Emptyhouse.jpg
3 building fr https://wiki.openstreetmap.org/wiki/FR:Key:building 2025-05-22 25 3181 544 155 8.91 https://wiki.openstreetmap.org/w/images/thumb/6/61/Emptyhouse.jpg/200px-Emptyhouse.jpg
4 Anatomie_des_étiquettes_osm en https://wiki.openstreetmap.org/wiki/Anatomie_des_étiquettes_osm 2025-06-08 22 963 53 0 100
5 FR:Tag:leisure%3Dchildren_club FR:Tag:leisure=children_club fr https://wiki.openstreetmap.org/wiki/FR:Tag:leisure%3Dchildren_club https://wiki.openstreetmap.org/wiki/FR:Tag:leisure=children_club 2024-05-02 8 294 67 10 0 https://upload.wikimedia.org/wikipedia/commons/thumb/7/74/Dave_%26_Buster%27s_video_arcade_in_Columbus%2C_OH_-_17910.JPG/200px-Dave_%26_Buster%27s_video_arcade_in_Columbus%2C_OH_-_17910.JPG
6 https://wiki.openstreetmap.org/wiki/Tag:leisure%3Dchildren_club https://wiki.openstreetmap.org/wiki/Tag:leisure=children_club en https://wiki.openstreetmap.org/wiki/Tag:leisure%3Dchildren_club https://wiki.openstreetmap.org/wiki/Tag:leisure=children_club 2025-02-02 9 163 69 9 100 https://wiki.openstreetmap.org/w/images/thumb/7/76/Osm_element_node.svg/30px-Osm_element_node.svg.png
FR:Tag:harassment_prevention%3Dask_angela fr https://wiki.openstreetmap.org/wiki/FR:Tag:harassment_prevention%3Dask_angela 2025-07-10 20 873 166 15 0 https://wiki.openstreetmap.org/w/images/thumb/1/15/2024-06-27T08.40.50_ask_angela_lyon.jpg/200px-2024-06-27T08.40.50_ask_angela_lyon.jpg
https://wiki.openstreetmap.org/wiki/Tag:harassment_prevention%3Dask_angela en https://wiki.openstreetmap.org/wiki/Tag:harassment_prevention%3Dask_angela 2025-02-22 14 463 72 9 100 https://wiki.openstreetmap.org/w/images/thumb/7/76/Osm_element_node.svg/30px-Osm_element_node.svg.png
7 Key:harassment_prevention en https://wiki.openstreetmap.org/wiki/Key:harassment_prevention 2024-08-10 12 196 69 14 66.72 https://wiki.openstreetmap.org/w/images/thumb/7/76/Osm_element_node.svg/30px-Osm_element_node.svg.png
8 Key:harassment_prevention fr https://wiki.openstreetmap.org/wiki/FR:Key:harassment_prevention 2025-07-03 15 328 83 14 66.72 https://wiki.openstreetmap.org/w/images/thumb/7/76/Osm_element_node.svg/30px-Osm_element_node.svg.png
9 Proposal process en https://wiki.openstreetmap.org/wiki/Proposal process 2025-08-13 44 46 5253 5292 187 202 4 165.53 166.25 https://wiki.openstreetmap.org/w/images/thumb/c/c2/Save_proposal_first.png/761px-Save_proposal_first.png
10 Proposal process fr https://wiki.openstreetmap.org/wiki/FR:Proposal process 2023-09-22 15 1146 24 0 165.53 166.25
11 Automated_Edits_code_of_conduct en https://wiki.openstreetmap.org/wiki/Automated_Edits_code_of_conduct 2025-07-26 19 2062 69 0 26.35
12 Automated_Edits_code_of_conduct fr https://wiki.openstreetmap.org/wiki/FR:Automated_Edits_code_of_conduct 2025-04-03 17 1571 16 0 26.35
13 Key:cuisine en https://wiki.openstreetmap.org/wiki/Key:cuisine 2025-07-23 17 3422 693 303 107.73 https://upload.wikimedia.org/wikipedia/commons/thumb/f/f0/Food_montage.jpg/200px-Food_montage.jpg
14 Key:cuisine fr https://wiki.openstreetmap.org/wiki/FR:Key:cuisine 2024-02-16 15 2866 690 316 107.73 https://upload.wikimedia.org/wikipedia/commons/thumb/f/f0/Food_montage.jpg/200px-Food_montage.jpg
15 Libre_Charge_Map en https://wiki.openstreetmap.org/wiki/Libre_Charge_Map 2025-07-28 11 328 10 2 100 https://wiki.openstreetmap.org/w/images/thumb/8/8e/Screenshot_2025-07-28_at_14-40-11_LibreChargeMap_-_OSM_Bliss.png/300px-Screenshot_2025-07-28_at_14-40-11_LibreChargeMap_-_OSM_Bliss.png
16 OSM_Mon_Commerce en https://wiki.openstreetmap.org/wiki/OSM_Mon_Commerce 2025-07-29 17 418 34 3 100 https://wiki.openstreetmap.org/w/images/thumb/6/67/Villes_OSM_Mon_Commerce.png/500px-Villes_OSM_Mon_Commerce.png