ajout grammalecte
This commit is contained in:
parent
e61d932565
commit
471eab4cd0
8 changed files with 45296 additions and 283 deletions
|
@ -111,10 +111,13 @@
|
|||
{% if fr_page is defined and fr_page is not null %}
|
||||
|
||||
|
||||
{% if detailed_comparison is defined and detailed_comparison is not null and detailed_comparison.grammar_suggestions is defined and detailed_comparison.grammar_suggestions is not null and detailed_comparison.grammar_suggestions|length > 0 %}
|
||||
{% if fr_page is defined and fr_page is not null and fr_page.grammar_suggestions is defined and fr_page.grammar_suggestions is not null and fr_page.grammar_suggestions|length > 0 %}
|
||||
<div class="card mb-4">
|
||||
<div class="card-header">
|
||||
<div class="card-header d-flex justify-content-between align-items-center">
|
||||
<h2>Suggestions de corrections grammaticales</h2>
|
||||
<button type="button" class="btn btn-primary" data-bs-toggle="modal" data-bs-target="#allCorrectionsModal">
|
||||
Voir toutes les corrections
|
||||
</button>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<div class="alert alert-info">
|
||||
|
@ -125,17 +128,19 @@
|
|||
<table class="table table-striped table-bordered">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Contexte</th>
|
||||
<th>Texte</th>
|
||||
<th>Message</th>
|
||||
<th>Suggestions</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for suggestion in detailed_comparison.grammar_suggestions %}
|
||||
{% for suggestion in fr_page.grammar_suggestions|slice(0, 5) %}
|
||||
<tr>
|
||||
<td>
|
||||
{% if suggestion.context is defined %}
|
||||
<code>{{ suggestion.context }}</code>
|
||||
{% if suggestion.before is defined and suggestion.text is defined and suggestion.after is defined %}
|
||||
<code>{{ suggestion.before|slice(-20)|trim }}
|
||||
<span class="text-danger">{{ suggestion.text }}</span>
|
||||
{{ suggestion.after|slice(0, 20)|trim }}</code>
|
||||
{% endif %}
|
||||
</td>
|
||||
<td>
|
||||
|
@ -145,11 +150,16 @@
|
|||
</td>
|
||||
<td>
|
||||
{% if suggestion.suggestions is defined and suggestion.suggestions is iterable and suggestion.suggestions|length > 0 %}
|
||||
<ul class="list-unstyled mb-0">
|
||||
<div class="dropdown">
|
||||
<button class="btn btn-sm btn-outline-primary dropdown-toggle" type="button" id="dropdownMenuButton{{ loop.index }}" data-bs-toggle="dropdown" aria-expanded="false">
|
||||
Suggestions ({{ suggestion.suggestions|length }})
|
||||
</button>
|
||||
<ul class="dropdown-menu" aria-labelledby="dropdownMenuButton{{ loop.index }}">
|
||||
{% for correction in suggestion.suggestions %}
|
||||
<li><code>{{ correction }}</code></li>
|
||||
<li><a class="dropdown-item" href="#">{{ correction }}</a></li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
</div>
|
||||
{% else %}
|
||||
<span class="text-muted">Aucune suggestion</span>
|
||||
{% endif %}
|
||||
|
@ -158,6 +168,132 @@
|
|||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
{% if fr_page.grammar_suggestions|length > 5 %}
|
||||
<div class="text-center mt-3">
|
||||
<p>{{ fr_page.grammar_suggestions|length - 5 }} suggestions supplémentaires disponibles</p>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Modal for all corrections -->
|
||||
<div class="modal fade" id="allCorrectionsModal" tabindex="-1" aria-labelledby="allCorrectionsModalLabel" aria-hidden="true">
|
||||
<div class="modal-dialog modal-xl">
|
||||
<div class="modal-content">
|
||||
<div class="modal-header">
|
||||
<h5 class="modal-title" id="allCorrectionsModalLabel">Toutes les suggestions de corrections</h5>
|
||||
<button type="button" class="btn-close" data-bs-dismiss="modal" aria-label="Close"></button>
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
<div class="table-responsive">
|
||||
<table class="table table-striped table-bordered">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Texte</th>
|
||||
<th>Message</th>
|
||||
<th>Suggestions</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for suggestion in fr_page.grammar_suggestions %}
|
||||
<tr>
|
||||
<td>
|
||||
{% if suggestion.before is defined and suggestion.text is defined and suggestion.after is defined %}
|
||||
<code>{{ suggestion.before|slice(-30)|trim }}
|
||||
<span class="text-danger">{{ suggestion.text }}</span>
|
||||
{{ suggestion.after|slice(0, 30)|trim }}</code>
|
||||
{% endif %}
|
||||
</td>
|
||||
<td>
|
||||
{% if suggestion.message is defined %}
|
||||
{{ suggestion.message }}
|
||||
{% endif %}
|
||||
</td>
|
||||
<td>
|
||||
{% if suggestion.suggestions is defined and suggestion.suggestions is iterable and suggestion.suggestions|length > 0 %}
|
||||
<div class="dropdown">
|
||||
<button class="btn btn-sm btn-outline-primary dropdown-toggle" type="button" id="modalDropdownMenuButton{{ loop.index }}" data-bs-toggle="dropdown" aria-expanded="false">
|
||||
Suggestions ({{ suggestion.suggestions|length }})
|
||||
</button>
|
||||
<ul class="dropdown-menu" aria-labelledby="modalDropdownMenuButton{{ loop.index }}">
|
||||
{% for correction in suggestion.suggestions %}
|
||||
<li><a class="dropdown-item" href="#">{{ correction }}</a></li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
</div>
|
||||
{% else %}
|
||||
<span class="text-muted">Aucune suggestion</span>
|
||||
{% endif %}
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
<div class="modal-footer">
|
||||
<button type="button" class="btn btn-secondary" data-bs-dismiss="modal">Fermer</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% if fr_page is defined and fr_page is not null and fr_page.categories is defined and (fr_page.categories|length == 0) and en_page is defined and en_page.categories is defined and (en_page.categories|length > 0) %}
|
||||
<div class="card mb-4">
|
||||
<div class="card-header bg-warning text-dark">
|
||||
<h2>Suggestion d'ajout de catégories</h2>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<div class="alert alert-warning">
|
||||
<p><i class="bi bi-exclamation-triangle"></i> <strong>La page française ne contient aucune catégorie.</strong></p>
|
||||
<p>Les catégories aident à organiser les pages wiki et à les rendre plus facilement découvrables. Considérez ajouter les catégories suivantes de la page anglaise :</p>
|
||||
</div>
|
||||
|
||||
<div class="row">
|
||||
<div class="col-md-6">
|
||||
<div class="card">
|
||||
<div class="card-header bg-primary text-white">
|
||||
<h3>Catégories suggérées</h3>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<ul class="list-group">
|
||||
{% for category in en_page.categories %}
|
||||
<li class="list-group-item">
|
||||
{{ category }}
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-md-6">
|
||||
<div class="card">
|
||||
<div class="card-header bg-info text-white">
|
||||
<h3>Comment ajouter des catégories</h3>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<p>Pour ajouter des catégories à la page wiki :</p>
|
||||
<ol>
|
||||
<li>Éditez la page française sur le wiki OSM</li>
|
||||
<li>Ajoutez les catégories à la fin de la page en utilisant la syntaxe suivante :</li>
|
||||
</ol>
|
||||
<div class="bg-light p-3 mb-3">
|
||||
<code>
|
||||
[[Category:Nom de la catégorie]]<br>
|
||||
[[Category:Autre catégorie]]
|
||||
</code>
|
||||
</div>
|
||||
<p>Vous pouvez également utiliser l'interface d'édition du wiki pour ajouter des catégories.</p>
|
||||
<div class="d-grid gap-2 mt-3">
|
||||
<a href="{{ fr_page.url }}" target="_blank" class="btn btn-primary">
|
||||
<i class="bi bi-pencil-square"></i> Éditer la page française
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
|
1
wiki_compare/install_ubuntu.sh
Normal file
1
wiki_compare/install_ubuntu.sh
Normal file
|
@ -0,0 +1 @@
|
|||
# Install the system packages needed for French spelling/grammar checking.
# grammalecte-cli is the command the wiki comparison script shells out to
# when it checks the text of French wiki pages.
sudo apt install aspell aspell-fr grammalecte-cli
|
File diff suppressed because it is too large
Load diff
Binary file not shown.
Before Width: | Height: | Size: 43 KiB After Width: | Height: | Size: 43 KiB |
|
@ -1,6 +1,6 @@
|
|||
[
|
||||
{
|
||||
"key": "building",
|
||||
"count": 657147429
|
||||
"count": 657211643
|
||||
}
|
||||
]
|
|
@ -1,5 +1,5 @@
|
|||
{
|
||||
"last_updated": "2025-08-31T23:48:47.574109",
|
||||
"last_updated": "2025-09-01T10:50:55.122263",
|
||||
"untranslated_pages": [
|
||||
{
|
||||
"title": "FR:2017 Ouragans Irma et Maria",
|
||||
|
|
|
@ -28,6 +28,8 @@ import csv
|
|||
import requests
|
||||
import re
|
||||
import os
|
||||
import subprocess
|
||||
import tempfile
|
||||
from datetime import datetime
|
||||
from bs4 import BeautifulSoup
|
||||
import logging
|
||||
|
@ -61,12 +63,14 @@ NUM_WIKI_PAGES = 1
|
|||
# 3. Pages with FR: prefix (e.g., "FR:Tag:leisure%3Dchildren_club")
|
||||
# Wiki pages to compare in addition to the top keys.
# Tag pages are written with a literal "=" between key and value.
SPECIFIC_PAGES = [
    "Anatomie_des_étiquettes_osm",
    "FR:Tag:leisure=children_club",
    # Was "...=Dask_angela": a stray "D" left over when "%3D" was replaced by "=".
    "FR:Tag:harassment_prevention=ask_angela",
    "Key:harassment_prevention",
    "Proposal process",
    "Automated_Edits_code_of_conduct",
    "Key:cuisine",
    "Libre_Charge_Map",
    "OSM_Mon_Commerce"
]
|
||||
|
||||
def fetch_top_keys(limit=NUM_WIKI_PAGES):
|
||||
|
@ -118,6 +122,90 @@ def save_to_json(data, filename):
|
|||
except IOError as e:
|
||||
logger.error(f"Error saving data to {filename}: {e}")
|
||||
|
||||
def _build_grammar_suggestion(paragraph_index, error, error_type, message):
    """Convert one grammalecte error entry into the suggestion dict stored for a page."""
    return {
        'paragraph': paragraph_index,
        'start': error.get('nStart', 0),
        'end': error.get('nEnd', 0),
        'type': error_type,
        'message': message,
        'suggestions': error.get('aSuggestions', []),
        'text': error.get('sUnderlined', ''),
        'before': error.get('sBefore', ''),
        'after': error.get('sAfter', '')
    }


def check_grammar_with_grammalecte(text):
    """
    Check grammar and spelling in French text using grammalecte-cli.

    The text is written to a temporary file, grammalecte-cli is run on that
    file, and its JSON output is flattened into a list of suggestions. The
    temporary file is always removed, whether the check succeeds or fails.

    Args:
        text (str): French text to check

    Returns:
        list: One dict per reported error, with the keys 'paragraph',
            'start', 'end', 'type', 'message', 'suggestions', 'text',
            'before' and 'after'. Within each paragraph, grammar errors come
            first, followed by spelling errors (type 'spelling'). An empty
            list is returned for empty input or when the check fails for
            any reason (the failure is logged, never raised).
    """
    if not text or not text.strip():
        logger.warning("Empty text provided for grammar checking")
        return []

    logger.info("Checking grammar with grammalecte-cli...")

    temp_file_path = None
    try:
        # delete=False: the file has to outlive the `with` block so that the
        # external command can open it by path; it is removed in `finally`.
        with tempfile.NamedTemporaryFile(mode='w', encoding='utf-8', suffix='.txt', delete=False) as temp_file:
            # Record the path before writing so a failed write is still cleaned up.
            temp_file_path = temp_file.name
            temp_file.write(text)

        # -j makes the CLI emit the JSON parsed below.
        # NOTE(review): -ctx / -wss presumably enable error context and
        # spelling suggestions - confirm against `grammalecte-cli --help`.
        cmd = ['grammalecte-cli', '-f', temp_file_path, '-j', '-ctx', '-wss']
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)

        grammar_data = json.loads(result.stdout)

        grammar_suggestions = []
        for paragraph in grammar_data.get('data', []):
            paragraph_index = paragraph.get('iParagraph', 0)

            # Grammar errors carry their own type and message.
            for error in paragraph.get('lGrammarErrors', []):
                grammar_suggestions.append(
                    _build_grammar_suggestion(
                        paragraph_index,
                        error,
                        error.get('sType', ''),
                        error.get('sMessage', '')
                    )
                )

            # Spelling errors get a fixed type and message.
            for error in paragraph.get('lSpellingErrors', []):
                grammar_suggestions.append(
                    _build_grammar_suggestion(
                        paragraph_index,
                        error,
                        'spelling',
                        'Erreur d\'orthographe'
                    )
                )

        logger.info(f"Found {len(grammar_suggestions)} grammar/spelling suggestions")
        return grammar_suggestions

    except subprocess.CalledProcessError as e:
        logger.error(f"Error running grammalecte-cli: {e}")
        logger.error(f"stdout: {e.stdout}")
        logger.error(f"stderr: {e.stderr}")
        return []

    except json.JSONDecodeError as e:
        logger.error(f"Error parsing grammalecte-cli output: {e}")
        return []

    except Exception as e:
        logger.error(f"Unexpected error during grammar checking: {e}")
        return []

    finally:
        # Runs on every exit path so a failed check cannot leak temp files.
        if temp_file_path is not None:
            try:
                os.unlink(temp_file_path)
            except OSError as e:
                logger.warning(f"Could not remove temporary file {temp_file_path}: {e}")
|
||||
|
||||
def fetch_wiki_page(key, language='en', is_specific_page=False):
|
||||
"""
|
||||
Fetch wiki page for a given key or specific page
|
||||
|
@ -225,6 +313,7 @@ def fetch_wiki_page(key, language='en', is_specific_page=False):
|
|||
|
||||
# Count words in the content
|
||||
content = soup.select_one('#mw-content-text')
|
||||
clean_text = ""
|
||||
if content:
|
||||
# Remove script and style elements
|
||||
for script in content.select('script, style'):
|
||||
|
@ -235,8 +324,14 @@ def fetch_wiki_page(key, language='en', is_specific_page=False):
|
|||
languages_elem.extract()
|
||||
|
||||
# Get text and count words
|
||||
text = content.get_text(separator=' ', strip=True)
|
||||
word_count = len(text.split())
|
||||
clean_text = content.get_text(separator=' ', strip=True)
|
||||
word_count = len(clean_text.split())
|
||||
|
||||
# Check grammar for French pages
|
||||
grammar_suggestions = []
|
||||
if language == 'fr':
|
||||
logger.info(f"Checking grammar for French page: {key}")
|
||||
grammar_suggestions = check_grammar_with_grammalecte(clean_text)
|
||||
|
||||
# Extract links
|
||||
links = content.select('a')
|
||||
|
@ -433,6 +528,7 @@ def fetch_wiki_page(key, language='en', is_specific_page=False):
|
|||
media_count = 0
|
||||
media_details = []
|
||||
categories = []
|
||||
grammar_suggestions = []
|
||||
|
||||
return {
|
||||
'key': key,
|
||||
|
@ -449,7 +545,8 @@ def fetch_wiki_page(key, language='en', is_specific_page=False):
|
|||
'media_details': media_details,
|
||||
'categories': categories,
|
||||
'description_img_url': description_img_url,
|
||||
'is_specific_page': is_specific_page
|
||||
'is_specific_page': is_specific_page,
|
||||
'grammar_suggestions': grammar_suggestions
|
||||
}
|
||||
|
||||
except requests.exceptions.RequestException as e:
|
||||
|
|
|
@ -2,15 +2,15 @@ key,language,url,last_modified,sections,word_count,link_count,media_count,stalen
|
|||
building,en,https://wiki.openstreetmap.org/wiki/Key:building,2025-06-10,31,3774,627,158,8.91,https://wiki.openstreetmap.org/w/images/thumb/6/61/Emptyhouse.jpg/200px-Emptyhouse.jpg
|
||||
building,fr,https://wiki.openstreetmap.org/wiki/FR:Key:building,2025-05-22,25,3181,544,155,8.91,https://wiki.openstreetmap.org/w/images/thumb/6/61/Emptyhouse.jpg/200px-Emptyhouse.jpg
|
||||
Anatomie_des_étiquettes_osm,en,https://wiki.openstreetmap.org/wiki/Anatomie_des_étiquettes_osm,2025-06-08,22,963,53,0,100,
|
||||
FR:Tag:leisure%3Dchildren_club,fr,https://wiki.openstreetmap.org/wiki/FR:Tag:leisure%3Dchildren_club,2024-05-02,8,294,67,10,0,https://upload.wikimedia.org/wikipedia/commons/thumb/7/74/Dave_%26_Buster%27s_video_arcade_in_Columbus%2C_OH_-_17910.JPG/200px-Dave_%26_Buster%27s_video_arcade_in_Columbus%2C_OH_-_17910.JPG
|
||||
https://wiki.openstreetmap.org/wiki/Tag:leisure%3Dchildren_club,en,https://wiki.openstreetmap.org/wiki/Tag:leisure%3Dchildren_club,2025-02-02,9,163,69,9,100,https://wiki.openstreetmap.org/w/images/thumb/7/76/Osm_element_node.svg/30px-Osm_element_node.svg.png
|
||||
FR:Tag:harassment_prevention%3Dask_angela,fr,https://wiki.openstreetmap.org/wiki/FR:Tag:harassment_prevention%3Dask_angela,2025-07-10,20,873,166,15,0,https://wiki.openstreetmap.org/w/images/thumb/1/15/2024-06-27T08.40.50_ask_angela_lyon.jpg/200px-2024-06-27T08.40.50_ask_angela_lyon.jpg
|
||||
https://wiki.openstreetmap.org/wiki/Tag:harassment_prevention%3Dask_angela,en,https://wiki.openstreetmap.org/wiki/Tag:harassment_prevention%3Dask_angela,2025-02-22,14,463,72,9,100,https://wiki.openstreetmap.org/w/images/thumb/7/76/Osm_element_node.svg/30px-Osm_element_node.svg.png
|
||||
FR:Tag:leisure=children_club,fr,https://wiki.openstreetmap.org/wiki/FR:Tag:leisure=children_club,2024-05-02,8,294,67,10,0,https://upload.wikimedia.org/wikipedia/commons/thumb/7/74/Dave_%26_Buster%27s_video_arcade_in_Columbus%2C_OH_-_17910.JPG/200px-Dave_%26_Buster%27s_video_arcade_in_Columbus%2C_OH_-_17910.JPG
|
||||
https://wiki.openstreetmap.org/wiki/Tag:leisure=children_club,en,https://wiki.openstreetmap.org/wiki/Tag:leisure=children_club,2025-02-02,9,163,69,9,100,https://wiki.openstreetmap.org/w/images/thumb/7/76/Osm_element_node.svg/30px-Osm_element_node.svg.png
|
||||
Key:harassment_prevention,en,https://wiki.openstreetmap.org/wiki/Key:harassment_prevention,2024-08-10,12,196,69,14,66.72,https://wiki.openstreetmap.org/w/images/thumb/7/76/Osm_element_node.svg/30px-Osm_element_node.svg.png
|
||||
Key:harassment_prevention,fr,https://wiki.openstreetmap.org/wiki/FR:Key:harassment_prevention,2025-07-03,15,328,83,14,66.72,https://wiki.openstreetmap.org/w/images/thumb/7/76/Osm_element_node.svg/30px-Osm_element_node.svg.png
|
||||
Proposal process,en,https://wiki.openstreetmap.org/wiki/Proposal process,2025-08-13,44,5253,187,4,165.53,https://wiki.openstreetmap.org/w/images/thumb/c/c2/Save_proposal_first.png/761px-Save_proposal_first.png
|
||||
Proposal process,fr,https://wiki.openstreetmap.org/wiki/FR:Proposal process,2023-09-22,15,1146,24,0,165.53,
|
||||
Proposal process,en,https://wiki.openstreetmap.org/wiki/Proposal process,2025-08-13,46,5292,202,4,166.25,https://wiki.openstreetmap.org/w/images/thumb/c/c2/Save_proposal_first.png/761px-Save_proposal_first.png
|
||||
Proposal process,fr,https://wiki.openstreetmap.org/wiki/FR:Proposal process,2023-09-22,15,1146,24,0,166.25,
|
||||
Automated_Edits_code_of_conduct,en,https://wiki.openstreetmap.org/wiki/Automated_Edits_code_of_conduct,2025-07-26,19,2062,69,0,26.35,
|
||||
Automated_Edits_code_of_conduct,fr,https://wiki.openstreetmap.org/wiki/FR:Automated_Edits_code_of_conduct,2025-04-03,17,1571,16,0,26.35,
|
||||
Key:cuisine,en,https://wiki.openstreetmap.org/wiki/Key:cuisine,2025-07-23,17,3422,693,303,107.73,https://upload.wikimedia.org/wikipedia/commons/thumb/f/f0/Food_montage.jpg/200px-Food_montage.jpg
|
||||
Key:cuisine,fr,https://wiki.openstreetmap.org/wiki/FR:Key:cuisine,2024-02-16,15,2866,690,316,107.73,https://upload.wikimedia.org/wikipedia/commons/thumb/f/f0/Food_montage.jpg/200px-Food_montage.jpg
|
||||
Libre_Charge_Map,en,https://wiki.openstreetmap.org/wiki/Libre_Charge_Map,2025-07-28,11,328,10,2,100,https://wiki.openstreetmap.org/w/images/thumb/8/8e/Screenshot_2025-07-28_at_14-40-11_LibreChargeMap_-_OSM_Bliss.png/300px-Screenshot_2025-07-28_at_14-40-11_LibreChargeMap_-_OSM_Bliss.png
|
||||
OSM_Mon_Commerce,en,https://wiki.openstreetmap.org/wiki/OSM_Mon_Commerce,2025-07-29,17,418,34,3,100,https://wiki.openstreetmap.org/w/images/thumb/6/67/Villes_OSM_Mon_Commerce.png/500px-Villes_OSM_Mon_Commerce.png
|
||||
|
|
|
Loading…
Add table
Add a link
Reference in a new issue