update folder script

2025-08-31 22:37:24 +02:00 · 2025-08-31 22:37:24 +02:00 · da06022b56
commit da06022b56
parent 1713aa30b5
20 changed files with 573 additions and 95 deletions
--- a/analyse_orthographe_grammaire.py
+++ b/analyse_orthographe_grammaire.py
@@ -15,8 +15,21 @@ import re
 import os
 import csv
 import argparse
-import language_tool_python
-from spellchecker import SpellChecker
+
+# Vérifier si les modules nécessaires sont disponibles
+try:
+    import language_tool_python
+    LANGUAGE_TOOL_AVAILABLE = True
+except ImportError:
+    print("AVERTISSEMENT: Module language_tool_python non disponible. La vérification grammaticale sera désactivée.")
+    LANGUAGE_TOOL_AVAILABLE = False
+
+try:
+    from spellchecker import SpellChecker
+    SPELLCHECKER_AVAILABLE = True
+except ImportError:
+    print("AVERTISSEMENT: Module spellchecker non disponible. La vérification orthographique sera désactivée.")
+    SPELLCHECKER_AVAILABLE = False

 # Définir les arguments en ligne de commande
 parser = argparse.ArgumentParser(description='Analyser les fautes d\'orthographe et de grammaire dans un fichier Org-mode.')
@@ -94,68 +107,136 @@ def check_spelling(text, lang='fr', custom_dict_path='dictionnaire_personnalise.
    Vérifie l'orthographe d'un texte et retourne les mots mal orthographiés.
    Utilise un dictionnaire personnalisé pour exclure certains mots de la vérification.
    """
-    spell = SpellChecker(language=lang)
+    # Vérifier si le module spellchecker est disponible
+    if not SPELLCHECKER_AVAILABLE:
+        print("Vérification orthographique désactivée car le module spellchecker n'est pas disponible.")
+        return []
+    
+    try:
+        spell = SpellChecker(language=lang)

-    # Charger le dictionnaire personnalisé
-    custom_words = load_custom_dictionary(custom_dict_path)
+        # Charger le dictionnaire personnalisé
+        custom_words = load_custom_dictionary(custom_dict_path)

-    # Ajouter les mots du dictionnaire personnalisé au dictionnaire du vérificateur
-    if custom_words:
-        spell.word_frequency.load_words(custom_words)
+        # Ajouter les mots du dictionnaire personnalisé au dictionnaire du vérificateur
+        if custom_words:
+            spell.word_frequency.load_words(custom_words)

-    # Diviser le texte en mots
-    words = re.findall(r'\b\w+\b', text.lower())
+        # Diviser le texte en mots
+        words = re.findall(r'\b\w+\b', text.lower())

-    # Trouver les mots mal orthographiés
-    misspelled = spell.unknown(words)
+        # Trouver les mots mal orthographiés
+        misspelled = spell.unknown(words)

-    # Créer un dictionnaire avec les mots mal orthographiés et leurs suggestions
-    spelling_errors = {}
-    for word in misspelled:
-        # Vérifier si le mot est dans le dictionnaire personnalisé
-        if word in custom_words:
-            continue
+        # Créer un dictionnaire avec les mots mal orthographiés et leurs suggestions
+        spelling_errors = []
+        for word in misspelled:
+            # Vérifier si le mot est dans le dictionnaire personnalisé
+            if word in custom_words:
+                continue

-        # Obtenir les suggestions de correction
-        suggestions = spell.candidates(word)
-        # Limiter à 5 suggestions maximum
-        suggestions_list = list(suggestions) if suggestions is not None else []
-        suggestions_list = suggestions_list[:5]
-        spelling_errors[word] = suggestions_list
+            # Obtenir les suggestions de correction
+            suggestions = spell.candidates(word)
+            # Limiter à 5 suggestions maximum
+            suggestions_list = list(suggestions) if suggestions is not None else []
+            suggestions_list = suggestions_list[:5]
+            
+            # Trouver toutes les occurrences du mot dans le texte original
+            for match in re.finditer(r'\b' + re.escape(word) + r'\b', text, re.IGNORECASE):
+                # Extraire le contexte autour du mot
+                word_start = match.start()
+                word_end = match.end()
+                
+                # Trouver les limites des lignes contenant le mot
+                line_start = text.rfind('\n', 0, word_start) + 1 if text.rfind('\n', 0, word_start) >= 0 else 0
+                line_end = text.find('\n', word_end) if text.find('\n', word_end) >= 0 else len(text)
+                
+                # Extraire les lignes précédentes et suivantes pour plus de contexte
+                prev_line_start = text.rfind('\n', 0, line_start - 1) + 1 if text.rfind('\n', 0, line_start - 1) >= 0 else 0
+                next_line_end = text.find('\n', line_end + 1) if text.find('\n', line_end + 1) >= 0 else len(text)
+                
+                # Créer le contexte standard et étendu
+                context = text[line_start:line_end]
+                extended_context = text[prev_line_start:next_line_end]
+                
+                # Calculer les offsets pour les contextes
+                context_offset = word_start - line_start
+                extended_offset = word_start - prev_line_start
+                
+                spelling_errors.append({
+                    'word': word,
+                    'context': context,
+                    'extended_context': extended_context,
+                    'context_offset': context_offset,
+                    'extended_offset': extended_offset,
+                    'suggestions': suggestions_list
+                })

-    return spelling_errors
+        return spelling_errors
+    except Exception as e:
+        print(f"Erreur lors de la vérification orthographique: {str(e)}")
+        return []

 def check_grammar(text, lang='fr'):
    """
    Vérifie la grammaire d'un texte et retourne les erreurs grammaticales.
    """
-    # Initialiser l'outil de vérification grammaticale
-    tool = language_tool_python.LanguageTool(lang)
+    # Vérifier si le module language_tool_python est disponible
+    if not LANGUAGE_TOOL_AVAILABLE:
+        print("Vérification grammaticale désactivée car le module language_tool_python n'est pas disponible.")
+        return []
+    
+    try:
+        # Initialiser l'outil de vérification grammaticale
+        tool = language_tool_python.LanguageTool(lang)

-    # Vérifier le texte
-    matches = tool.check(text)
+        # Vérifier le texte
+        matches = tool.check(text)

-    # Créer une liste d'erreurs grammaticales
-    grammar_errors = []
-    for match in matches:
-        # Ignorer les erreurs d'orthographe (déjà traitées par le vérificateur d'orthographe)
-        if match.ruleId.startswith('MORFOLOGIK_RULE'):
-            continue
+        # Créer une liste d'erreurs grammaticales
+        grammar_errors = []
+        for match in matches:
+            # Ignorer les erreurs d'orthographe (déjà traitées par le vérificateur d'orthographe)
+            if match.ruleId.startswith('MORFOLOGIK_RULE'):
+                continue

-        error = {
-            'message': match.message,
-            'context': match.context,
-            'suggestions': match.replacements,
-            'offset': match.offset,
-            'length': match.errorLength,
-            'rule': match.ruleId
-        }
-        grammar_errors.append(error)
+            # Extraire plus de contexte autour de l'erreur
+            error_start = match.offset
+            error_end = match.offset + match.errorLength
+            
+            # Trouver les limites des lignes contenant l'erreur
+            line_start = text.rfind('\n', 0, error_start) + 1 if text.rfind('\n', 0, error_start) >= 0 else 0
+            line_end = text.find('\n', error_end) if text.find('\n', error_end) >= 0 else len(text)
+            
+            # Extraire les lignes précédentes et suivantes pour plus de contexte
+            prev_line_start = text.rfind('\n', 0, line_start - 1) + 1 if text.rfind('\n', 0, line_start - 1) >= 0 else 0
+            next_line_end = text.find('\n', line_end + 1) if text.find('\n', line_end + 1) >= 0 else len(text)
+            
+            # Créer le contexte étendu
+            extended_context = text[prev_line_start:next_line_end]
+            
+            # Ajuster les offsets pour le contexte étendu
+            extended_offset = error_start - prev_line_start
+            
+            error = {
+                'message': match.message,
+                'context': match.context,
+                'extended_context': extended_context,
+                'suggestions': match.replacements,
+                'offset': match.offset,
+                'extended_offset': extended_offset,
+                'length': match.errorLength,
+                'rule': match.ruleId
+            }
+            grammar_errors.append(error)

-    # Fermer l'outil pour libérer les ressources
-    tool.close()
+        # Fermer l'outil pour libérer les ressources
+        tool.close()

-    return grammar_errors
+        return grammar_errors
+    except Exception as e:
+        print(f"Erreur lors de la vérification grammaticale: {str(e)}")
+        return []

 def generate_error_report(chapters, output_path):
    """
@@ -183,9 +264,39 @@ def generate_error_report(chapters, output_path):
            # Écrire les erreurs d'orthographe
            report_file.write("### Erreurs d'orthographe\n\n")
            if spelling_errors:
-                for word, suggestions in spelling_errors.items():
-                    suggestions_str = ", ".join(suggestions) if suggestions else "Aucune suggestion"
+                # Regrouper les erreurs par mot
+                errors_by_word = {}
+                for error in spelling_errors:
+                    word = error['word']
+                    if word not in errors_by_word:
+                        errors_by_word[word] = {
+                            'suggestions': error['suggestions'],
+                            'occurrences': []
+                        }
+                    errors_by_word[word]['occurrences'].append({
+                        'context': error['context'],
+                        'extended_context': error['extended_context'],
+                        'context_offset': error['context_offset'],
+                        'extended_offset': error['extended_offset']
+                    })
+                
+                # Écrire les erreurs regroupées par mot
+                for word, data in errors_by_word.items():
+                    suggestions_str = ", ".join(data['suggestions']) if data['suggestions'] else "Aucune suggestion"
                    report_file.write(f"- **{word}**: {suggestions_str}\n")
+                    
+                    # Ajouter les contextes pour chaque occurrence
+                    for i, occurrence in enumerate(data['occurrences']):
+                        # Mettre en évidence le mot dans le contexte
+                        context = occurrence['context']
+                        offset = occurrence['context_offset']
+                        highlighted_context = context[:offset] + f"**{word}**" + context[offset+len(word):]
+                        
+                        # Ajouter le contexte étendu
+                        extended_context = occurrence['extended_context']
+                        report_file.write(f"  - **Occurrence {i+1}**:\n")
+                        report_file.write(f"    - **Contexte**: {highlighted_context}\n")
+                        report_file.write(f"    - **Contexte étendu**: ```\n{extended_context}\n```\n")
            else:
                report_file.write("Aucune erreur d'orthographe détectée.\n")
            
@@ -194,12 +305,20 @@ def generate_error_report(chapters, output_path):
            # Écrire les erreurs grammaticales
            report_file.write("### Erreurs grammaticales\n\n")
            if grammar_errors:
-                for error in grammar_errors:
+                for i, error in enumerate(grammar_errors):
                    suggestions_str = ", ".join(error['suggestions'][:5]) if error['suggestions'] else "Aucune suggestion"
+                    
+                    # Mettre en évidence l'erreur dans le contexte
                    context = error['context'].replace(error['context'][error['offset']:error['offset']+error['length']], 
                                                      f"**{error['context'][error['offset']:error['offset']+error['length']]}**")
-                    report_file.write(f"- **Erreur**: {error['message']}\n")
+                    
+                    report_file.write(f"- **Erreur {i+1}**: {error['message']}\n")
                    report_file.write(f"  - **Contexte**: {context}\n")
+                    
+                    # Ajouter le contexte étendu
+                    if 'extended_context' in error:
+                        report_file.write(f"  - **Contexte étendu**: ```\n{error['extended_context']}\n```\n")
+                    
                    report_file.write(f"  - **Suggestions**: {suggestions_str}\n\n")
            else:
                report_file.write("Aucune erreur grammaticale détectée.\n")