From 5acbfd846149d6efaadfe7197a6064b19e8e20be Mon Sep 17 00:00:00 2001
From: Tykayn <contact@cipherbliss.com>
Date: Sat, 30 Aug 2025 18:57:27 +0200
Subject: [PATCH] =?UTF-8?q?add=20analyse=20fr=C3=A9quence=20mots=20tagclou?=
 =?UTF-8?q?d?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 analyse_orthographe_grammaire.py | 67 +++++++++++++++++++++-----------
 1 file changed, 44 insertions(+), 23 deletions(-)

diff --git a/analyse_orthographe_grammaire.py b/analyse_orthographe_grammaire.py
index 2d6a96f3..924bd682 100755
--- a/analyse_orthographe_grammaire.py
+++ b/analyse_orthographe_grammaire.py
@@ -15,7 +15,8 @@ import re
 import os
 import csv
 import argparse
-
+from spellchecker import SpellChecker
+
 # Vérifier si les modules nécessaires sont disponibles
 try:
     import language_tool_python
@@ -83,7 +84,6 @@ def clean_chapter_content(content):
     content = re.sub(r'\n\s*\n', '\n\n', content)
     
     return content.strip()
-
 def load_custom_dictionary(file_path):
     """
     Charge le dictionnaire personnalisé à partir d'un fichier texte.
@@ -102,7 +102,9 @@ def load_custom_dictionary(file_path):
 
     return custom_words
 
+
 def check_spelling(text, lang='fr', custom_dict_path='dictionnaire_personnalise.txt'):
+
     """
     Vérifie l'orthographe d'un texte et retourne les mots mal orthographiés.
     Utilise un dictionnaire personnalisé pour exclure certains mots de la vérification.
@@ -111,7 +113,7 @@ def check_spelling(text, lang='fr', custom_dict_path='dictionnaire_personnalise.
     if not SPELLCHECKER_AVAILABLE:
         print("Vérification orthographique désactivée car le module spellchecker n'est pas disponible.")
         return []
-    
+
     try:
         spell = SpellChecker(language=lang)
 
@@ -140,29 +142,29 @@ def check_spelling(text, lang='fr', custom_dict_path='dictionnaire_personnalise.
             # Limiter à 5 suggestions maximum
             suggestions_list = list(suggestions) if suggestions is not None else []
             suggestions_list = suggestions_list[:5]
-            
+
             # Trouver toutes les occurrences du mot dans le texte original
             for match in re.finditer(r'\b' + re.escape(word) + r'\b', text, re.IGNORECASE):
                 # Extraire le contexte autour du mot
                 word_start = match.start()
                 word_end = match.end()
-                
+
                 # Trouver les limites des lignes contenant le mot
                 line_start = text.rfind('\n', 0, word_start) + 1 if text.rfind('\n', 0, word_start) >= 0 else 0
                 line_end = text.find('\n', word_end) if text.find('\n', word_end) >= 0 else len(text)
-                
+
                 # Extraire les lignes précédentes et suivantes pour plus de contexte
                 prev_line_start = text.rfind('\n', 0, line_start - 1) + 1 if text.rfind('\n', 0, line_start - 1) >= 0 else 0
                 next_line_end = text.find('\n', line_end + 1) if text.find('\n', line_end + 1) >= 0 else len(text)
-                
+
                 # Créer le contexte standard et étendu
                 context = text[line_start:line_end]
                 extended_context = text[prev_line_start:next_line_end]
-                
+
                 # Calculer les offsets pour les contextes
                 context_offset = word_start - line_start
                 extended_offset = word_start - prev_line_start
-                
+
                 spelling_errors.append({
                     'word': word,
                     'context': context,
@@ -176,6 +178,24 @@ def check_spelling(text, lang='fr', custom_dict_path='dictionnaire_personnalise.
     except Exception as e:
         print(f"Erreur lors de la vérification orthographique: {str(e)}")
         return []
+    spell = SpellChecker(language=lang)
+
+    # Diviser le texte en mots
+    words = re.findall(r'\b\w+\b', text.lower())
+
+    # Trouver les mots mal orthographiés
+    misspelled = spell.unknown(words)
+
+    # Créer un dictionnaire avec les mots mal orthographiés et leurs suggestions
+    spelling_errors = {}
+    for word in misspelled:
+        # Obtenir les suggestions de correction
+        suggestions = spell.candidates(word)
+        # Limiter à 5 suggestions maximum
+        suggestions_list = list(suggestions)[:5]
+        spelling_errors[word] = suggestions_list
+
+    return spelling_errors
 
 def check_grammar(text, lang='fr'):
     """
@@ -185,7 +205,7 @@ def check_grammar(text, lang='fr'):
     if not LANGUAGE_TOOL_AVAILABLE:
         print("Vérification grammaticale désactivée car le module language_tool_python n'est pas disponible.")
         return []
-    
+
     try:
         # Initialiser l'outil de vérification grammaticale
         tool = language_tool_python.LanguageTool(lang)
@@ -203,21 +223,21 @@ def check_grammar(text, lang='fr'):
             # Extraire plus de contexte autour de l'erreur
             error_start = match.offset
             error_end = match.offset + match.errorLength
-            
+
             # Trouver les limites des lignes contenant l'erreur
             line_start = text.rfind('\n', 0, error_start) + 1 if text.rfind('\n', 0, error_start) >= 0 else 0
             line_end = text.find('\n', error_end) if text.find('\n', error_end) >= 0 else len(text)
-            
+
             # Extraire les lignes précédentes et suivantes pour plus de contexte
             prev_line_start = text.rfind('\n', 0, line_start - 1) + 1 if text.rfind('\n', 0, line_start - 1) >= 0 else 0
             next_line_end = text.find('\n', line_end + 1) if text.find('\n', line_end + 1) >= 0 else len(text)
-            
+
             # Créer le contexte étendu
             extended_context = text[prev_line_start:next_line_end]
-            
+
             # Ajuster les offsets pour le contexte étendu
             extended_offset = error_start - prev_line_start
-            
+
             error = {
                 'message': match.message,
                 'context': match.context,
@@ -279,19 +299,19 @@ def generate_error_report(chapters, output_path):
                         'context_offset': error['context_offset'],
                         'extended_offset': error['extended_offset']
                     })
-                
+
                 # Écrire les erreurs regroupées par mot
                 for word, data in errors_by_word.items():
                     suggestions_str = ", ".join(data['suggestions']) if data['suggestions'] else "Aucune suggestion"
                     report_file.write(f"- **{word}**: {suggestions_str}\n")
-                    
+
                     # Ajouter les contextes pour chaque occurrence
                     for i, occurrence in enumerate(data['occurrences']):
                         # Mettre en évidence le mot dans le contexte
                         context = occurrence['context']
                         offset = occurrence['context_offset']
                         highlighted_context = context[:offset] + f"**{word}**" + context[offset+len(word):]
-                        
+
                         # Ajouter le contexte étendu
                         extended_context = occurrence['extended_context']
                         report_file.write(f"  - **Occurrence {i+1}**:\n")
@@ -307,18 +327,19 @@ def generate_error_report(chapters, output_path):
             if grammar_errors:
                 for i, error in enumerate(grammar_errors):
                     suggestions_str = ", ".join(error['suggestions'][:5]) if error['suggestions'] else "Aucune suggestion"
-                    
+
                     # Mettre en évidence l'erreur dans le contexte
-                    context = error['context'].replace(error['context'][error['offset']:error['offset']+error['length']], 
+                    context = error['context'].replace(error['context'][error['offset']:error['offset']+error['length']],
                                                       f"**{error['context'][error['offset']:error['offset']+error['length']]}**")
-                    
+
+                    # One numbered header line per error (no unnumbered duplicate).
                     report_file.write(f"- **Erreur {i+1}**: {error['message']}\n")
                     report_file.write(f"  - **Contexte**: {context}\n")
-                    
+
                     # Ajouter le contexte étendu
                     if 'extended_context' in error:
                         report_file.write(f"  - **Contexte étendu**: ```\n{error['extended_context']}\n```\n")
-                    
+
                     report_file.write(f"  - **Suggestions**: {suggestions_str}\n\n")
             else:
                 report_file.write("Aucune erreur grammaticale détectée.\n")