add analyse fréquence mots tagcloud

2025-08-30 18:57:27 +02:00 · 2025-08-30 18:57:27 +02:00 · 056387013d
commit 056387013d
parent 7ae7d5915b
9 changed files with 781 additions and 6 deletions
--- a/analyse_orthographe_grammaire.py
+++ b/analyse_orthographe_grammaire.py
@ -0,0 +1,223 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Script pour analyser les fautes d'orthographe et de grammaire dans un fichier livre.org
+et générer un rapport par chapitre.
+
+Ce script:
+1. Lit le fichier livre.org
+2. Extrait le texte par chapitre
+3. Analyse les fautes d'orthographe et de grammaire dans chaque chapitre
+4. Génère un rapport détaillé des erreurs trouvées
+"""
+
+import re
+import os
+import csv
+import argparse
+from pyspellchecker import SpellChecker
+import language_tool_python
+
+# Définir les arguments en ligne de commande
+parser = argparse.ArgumentParser(description='Analyser les fautes d\'orthographe et de grammaire dans un fichier Org-mode.')
+parser.add_argument('dossier', nargs='?', help='Le chemin du dossier contenant le fichier livre.org. Si aucun dossier n\'est spécifié, le dossier courant sera utilisé.', default=os.getcwd())
+args = parser.parse_args()
+
+# Chemin vers le fichier livre.org
+fichier_livre = f"{args.dossier}/livre.org"
+
+def extract_chapters(file_path):
+    """
+    Extrait les chapitres d'un fichier org-mode.
+    Retourne un dictionnaire avec les titres des chapitres comme clés et leur contenu comme valeurs.
+    """
+    with open(file_path, 'r', encoding='utf-8') as file:
+        content = file.read()
+    
+    # Diviser le contenu par chapitres (lignes commençant par **)
+    chapter_pattern = r'^\*\* (.*?)$(.*?)(?=^\*\* |\Z)'
+    chapters = re.findall(chapter_pattern, content, re.MULTILINE | re.DOTALL)
+    
+    chapter_dict = {}
+    for title, content in chapters:
+        # Nettoyer le titre (supprimer ": title:" s'il existe)
+        clean_title = re.sub(r'\s*:\s*title\s*:', '', title).strip()
+        
+        # Nettoyer le contenu
+        clean_content = clean_chapter_content(content)
+        
+        chapter_dict[clean_title] = clean_content
+    
+    return chapter_dict
+
+def clean_chapter_content(content):
+    """
+    Nettoie le contenu d'un chapitre en supprimant les commentaires et les balises org-mode.
+    """
+    # Supprimer les blocs de commentaires
+    content = re.sub(r'#\+begin_comment.*?#\+end_comment', '', content, flags=re.DOTALL | re.IGNORECASE)
+    
+    # Supprimer les lignes de métadonnées (commençant par #+)
+    content = re.sub(r'^\s*#\+.*$', '', content, flags=re.MULTILINE)
+    
+    # Supprimer les sous-titres (lignes commençant par ***)
+    content = re.sub(r'^\s*\*\*\*.*$', '', content, flags=re.MULTILINE)
+    
+    # Supprimer les liens org-mode [[...][...]] et [[...]]
+    content = re.sub(r'\[\[.*?\]\](?:\[.*?\])?', '', content)
+    
+    # Supprimer les lignes vides multiples
+    content = re.sub(r'\n\s*\n', '\n\n', content)
+    
+    return content.strip()
+
+def check_spelling(text, lang='fr'):
+    """
+    Vérifie l'orthographe d'un texte et retourne les mots mal orthographiés.
+    """
+    spell = SpellChecker(language=lang)
+    
+    # Diviser le texte en mots
+    words = re.findall(r'\b\w+\b', text.lower())
+    
+    # Trouver les mots mal orthographiés
+    misspelled = spell.unknown(words)
+    
+    # Créer un dictionnaire avec les mots mal orthographiés et leurs suggestions
+    spelling_errors = {}
+    for word in misspelled:
+        # Obtenir les suggestions de correction
+        suggestions = spell.candidates(word)
+        # Limiter à 5 suggestions maximum
+        suggestions_list = list(suggestions)[:5]
+        spelling_errors[word] = suggestions_list
+    
+    return spelling_errors
+
+def check_grammar(text, lang='fr'):
+    """
+    Vérifie la grammaire d'un texte et retourne les erreurs grammaticales.
+    """
+    # Initialiser l'outil de vérification grammaticale
+    tool = language_tool_python.LanguageTool(lang)
+    
+    # Vérifier le texte
+    matches = tool.check(text)
+    
+    # Créer une liste d'erreurs grammaticales
+    grammar_errors = []
+    for match in matches:
+        # Ignorer les erreurs d'orthographe (déjà traitées par le vérificateur d'orthographe)
+        if match.ruleId.startswith('MORFOLOGIK_RULE'):
+            continue
+        
+        error = {
+            'message': match.message,
+            'context': match.context,
+            'suggestions': match.replacements,
+            'offset': match.offset,
+            'length': match.errorLength,
+            'rule': match.ruleId
+        }
+        grammar_errors.append(error)
+    
+    # Fermer l'outil pour libérer les ressources
+    tool.close()
+    
+    return grammar_errors
+
+def generate_error_report(chapters, output_path):
+    """
+    Génère un rapport des erreurs d'orthographe et de grammaire par chapitre.
+    """
+    with open(output_path, 'w', encoding='utf-8') as report_file:
+        report_file.write("# Rapport d'analyse orthographique et grammaticale\n\n")
+        
+        total_spelling_errors = 0
+        total_grammar_errors = 0
+        
+        for chapter_title, chapter_content in chapters.items():
+            report_file.write(f"## Chapitre: {chapter_title}\n\n")
+            
+            # Vérifier l'orthographe
+            spelling_errors = check_spelling(chapter_content)
+            
+            # Vérifier la grammaire
+            grammar_errors = check_grammar(chapter_content)
+            
+            # Mettre à jour les totaux
+            total_spelling_errors += len(spelling_errors)
+            total_grammar_errors += len(grammar_errors)
+            
+            # Écrire les erreurs d'orthographe
+            report_file.write("### Erreurs d'orthographe\n\n")
+            if spelling_errors:
+                for word, suggestions in spelling_errors.items():
+                    suggestions_str = ", ".join(suggestions) if suggestions else "Aucune suggestion"
+                    report_file.write(f"- **{word}**: {suggestions_str}\n")
+            else:
+                report_file.write("Aucune erreur d'orthographe détectée.\n")
+            
+            report_file.write("\n")
+            
+            # Écrire les erreurs grammaticales
+            report_file.write("### Erreurs grammaticales\n\n")
+            if grammar_errors:
+                for error in grammar_errors:
+                    suggestions_str = ", ".join(error['suggestions'][:5]) if error['suggestions'] else "Aucune suggestion"
+                    context = error['context'].replace(error['context'][error['offset']:error['offset']+error['length']], 
+                                                      f"**{error['context'][error['offset']:error['offset']+error['length']]}**")
+                    report_file.write(f"- **Erreur**: {error['message']}\n")
+                    report_file.write(f"  - **Contexte**: {context}\n")
+                    report_file.write(f"  - **Suggestions**: {suggestions_str}\n\n")
+            else:
+                report_file.write("Aucune erreur grammaticale détectée.\n")
+            
+            report_file.write("\n---\n\n")
+        
+        # Écrire le résumé
+        report_file.write("## Résumé\n\n")
+        report_file.write(f"- **Nombre total de chapitres analysés**: {len(chapters)}\n")
+        report_file.write(f"- **Nombre total d'erreurs d'orthographe**: {total_spelling_errors}\n")
+        report_file.write(f"- **Nombre total d'erreurs grammaticales**: {total_grammar_errors}\n")
+    
+    print(f"Rapport d'erreurs généré: {output_path}")
+
+def save_to_csv(chapters, output_path):
+    """
+    Sauvegarde un résumé des erreurs dans un fichier CSV.
+    """
+    with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
+        writer = csv.writer(csvfile)
+        writer.writerow(['Chapitre', 'Erreurs d\'orthographe', 'Erreurs grammaticales', 'Total'])
+        
+        for chapter_title, chapter_content in chapters.items():
+            spelling_errors = check_spelling(chapter_content)
+            grammar_errors = check_grammar(chapter_content)
+            
+            total_errors = len(spelling_errors) + len(grammar_errors)
+            writer.writerow([chapter_title, len(spelling_errors), len(grammar_errors), total_errors])
+    
+    print(f"Résumé des erreurs sauvegardé dans {output_path}")
+
+def main():
+    print(f"Analyse du fichier: {fichier_livre}")
+    
+    # Extraire les chapitres
+    chapters = extract_chapters(fichier_livre)
+    print(f"Nombre de chapitres trouvés: {len(chapters)}")
+    
+    # Définir les chemins de sortie
+    report_output = f"{args.dossier}/rapport_orthographe_grammaire.md"
+    csv_output = f"{args.dossier}/resume_erreurs.csv"
+    
+    # Générer le rapport d'erreurs
+    generate_error_report(chapters, report_output)
+    
+    # Sauvegarder le résumé en CSV
+    save_to_csv(chapters, csv_output)
+    
+    print("Analyse orthographique et grammaticale terminée avec succès!")
+
+if __name__ == "__main__":
+    main()