page de tags avec échelle de taille selon la fréquence

2025-12-04 17:20:07 +01:00 · 2025-05-12 23:01:56 +02:00 · 2025-05-12 23:01:56 +02:00 · a473479b83
commit a473479b83
parent 341292aa12
7 changed files with 112 additions and 58 deletions
--- a/linking_articles_prev_next.py
+++ b/linking_articles_prev_next.py
@@ -123,7 +123,7 @@ if generate_linkings_json :
                            if f.endswith(('.org', '.md', '.gmi'))]
                    
                    for file_index, file_name in enumerate(files):
-                        print(f"Traitement du fichier {file_name}, {file_index+1}/{len(files)}")
+                        # print(f"Traitement du fichier {file_index+1}/{len(files)}, {file_name}")
                        # Vérifier si le fichier se termine par une extension supportée
                        if not (file_name.endswith('.org') or file_name.endswith('.md') or file_name.endswith('.gmi')):
                            print(f"Fichier {file_name} non supporté")
@@ -139,7 +139,7 @@ if generate_linkings_json :
                            # pour déterminer les informations qu'il contient
                            # afin de les stocker dans un json pour la génération des pages html et gemini
                            with open(file_path, "r", encoding="utf-8") as f:
-                                print(f"----- Traitement de l'article {counter}: {file_name}")
+                                # print(f"----- Traitement de l'article {counter}: {file_name}")
                                content = f.read()
                                # Convertir le contenu Org en HTML
                                title = find_first_level1_title(content)
@@ -169,13 +169,13 @@ if generate_linkings_json :
                                html_content_without_h1 = ''
                                # Vérifier l'existence du fichier HTML pour déterminer last_html_build
                                html_path = f"html-websites/{args.blog}/{annee}/{slug}/index.html"
-                                print(f"html_path existe il? : {html_path}")
+                                # print(f"html_path existe il? : {html_path}")
                                last_html_build_time = None
                                if os.path.exists(html_path):
                                    # Obtenir la date de création du fichier HTML
                                    last_html_build_time = os.path.getctime(html_path)

-                                    print(f"----- last_html_build EXISTE: {last_html_build_time} : {html_path}")
+                                    # print(f"----- last_html_build EXISTE: {last_html_build_time} : {html_path}")
                                else:
                                    print(f"html_path n'existe pas: on va le créer")
                                # Vérifier l'existence du fichier Gemini pour déterminer last_gemini_build
@@ -202,15 +202,15 @@ if generate_linkings_json :
                                
                                if last_html_build_time:
                                    file_modified_time = os.path.getmtime(file_path)
-                                    print(f"--------- file_modified_time: {file_path} : {file_modified_time}")
-                                    print(f"--------- last_html_build_time: {last_html_build_time}")
+                                    # print(f"--------- file_modified_time: {file_path} : {file_modified_time}")
+                                    # print(f"--------- last_html_build_time: {last_html_build_time}")
                                    # Obtenir l'heure de dernière modification du fichier HTML
                                    
                                    rebuild_this_article_html = file_modified_time > last_html_build_time
                                    if rebuild_this_article_html:
                                        print(f"\033[91m--------- article modifié après le build de son rendu html: {file_path}, {rebuild_this_article_html}\033[0m")
-                                    else:
-#                                         print(f"\033[91m--------- article non modifié après le build de son rendu html: {file_path}, {rebuild_this_article_html}, on ne recrée pas\033[0m")
+                                    # else:
+                                    #     print(f"\033[91m--------- article non modifié après le build de son rendu html: {file_path}, {rebuild_this_article_html}, on ne recrée pas\033[0m")
                                        
                                else:
                                    # si il n'y a pas de fichier html, on le construit pour la première fois
@@ -221,7 +221,7 @@ if generate_linkings_json :
                                    rebuild_counter += 1


-                                print(f"slug_with_year: {slug_with_year}")
+                                # print(f"slug_with_year: {slug_with_year}")
                                # Afficher les clés de files_dict pour débogage
                                # print("\nClés disponibles dans files_dict:")
                                # for key in files_dict.keys():
@@ -229,16 +229,16 @@ if generate_linkings_json :
                                # print("\n")
                                # Garder le contenu HTML existant si déjà présent
                                if not rebuild_this_article_html and slug_with_year in files_dict and 'html_content' in files_dict[slug_with_year]:
-                                    print('========= on reprend le contenu html existant')
+                                    # print('========= on reprend le contenu html existant')
                                    len_html = len(files_dict[slug_with_year]['html_content'])
-                                    print(f"len_html: {len_html}")
+                                    # print(f"len_html: {len_html}")
                                    if len_html > 0 :
                                        html_content = files_dict[slug_with_year]['html_content']
                                        html_content_without_h1 = re.sub(r'<h1>.*?</h1>', '', html_content)
                                    if len(files_dict[slug_with_year]['html_content_without_h1']) > 0 :
                                        html_content_without_h1 = files_dict[slug_with_year]['html_content_without_h1']
                                else:
-                                    print('========= pas de contenu html existant')
+                                    print('========= pas de contenu html existant: ', title)
                                    print(f"\033[91m {time.strftime('%H:%M:%S')} BRRRRRRRRRRRRR pandoc html_content : {title} en html\033[0m")
                                    pandoc_runs_counter += 1
                                    html_content = pypandoc.convert_text(content_without_h1, 'html', format='org')
@@ -297,9 +297,9 @@ if generate_linkings_json :
        except OSError as e:
            print(f"Erreur lors de la lecture du dossier {directory}: {e}")
            continue
-
-    print(f"======= Nombre d'articles reconstruits: {rebuild_counter}")
-    print(f"======= Nombre de runs de pandoc: {pandoc_runs_counter}")
+    if rebuild_counter > 0:
+        print(f"======= Nombre d'articles reconstruits: {rebuild_counter}")
+        # print(f"======= Nombre de runs de pandoc: {pandoc_runs_counter}")

 else:
    print(f"Pas de génération des liens entre articles")