up regen

2025-06-20 09:04:42 +02:00 · 2025-02-28 18:43:17 +01:00 · 2025-02-28 18:43:17 +01:00 · dbbae888f4
commit dbbae888f4
parent 3bf5856bab
15 changed files with 172 additions and 149 deletions
--- a/linking_articles_prev_next.py
+++ b/linking_articles_prev_next.py
@@ -12,6 +12,7 @@ import argparse
 import pypandoc
 from jinja2 import Environment, FileSystemLoader
 import time
+import sys


 # Démarrer le chronomètre
@@ -28,7 +29,7 @@ parser.add_argument('--enable_roam_id_rewrite', type=str, help='Activer ou non l
 parser.add_argument('--generate_html_pages', type=str, help='Activer ou non la génération des pages html', default=True)
 parser.add_argument('--generate_linkings_json', type=str, help='Activer ou non la génération du json des liens entre articles', default=True)
 parser.add_argument('--force_html_regen', action='store_true', help='Forcer la régénération des fichiers HTML même s\'ils existent déjà', default=False)
-parser.add_argument('--rebuild_articles_info_json', type=str, help='Chemin du fichier JSON des articles', default=True)
+parser.add_argument('--rebuild_articles_info_json', type=str, help='Reconstruire le fichier de données JSON des articles', default=False)

 args = parser.parse_args()
 run_gemini = args.run_gemini
@@ -70,7 +71,7 @@ if rebuild_articles_info_json:

 files_dict = {}
 if os.path.exists(json_file):
-    #print(f"Chargement du fichier JSON existant: {json_file}")
+    print(f" ============== Chargement du fichier JSON existant: {json_file}")
    try:
        with open(json_file, 'r', encoding='utf-8') as f:
            files_dict = json.load(f)
@@ -79,7 +80,7 @@ if os.path.exists(json_file):
        print(f"Erreur lors du chargement du fichier JSON: {e}")
        
 else:
-    print(f"Aucun fichier articles_info.json existant trouvé, reconstruction des informations du blog {args.blog}")
+    print(f"\033[91m ============== Aucun fichier articles_info.json existant trouvé, reconstruction des informations du blog {args.blog}\033[0m")



@@ -151,6 +152,7 @@ if generate_linkings_json :
                                basename = get_basename(file_name)
                                date_str, annee, slug = find_year_and_slug_on_filename(basename)
                                slug = slugify_title(title)
+                                slug_with_year = f"{annee}/{slug}"
                                tags = extract_tags_from_file(file_path, global_config['excluded_tags'])
                                
                                # Convertir les tags en liste si c'est un set
@@ -206,10 +208,10 @@ if generate_linkings_json :
                                    
                                    rebuild_this_article_html = file_modified_time > last_html_build_time
                                    if rebuild_this_article_html:
-                                        print(f"--------- article modifié après le build de son rendu html: {file_path}, {rebuild_this_article_html}")
+                                        print(f"\033[91m--------- article modifié après le build de son rendu html: {file_path}, {rebuild_this_article_html}\033[0m")
                                    else:
-                                        print(f"--------- article non modifié après le build de son rendu html: {file_path}, {rebuild_this_article_html}, on ne recrée pas")
-                                        rebuild_this_article_html = False
+                                        print(f"\033[91m--------- article non modifié après le build de son rendu html: {file_path}, {rebuild_this_article_html}, on ne recrée pas\033[0m")
+                                        
                                else:
                                    # si il n'y a pas de fichier html, on le construit pour la première fois
                                    print('on reconstruit le html de l\'article', file_name)
@@ -219,17 +221,25 @@ if generate_linkings_json :
                                    rebuild_counter += 1


+                                print(f"slug_with_year: {slug_with_year}")
+                                # Afficher les clés de files_dict pour débogage
+                                # print("\nClés disponibles dans files_dict:")
+                                # for key in files_dict.keys():
+                                #     print(f"- {key}")
+                                # print("\n")
                                # Garder le contenu HTML existant si déjà présent
-                                if f"{annee}/{slug}" in files_dict and 'html_content' in files_dict[f"{annee}/{slug}"]:
-                                    #print('on reprend le contenu html existant')
-                                    if len(files_dict[f"{annee}/{slug}"]['html_content']) > 0 :
-                                        html_content = files_dict[f"{annee}/{slug}"]['html_content']
-                                    if len(files_dict[f"{annee}/{slug}"]['html_content_without_h1']) > 0 :
-                                        html_content_without_h1 = files_dict[f"{annee}/{slug}"]['html_content_without_h1']
-                                    else:
+                                if not rebuild_this_article_html and slug_with_year in files_dict and 'html_content' in files_dict[slug_with_year]:
+                                    print('========= on reprend le contenu html existant')
+                                    len_html = len(files_dict[slug_with_year]['html_content'])
+                                    print(f"len_html: {len_html}")
+                                    if len_html > 0 :
+                                        html_content = files_dict[slug_with_year]['html_content']
                                        html_content_without_h1 = re.sub(r'<h1>.*?</h1>', '', html_content)
+                                    if len(files_dict[slug_with_year]['html_content_without_h1']) > 0 :
+                                        html_content_without_h1 = files_dict[slug_with_year]['html_content_without_h1']
                                else:
-                                    print(f"\033[91m {time.strftime('%H:%M:%S')} BRRRRRRRRRRRRR pandoc : {title} en html\033[0m")
+                                    print('========= pas de contenu html existant')
+                                    print(f"\033[91m {time.strftime('%H:%M:%S')} BRRRRRRRRRRRRR pandoc html_content : {title} en html\033[0m")
                                    pandoc_runs_counter += 1
                                    html_content = pypandoc.convert_text(content_without_h1, 'html', format='org')
                                    html_content_without_h1 = re.sub(r'<h1>.*?</h1>', '', html_content)
@@ -237,6 +247,9 @@ if generate_linkings_json :

                                    
                                if run_pandoc and rebuild_this_article_html or force_html_regen:
+                                    print(f"run_pandoc: {run_pandoc}")
+                                    print(f"rebuild_this_article_html: {rebuild_this_article_html}")
+                                    print(f"force_html_regen: {force_html_regen}")
                                    # convertir le contenu d'article org vers html
                                    print(f"\033[91m {time.strftime('%H:%M:%S')} BRRRRRRRRRRRRR pandoc : {title} en html\033[0m")
                                    # print(f"\033[91m.\033[0m", end='', flush=True)
@@ -244,26 +257,18 @@ if generate_linkings_json :
                                        html_content = pypandoc.convert_text(content_without_h1, 'html', format='org')
                                        html_content_without_h1 = re.sub(r'<h1>.*?</h1>', '', html_content)
                                        pandoc_runs_counter += 1
-                                # else:
-                                #     html_content = content_without_h1

-                                # if run_gemini and rebuild_this_article_gemini:
-                                #     #print('-----------on régénère le gemini')
-                                #     # convertir le contenu d'article org vers gmi pour la capsule gemini
-                                #     gemini_content = org_to_gmi(content_without_h1)
-                                #     #print('len(gemini_content)', len(gemini_content))
-                                # else:
-                                #     print('-----------on ne régénère pas le gemini')
                                if rebuild_this_article_gemini:
                                    print(f"\033[91m {time.strftime('%H:%M:%S')} BRRRRRRRRRRRRR gemini : {title} en gmi\033[0m")
+                                    pandoc_runs_counter += 1
                                    gemini_content = org_to_gmi(content_without_h1)

-                                files_dict[f"{annee}/{slug}"] = {
+                                files_dict[slug_with_year] = {
                                    'path': file_path,
                                    'basename': basename,
                                    'roam_id': find_org_roam_id(content),
                                    'slug': f"{slug}/",
-                                    'slug_with_year': f"{annee}/{slug}",
+                                    'slug_with_year': slug_with_year,
                                    'date': boom[0],
                                    'lang': lang_folder,
                                    'article_type': article_type,
@@ -336,7 +341,11 @@ os.makedirs(destination_json, exist_ok=True)

 # sauver le json de tous les articles et pages
 if pandoc_runs_counter > 0 or not os.path.exists(json_file) or rebuild_articles_info_json:
-    print(f"\033[91m Les articles ont changé, Génération du json {json_file} \033[0m")
+    print(f"pandoc_runs_counter: {pandoc_runs_counter}")
+    print(f"rebuild_articles_info_json: {rebuild_articles_info_json}")
+    
+    print(f"\033[94m Génération du json {json_file} \033[0m")
+
    with open(  json_file, 'w', encoding='utf-8') as json_file:
        files_dict_serialized = json.dumps(files_dict, ensure_ascii=False, indent=4)
        json_file.write(files_dict_serialized)
@@ -445,11 +454,11 @@ Pages:


 # Générer la page d'index seulement si des articles ont été convertis
-if pandoc_runs_counter > 0 or run_gemini:
+if pandoc_runs_counter > 0 or run_gemini or force_html_regen:
    # Appel de la fonction pour générer la page d'index
    generate_blog_index(destination_json + '/articles_info.json', 'templates/html/index.html.jinja', destination_html + 'index.html')

-    print(f"\033[91m index régénéré {destination_html}index.html \033[0m")
+    print(f"\033[94m index régénéré {destination_html}index.html \033[0m")
 # else:
    # #print("Aucun article n'a été converti, la page d'index n'est pas régénérée")

@@ -464,57 +473,64 @@ def generate_article_pages(json_file, template_file, output_dir):
    """
    counter_gemini = 0
    print(f"generate_article_pages: ouverture du json {json_file}")
-        # Charger les données JSON
-    with open(json_file, 'r', encoding='utf-8') as f:
-        print('----------------------- yay json chargé')
-        articles_info = json.load(f)
-        # Configurer Jinja2
-        env = Environment(loader=FileSystemLoader('.'))
-        template = env.get_template(template_file)
-        template_content =  get_blog_template_conf(args.blog)
-        
-        print(f"articles count: {len(articles_info.values())}")
-        # Générer les pages pour chaque article
-        for article in articles_info.values():
-            print('----------------------- article', article['title'])
-
-            if article['first_picture_url']:
-                template_content['OG_IMAGE'] = article['first_picture_url']
-            else:
-                template_content['OG_IMAGE'] = template_content['SITE_ICON']
-
-            output_html = template.render(
-                template_content=template_content,
-                article=article,
-                all_articles=articles_info
-            )
-            slug_to_use = article['slug_with_year']
-
-            # Déterminer le slug à utiliser selon le type d'article
-            if 'article_type' in article and article['article_type'] == 'article':
-                slug_to_use = article['slug_with_year']
-            else:
-                slug_to_use = article['slug']
-            # Construire le chemin de sortie html en fonction du slug avec l'année
-            output_subdir = os.path.join(output_dir, slug_to_use)
-            print(f"output_subdir: {output_subdir}")
-            os.makedirs(output_subdir, exist_ok=True)
-            output_file = os.path.join(output_subdir ,"index.html")
-
-            print(f"output_file: {output_file}")
-            # Écrire le fichier de sortie en HTML pour un article
-            with open(output_file, 'w', encoding='utf-8') as f:
-                f.write(output_html)
+    try:
+        with open(json_file, 'r', encoding='utf-8') as f:
+            articles_info = json.load(f)
+            # Configurer Jinja2
+            env = Environment(loader=FileSystemLoader('.'))
+            template = env.get_template(template_file)
+            template_content = get_blog_template_conf(args.blog)
            
-            print(f"Génération de la page gemini pour {article['title']}")
-            if 'gemini_content' in article and len(article['gemini_content']) > 0:
-                # Construire le chemin de sortie gmi en fonction du slug avec l'année
-                save_gemini_file(args.blog, article, articles_info, template_content)
-                counter_gemini += 1
-            else:
-                print(f"----------- on ne génère pas le gemini pour {article['slug']}")
+            print(f"articles count: {len(articles_info.values())}")
+            # Générer les pages pour chaque article
+            for article in articles_info.values():
+                print('----------------------- article', article['title'])
+
+                if article['first_picture_url']:
+                    template_content['OG_IMAGE'] = article['first_picture_url']
+                else:
+                    template_content['OG_IMAGE'] = template_content['SITE_ICON']
+
+                output_html = template.render(
+                    template_content=template_content,
+                    article=article,
+                    all_articles=articles_info
+                )
+                slug_to_use = article['slug_with_year']
+
+                # Déterminer le slug à utiliser selon le type d'article
+                if 'article_type' in article and article['article_type'] == 'article':
+                    slug_to_use = article['slug_with_year']
+                else:
+                    slug_to_use = article['slug']
+                # Construire le chemin de sortie html en fonction du slug avec l'année
+                output_subdir = os.path.join(output_dir, slug_to_use)
+                print(f"output_subdir: {output_subdir}")
+                os.makedirs(output_subdir, exist_ok=True)
+                output_file = os.path.join(output_subdir ,"index.html")
+
+                print(f"output_file: {output_file}")
+                # Écrire le fichier de sortie en HTML pour un article
+                with open(output_file, 'w', encoding='utf-8') as f:
+                    f.write(output_html)
+                
+                print(f"Génération de la page gemini pour {article['title']}")
+                if 'gemini_content' in article and len(article['gemini_content']) > 0:
+                    # Construire le chemin de sortie gmi en fonction du slug avec l'année
+                    save_gemini_file(args.blog, article, articles_info, template_content)
+                    counter_gemini += 1
+                else:
+                    print(f"----------- on ne génère pas le gemini pour {article['slug']}")
 
-        return
+            print(f"\033[94m Nombre d'articles gemini générés : {counter_gemini}\033[0m")
+            return
+    except IOError as e:
+        print(f"Erreur lors de la lecture du fichier {json_file}: {e}")
+        sys.exit(1)
+    except Exception as e:
+        print(f"Erreur inattendue lors de la lecture du fichier {json_file}: {e}")
+        sys.exit(1)
+