By default, do not rebuild already-built files that have not been modified

2025-06-20 09:04:42 +02:00 · 2025-02-23 20:09:39 +01:00 · 2025-02-23 20:09:39 +01:00 · 17e935a3ca
commit 17e935a3ca
parent e1f8bf93e0
3 changed files with 36 additions and 15 deletions
--- a/linking_articles_prev_next.py
+++ b/linking_articles_prev_next.py
@ -95,6 +95,8 @@ Navigation:
 if generate_linkings_json :
    count_articles = len(os.listdir(directory))
    counter=0
+    rebuild_counter = 0
+    pandoc_runs_counter = 0
    
    print(f"Génération des liens entre articles pour {count_articles} articles")
    print(f"run_pandoc: {run_pandoc}")
@ -128,10 +130,15 @@ if generate_linkings_json :
                gemini_content = ''
                html_content = ''
                                # Vérifier l'existence du fichier HTML pour déterminer last_html_build
-                html_path = f"html_websites/{args.blog}/{annee}/{slug}/index.html"
-                last_html_build = None
+                html_path = f"html-websites/{args.blog}/{annee}/{slug}/index.html"
+                last_html_build_time = None
                if os.path.exists(html_path):
-                    last_html_build = time.ctime(os.path.getmtime(html_path))
+                    # Obtenir la date de création du fichier HTML
+                    last_html_build_time = os.path.getctime(html_path)
+
+                    print(f"last_html_build: {last_html_build_time} : {html_path}")
+                else:
+                    print(f"----------- last_html_build html_path: {html_path} n'existe pas")
                # Vérifier l'existence du fichier Gemini pour déterminer last_gemini_build
                gemini_path = f"gemini-capsules/{args.blog}/{slug}.gmi"
                last_gemini_build = None
@ -144,21 +151,32 @@ if generate_linkings_json :
                    last_build_time = time.mktime(time.strptime(last_gemini_build))
                    rebuild_this_article_gemini = file_modified_time > last_build_time
                else:
+                    
                    rebuild_this_article_gemini = True

                # Vérifier si l'article doit être reconstruit en comparant les dates de modification
                rebuild_this_article_html = False
-                if last_html_build:
+                if last_html_build_time:
                    file_modified_time = os.path.getmtime(file_path)
-                    last_build_time = time.mktime(time.strptime(last_html_build))
-                    rebuild_this_article_html = file_modified_time > last_build_time
+                    print(f"--------- file_modified_time: {file_path} : {file_modified_time}")
+                    # Obtenir l'heure de dernière modification du fichier HTML
+                    
+                    rebuild_this_article_html = file_modified_time > last_html_build_time
+                    # print(f"--------- article modifié après le build de son rendu html: {file_path}, {rebuild_this_article_html}")
                else:
+                    # si il n'y a pas de fichier html, on le construit pour la première fois
+                    print('on reconstruit le html de l\'article', file_name)
+                    
                    rebuild_this_article_html = True
+                
+                if rebuild_this_article_html:
+                    rebuild_counter += 1

                if run_pandoc and rebuild_this_article_html:
                    # convertir le contenu d'article org vers html
-                    print(f"{time.strftime('%H:%M:%S')} : Conversion de {file_name} en html")
+                    print(f"BRRRRRRRRRRRR pandoc time {time.strftime('%H:%M:%S')} : Conversion de {file_name} en html")
                    html_content = pypandoc.convert_text(content_without_h1, 'html', format='org')
+                    pandoc_runs_counter += 1
                else:
                    html_content = content_without_h1

@ -184,13 +202,15 @@ if generate_linkings_json :
                    'title': title,
                    'next': None,
                    'previous': None,
-                    'last_html_build': last_html_build,
+                    'last_html_build': last_html_build_time,
                    'last_gemini_build': last_gemini_build,
                    'org_content': content,  # Contenu Org original
                    'html_content_without_h1': re.sub(r'<h1>.*?</h1>', '', html_content),  # Contenu HTML converti sans le titre de premier niveau
                    'html_content': html_content  # Contenu first_picture_urlHTML converti
                }

+    print(f"======= Nombre d'articles reconstruits: {rebuild_counter}")
+    print(f"======= Nombre de runs de pandoc: {pandoc_runs_counter}")
 # Trier les basenames par ordre décroissant
 sorted_basenames = sorted(files_dict.keys(), reverse=True)
 print(len(sorted_basenames), 'articles trouvés')