up regen

2025-06-20 09:04:42 +02:00 · 2025-02-28 18:43:17 +01:00 · 2025-02-28 18:43:17 +01:00 · dbbae888f4
commit dbbae888f4
parent 3bf5856bab
15 changed files with 172 additions and 149 deletions
--- a/linking_articles_prev_next.py
+++ b/linking_articles_prev_next.py
@ -12,6 +12,7 @@ import argparse
 import pypandoc
 from jinja2 import Environment, FileSystemLoader
 import time
 import sys
 # Démarrer le chronomètre
@ -28,7 +29,7 @@ parser.add_argument('--enable_roam_id_rewrite', type=str, help='Activer ou non l
 parser.add_argument('--generate_html_pages', type=str, help='Activer ou non la génération des pages html', default=True)
 parser.add_argument('--generate_linkings_json', type=str, help='Activer ou non la génération du json des liens entre articles', default=True)
 parser.add_argument('--force_html_regen', action='store_true', help='Forcer la régénération des fichiers HTML même s\'ils existent déjà', default=False)
-parser.add_argument('--rebuild_articles_info_json', type=str, help='Chemin du fichier JSON des articles', default=True)
+parser.add_argument('--rebuild_articles_info_json', type=str, help='Reconstruire le fichier de données JSON des articles', default=False)
 args = parser.parse_args()
 run_gemini = args.run_gemini
@ -70,7 +71,7 @@ if rebuild_articles_info_json:
 files_dict = {}
 if os.path.exists(json_file):
-    #print(f"Chargement du fichier JSON existant: {json_file}")
+    print(f" ============== Chargement du fichier JSON existant: {json_file}")
    try:
        with open(json_file, 'r', encoding='utf-8') as f:
            files_dict = json.load(f)
@ -79,7 +80,7 @@ if os.path.exists(json_file):
        print(f"Erreur lors du chargement du fichier JSON: {e}")
 else:
-    print(f"Aucun fichier articles_info.json existant trouvé, reconstruction des informations du blog {args.blog}")
+    print(f"\033[91m ============== Aucun fichier articles_info.json existant trouvé, reconstruction des informations du blog {args.blog}\033[0m")
@ -151,6 +152,7 @@ if generate_linkings_json :
                                basename = get_basename(file_name)
                                date_str, annee, slug = find_year_and_slug_on_filename(basename)
                                slug = slugify_title(title)
                                slug_with_year = f"{annee}/{slug}"
                                tags = extract_tags_from_file(file_path, global_config['excluded_tags'])
                                # Convertir les tags en liste si c'est un set
@ -206,10 +208,10 @@ if generate_linkings_json :
                                    rebuild_this_article_html = file_modified_time > last_html_build_time
                                    if rebuild_this_article_html:
-                                        print(f"--------- article modifié après le build de son rendu html: {file_path}, {rebuild_this_article_html}")
+                                        print(f"\033[91m--------- article modifié après le build de son rendu html: {file_path}, {rebuild_this_article_html}\033[0m")
                                    else:
-                                        print(f"--------- article non modifié après le build de son rendu html: {file_path}, {rebuild_this_article_html}, on ne recrée pas")
+                                        print(f"\033[91m--------- article non modifié après le build de son rendu html: {file_path}, {rebuild_this_article_html}, on ne recrée pas\033[0m")
-                                        rebuild_this_article_html = False
+                                        
                                else:
                                    # si il n'y a pas de fichier html, on le construit pour la première fois
                                    print('on reconstruit le html de l\'article', file_name)
@ -219,17 +221,25 @@ if generate_linkings_json :
                                    rebuild_counter += 1
                                print(f"slug_with_year: {slug_with_year}")
                                # Afficher les clés de files_dict pour débogage
                                # print("\nClés disponibles dans files_dict:")
                                # for key in files_dict.keys():
                                #     print(f"- {key}")
                                # print("\n")
                                # Garder le contenu HTML existant si déjà présent
-                                if f"{annee}/{slug}" in files_dict and 'html_content' in files_dict[f"{annee}/{slug}"]:
+                                if not rebuild_this_article_html and slug_with_year in files_dict and 'html_content' in files_dict[slug_with_year]:
-                                    #print('on reprend le contenu html existant')
+                                    print('========= on reprend le contenu html existant')
-                                    if len(files_dict[f"{annee}/{slug}"]['html_content']) > 0 :
+                                    len_html = len(files_dict[slug_with_year]['html_content'])
-                                        html_content = files_dict[f"{annee}/{slug}"]['html_content']
+                                    print(f"len_html: {len_html}")
-                                    if len(files_dict[f"{annee}/{slug}"]['html_content_without_h1']) > 0 :
+                                    if len_html > 0 :
-                                        html_content_without_h1 = files_dict[f"{annee}/{slug}"]['html_content_without_h1']
+                                        html_content = files_dict[slug_with_year]['html_content']
                                    else:
                                        html_content_without_h1 = re.sub(r'<h1>.*?</h1>', '', html_content)
                                    if len(files_dict[slug_with_year]['html_content_without_h1']) > 0 :
                                        html_content_without_h1 = files_dict[slug_with_year]['html_content_without_h1']
                                else:
-                                    print(f"\033[91m {time.strftime('%H:%M:%S')} BRRRRRRRRRRRRR pandoc : {title} en html\033[0m")
+                                    print('========= pas de contenu html existant')
                                    print(f"\033[91m {time.strftime('%H:%M:%S')} BRRRRRRRRRRRRR pandoc html_content : {title} en html\033[0m")
                                    pandoc_runs_counter += 1
                                    html_content = pypandoc.convert_text(content_without_h1, 'html', format='org')
                                    html_content_without_h1 = re.sub(r'<h1>.*?</h1>', '', html_content)
@ -237,6 +247,9 @@ if generate_linkings_json :
                                if run_pandoc and rebuild_this_article_html or force_html_regen:
                                    print(f"run_pandoc: {run_pandoc}")
                                    print(f"rebuild_this_article_html: {rebuild_this_article_html}")
                                    print(f"force_html_regen: {force_html_regen}")
                                    # convertir le contenu d'article org vers html
                                    print(f"\033[91m {time.strftime('%H:%M:%S')} BRRRRRRRRRRRRR pandoc : {title} en html\033[0m")
                                    # print(f"\033[91m.\033[0m", end='', flush=True)
@ -244,26 +257,18 @@ if generate_linkings_json :
                                        html_content = pypandoc.convert_text(content_without_h1, 'html', format='org')
                                        html_content_without_h1 = re.sub(r'<h1>.*?</h1>', '', html_content)
                                        pandoc_runs_counter += 1
                                # else:
                                #     html_content = content_without_h1
                                # if run_gemini and rebuild_this_article_gemini:
                                #     #print('-----------on régénère le gemini')
                                #     # convertir le contenu d'article org vers gmi pour la capsule gemini
                                #     gemini_content = org_to_gmi(content_without_h1)
                                #     #print('len(gemini_content)', len(gemini_content))
                                # else:
                                #     print('-----------on ne régénère pas le gemini')
                                if rebuild_this_article_gemini:
                                    print(f"\033[91m {time.strftime('%H:%M:%S')} BRRRRRRRRRRRRR gemini : {title} en gmi\033[0m")
                                    pandoc_runs_counter += 1
                                    gemini_content = org_to_gmi(content_without_h1)
-                                files_dict[f"{annee}/{slug}"] = {
+                                files_dict[slug_with_year] = {
                                    'path': file_path,
                                    'basename': basename,
                                    'roam_id': find_org_roam_id(content),
                                    'slug': f"{slug}/",
-                                    'slug_with_year': f"{annee}/{slug}",
+                                    'slug_with_year': slug_with_year,
                                    'date': boom[0],
                                    'lang': lang_folder,
                                    'article_type': article_type,
@ -336,7 +341,11 @@ os.makedirs(destination_json, exist_ok=True)
 # sauver le json de tous les articles et pages
 if pandoc_runs_counter > 0 or not os.path.exists(json_file) or rebuild_articles_info_json:
-    print(f"\033[91m Les articles ont changé, Génération du json {json_file} \033[0m")
+    print(f"pandoc_runs_counter: {pandoc_runs_counter}")
    print(f"rebuild_articles_info_json: {rebuild_articles_info_json}")
    print(f"\033[94m Génération du json {json_file} \033[0m")
    with open(  json_file, 'w', encoding='utf-8') as json_file:
        files_dict_serialized = json.dumps(files_dict, ensure_ascii=False, indent=4)
        json_file.write(files_dict_serialized)
@ -445,11 +454,11 @@ Pages:
 # Générer la page d'index si des articles ont été convertis, si gemini est activé ou si la régénération HTML est forcée
-if pandoc_runs_counter > 0 or run_gemini:
+if pandoc_runs_counter > 0 or run_gemini or force_html_regen:
    # Appel de la fonction pour générer la page d'index
    generate_blog_index(destination_json + '/articles_info.json', 'templates/html/index.html.jinja', destination_html + 'index.html')
-    print(f"\033[91m index régénéré {destination_html}index.html \033[0m")
+    print(f"\033[94m index régénéré {destination_html}index.html \033[0m")
 # else:
    # #print("Aucun article n'a été converti, la page d'index n'est pas régénérée")
@ -464,9 +473,8 @@ def generate_article_pages(json_file, template_file, output_dir):
    """
    counter_gemini = 0
    print(f"generate_article_pages: ouverture du json {json_file}")
-        # Charger les données JSON
+    try:
        with open(json_file, 'r', encoding='utf-8') as f:
        print('----------------------- yay json chargé')
            articles_info = json.load(f)
            # Configurer Jinja2
            env = Environment(loader=FileSystemLoader('.'))
@ -514,7 +522,15 @@ def generate_article_pages(json_file, template_file, output_dir):
                else:
                    print(f"----------- on ne génère pas le gemini pour {article['slug']}")
            print(f"\033[94m Nombre d'articles gemini générés : {counter_gemini}\033[0m")
            return
    except IOError as e:
        print(f"Erreur lors de la lecture du fichier {json_file}: {e}")
        sys.exit(1)
    except Exception as e:
        print(f"Erreur inattendue lors de la lecture du fichier {json_file}: {e}")
        sys.exit(1)
--- a/new_article.py
+++ b/new_article.py
@ -1,12 +1,19 @@
 #!/usr/bin/env python3
 # ----------------------------------
 # Création de nouvel article de blog
 # ----------------------------------
 # Exemple de commande :
-#   python new_article.py [blog_dir] [lang] "article_title"
+#
 #   python new_article.py --blog_dir=cipherbliss_blog --lang=fr --title="article_title"
 #
 ################################
 import os
 from datetime import datetime
 import argparse
 import sys
 from website_config import configs_sites
 # Configuration des arguments de la ligne de commande
 parser = argparse.ArgumentParser(description="Générer un nouvel article en mode orgmode.")
 parser.add_argument("--title", nargs="?", help="Le titre de l'article.")
@ -111,6 +118,7 @@ with open(filename, "w") as f:
    config={
        'uuid': uuid,
        'slug': slug,
        'NDD': configs_sites[args.blog_dir]['NDD'],
        'title': args.title,
        'date_string_full': date_string_full,
        'date_string': date_string,
@ -118,6 +126,11 @@ with open(filename, "w") as f:
        'blog_dir': args.blog_dir,
        'lang': args.lang,  # Ajout de la langue dans la config
    }
-    f.write(make_article(config))
+    content = make_article(config)
    if content:
        f.write(content)
    else:
        print("Erreur: Impossible de générer le contenu de l'article.")
        exit(1)
 print(f"Le fichier '{filename}' a été créé avec succès.")
--- a/sources/cipherbliss_blog/lang_en/2024-11-03-creation-of-a-gemini-blog.org
+++ b/sources/cipherbliss_blog/lang_en/2024-11-03-creation-of-a-gemini-blog.org
@ -1,12 +0,0 @@
 :PROPERTIES:
 :ID:       1e32138a-680c-4002-8533-923ae0383f15
 :END:
 #+TITLE: Creation of a gemini blog
 #+CREATED: 2024-11-03 12:43:17
 #+SLUG: creation-of-a-gemini-blog
 #+BLOG: cipherbliss_blog
 * Creation of a gemini blog
 [2024-11-03]
--- a/sources/cipherbliss_blog/lang_fr/20250228152822__wololo!-convertir-de-l'open-data-en-geojson-vers-des-tags-openstreetmap.org
+++ b/sources/cipherbliss_blog/lang_fr/20250228152822__wololo!-convertir-de-l'open-data-en-geojson-vers-des-tags-openstreetmap.org
@ -1,19 +1,23 @@
 #+TITLE: Wololo! convertir de l'open data en geojson vers des tags openstreetmap
 #+DATE: 2025-02-28 15:28:22
 #+AUTHOR: 
 #+EMAIL: 
 #+LANGUAGE: fr
 #+OPTIONS: toc:nil num:nil
 #+STARTUP: showall
 #+PERMALINK: 2025/wololo!-convertir-de-l'open-data-en-geojson-vers-des-tags-openstreetmap
 #+ID: fb14f129-8ef1-4b6f-ac74-66e5328df297
 * Article
 :PROPERTIES:    
 :ID:       fb14f129-8ef1-4b6f-ac74-66e5328df297
 :END:
 #+post_slug: wololo-convertir-de-l-open-data-en-geojson-vers-des-tags-openstreetmap
 #+post_lang: fr
 #+post_url: https://www.cipherbliss.com/2025/wololo!-convertir-de-l'open-data-en-geojson-vers-des-tags-openstreetmap
 #+post_title: Wololo! convertir de l'open data en geojson vers des tags openstreetmap
 #+post_tags: 
 #+post_series: 
 #+post_type: post
 #+post_status: publish
 #+post_date_published: <2025-02-28 15:28:22>
 #+post_date_modified: <2025-02-28 15:28:22>
 #+post_index_page_roam_id: fb14f129-8ef1-4b6f-ac74-66e5328df297
 * Wololo! convertir de l'open data en geojson vers des tags openstreetmap
 #+ATTR_HTML: :alt wololo monk from age of empire image :title Action! :align center
 [[https://i.etsystatic.com/38612687/r/il/692ff8/5340918389/il_fullxfull.5340918389_fgfn.jpg]]
 ** Wololo! convertir de l'open data en geojson vers des tags openstreetmap
 L'open data concernant les choses situées géographiquement est une mine d'or d'informations mal foutues. Les transformer en vue de les comparer à ce qui existe dans OpenStreetMap est un travail qui a été réalisé de plusieurs façons par bien des personnes différentes.
@ -199,3 +203,5 @@ bash update_scripts/run_all_extractors.sh
 #+end_src
 Pour ne pas blinder l'espace disque de la forge logicielle, les extractions et les fichiers geojson transformés ne sont pas versionnés, merci au Gitignore.
 Bonnes conversions de données ouvertes!
--- a/sources/cipherbliss_blog/templates/contact.org
+++ b/sources/cipherbliss_blog/templates/contact.org
@ -1,3 +0,0 @@
 * Contact
 contact@cipherbliss.com
--- a/sources/cipherbliss_blog/templates/footer-articles.org
+++ b/sources/cipherbliss_blog/templates/footer-articles.org
@ -1,8 +0,0 @@
 ----------------
 Écrit par tykayn.
 $$DATE_ARTICLE$$
 Si vous aimez ce que nous faisons, soutenez-nous.
--- a/sources/cipherbliss_blog/templates/footer_page.org
+++ b/sources/cipherbliss_blog/templates/footer_page.org
@ -1,15 +0,0 @@
 ---------------
 Si vous aimez ce que nous faisons à Cipher Bliss, vous pouvez nous soutenir de plusieurs façons: en faisant un micro don sur liberapay, ou en cryptomonnaies. Le plus simple pour nous faire connaître étant de partager cet article.
 Suivez-moi sur Mastodon @tykayn@mastodon.cipherbliss.com.
 Ce site restera libre comme un gnou dans la nature et sans pubs, parce qu'on vous aime. Que la source soit avec vous!
 Cipher Bliss, entreprise individuelle en ingénierie informatique spécialisée dans le front-end, à Orsay.
 Faire un don à CipherBliss :
 [[https://liberapay.com/cipherbliss/donate]]
--- a/sources/cipherbliss_blog/templates/header_page.org
+++ b/sources/cipherbliss_blog/templates/header_page.org
--- a/stats.py
+++ b/stats.py
@ -1,7 +1,7 @@
 #!/bin/python3
 # Générer des statistiques sur tous les sites web
-from utils import get_stats_on_all_websites
+from utils.utils import get_stats_on_all_websites
 from website_config import configs_sites
 def main():
--- a/templates/html/article.html.jinja
+++ b/templates/html/article.html.jinja
@ -25,6 +25,13 @@
    <meta name="description" content="{{template_content['PAGE_TITLE']}}">
    <meta name="reply-to" content="{{template_content['EMAIL']}}">
    <link rel="icon" type="{{template_content['SITE_ICON_TYPE']}}" href="{{template_content['SITE_ICON']}}">
        <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/default.min.css">
 <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js"></script>
 <!-- and it's easy to individually load additional languages -->
 <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/languages/go.min.js"></script>
 <script>hljs.highlightAll();</script>
 </head>
 <body>
@ -127,7 +134,7 @@
                        <a href="{{template_content['NDD']}}/feed/">Flux Atom</a>
                    </nav>
                    <div class="site-foot-source">
-                    {{template_content['SOURCE']}}
+                    <a href="{{template_content['SOURCE']}}">source du blog</a>
                    </div>
                </div>
            </div>
--- a/templates/html/index.html.jinja
+++ b/templates/html/index.html.jinja
@ -25,6 +25,13 @@
    <meta name="description" content="{{template_content['PAGE_TITLE']}}">
    <meta name="reply-to" content="{{template_content['EMAIL']}}">
    <link rel="icon" type="{{template_content['SITE_ICON_TYPE']}}" href="{{template_content['SITE_ICON']}}">
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/default.min.css">
 <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js"></script>
 <!-- and it's easy to individually load additional languages -->
 <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/languages/go.min.js"></script>
 <script>hljs.highlightAll();</script>
 </head>
 <body>
--- a/test_org_conversion.py
+++ b/test_org_conversion.py
@ -1,5 +1,5 @@
 import pytest
-from utils import convert_org_to_gemini
+from utils.utils import convert_org_to_gemini
 def test_org_to_gemini_conversion():
    # Exemple de contenu org
--- a/test_utils.py
+++ b/test_utils.py
@ -1,5 +1,5 @@
 import pytest
-from utils import slugify_title
+from utils.utils import slugify_title
 def test_slugify_title_with_accents():
    """Test que slugify conserve les accents francophones"""
--- a/utils/maker.py
+++ b/utils/maker.py
@ -3,22 +3,26 @@ def make_article(config):
    """
    Génère le contenu d'un nouvel article org-mode
    """
-    template = f"""#+TITLE: {config['title']}
+    template = f""":PROPERTIES:
-#+DATE: {config['date_string_full']}
+:ID:       2d3dea63-7567-4def-80d9-c9bd82d41eed
 #+AUTHOR: 
 #+EMAIL: 
 #+LANGUAGE: {config['lang'] if 'lang' in config else 'fr'}
 #+OPTIONS: toc:nil num:nil
 #+STARTUP: showall
 #+PERMALINK: {config['schema_slug']}
 #+ID: {config['uuid']}
 * Article
 :PROPERTIES:
 :ID: {config['uuid']}
 :END:
-** {config['title']}
+#+title: {config['title']}
 #+post_ID: 
 #+post_slug: {config['slug']}
 #+post_lang: {config['lang'] if 'lang' in config else 'fr'}
 #+post_url: {config['NDD']}{config['schema_slug']}
 #+post_title: {config['title']}
 #+post_tags: 
 #+post_series: 
 #+post_type: post
 #+post_status: publish
 #+post_date_published: <{config['date_string_full']}>
 #+post_date_modified: <{config['date_string_full']}>
 #+post_index_page_roam_id: {config['uuid']}
 * {config['title']}
 """
    return template
--- a/website_config.py
+++ b/website_config.py
@ -67,6 +67,14 @@ configs_sites = {
        "EMAIL": "contact@cipherbliss.com",
        "SITE_ICON": "https://www.cipherbliss.com/wp-content/uploads/2016/12/rond.png",
        "SITE_ICON_TYPE": "image/png",
        "SOUTIEN": """
 Si vous aimez ce que nous faisons à Cipher Bliss, vous pouvez nous soutenir de plusieurs façons: en faisant un micro don sur liberapay , ou en cryptomonnaies. Le plus simple nous pour nous faire connaître étant de partager cet article.
 <br/>Suivez moi sur Mastodon <a href="https://mastodon.cipherbliss.com/@tykayn">@tykayn@mastodon.cipherbliss.com</a>.
 <br/>Ce site restera libre comme un gnou dans la nature et sans pubs, parce qu'on vous aime. Que la source soit avec vous!
 <br/>Cipher Bliss, entreprise individuelle en ingénierie informatique spécialisé dans le front-end, à Orsay.
 <br/>Faire un don à CipherBliss :
 <a href="https://liberapay.com/cipherbliss/donate">https://liberapay.com/cipherbliss/donate</a>
 """,
        "NAVIGATION": """
            <nav>
                <a href="/">Accueil</a>