build des pages gemini dans linking_articles_prev_next, correction en masse https dans les articles

2025-06-20 09:04:42 +02:00 · 2025-02-21 23:40:35 +01:00 · 2025-02-21 23:40:35 +01:00 · aa00ae2cfc
commit aa00ae2cfc
parent 619ba68dcc
1641 changed files with 12787 additions and 14211 deletions
--- a/linking_articles_prev_next.py
+++ b/linking_articles_prev_next.py
@ -15,10 +15,13 @@ import time  # Importer le module time

 # Démarrer le chronomètre
 start_time = time.time()
-# pour tester
+# Configs pour tester
 generate_linkings_json = True
 generate_articles = True
 run_pandoc = True # le plus long quand on a beaucoup d'articles
+run_pandoc = False
+
+run_gemini = True

 # Configurer argparse pour prendre le blog en argument
 parser = argparse.ArgumentParser(description='Générer une liste des derniers articles de blog.')
@ -33,6 +36,8 @@ def get_basename(file_name):
 directory = f'sources/{args.blog}/lang_fr'
 destination_json = f'sources/{args.blog}/build'
 destination_html = f'html-websites/{args.blog}/'
+destination_gmi = f'gemini-capsules/{args.blog}/'
+
 # Dictionnaire pour stocker les informations des fichiers
 files_dict = {}

@ -45,6 +50,47 @@ def get_first_picture_url(content):
    else:
        return None

+
+def org_to_gmi(org_text: str, output_filename_slug: str) -> str:
+    """
+    Convert Org text with pypandoc and save the result as a Gemini (.gmi) file.
+
+    The conversion uses pandoc's 'markdown' writer; the resulting text is
+    written unchanged to `<destination_gmi>/<output_filename_slug>.gmi`.
+
+    Args:
+    - org_text (str): The Org-formatted text to convert.
+    - output_filename_slug (str): Base name of the output file, without the
+      '.gmi' extension.
+
+    Returns:
+    - str: The converted text, or an empty string when the conversion fails.
+      A failure to write the file is reported on stdout, but the converted
+      text is still returned.
+    """
+    # Convert the Org text through pandoc.
+    try:
+        output = pypandoc.convert_text(org_text, 'markdown', format='org')
+    except RuntimeError as e:
+        print(f"Erreur de conversion : {e}")
+        # Return an empty string (not None) to honour the declared return type.
+        return ''
+
+    # os.path.join avoids a doubled '/' when destination_gmi already ends with one.
+    gmi_path = os.path.join(destination_gmi, output_filename_slug + '.gmi')
+
+    # Save the converted content; a write error is reported, not raised.
+    try:
+        with open(gmi_path, 'w', encoding='utf-8') as f:
+            f.write(output)
+        print(f"Fichier GMI sauvegardé avec succès : {output_filename_slug}")
+    except OSError as e:
+        print(f"Erreur lors de la sauvegarde du fichier : {e}")
+    return output
+
 if generate_linkings_json :

    # Parcourir les fichiers du dossier
@ -69,13 +115,19 @@ if generate_linkings_json :
                # Désactiver les warning d'identifiant dupliqué dans la conversion pandoc
                content_without_h1 = re.sub(r'^\*.*?$', '', content, count=1, flags=re.MULTILINE)

+                gemini_content = ''
+                html_content = ''
+
                if run_pandoc:
-                    
-                 html_content = pypandoc.convert_text(content_without_h1, 'html', format='org')
+                    # convertir le contenu d'article org vers html
+                    html_content = pypandoc.convert_text(content_without_h1, 'html', format='org')
                else:
                    html_content = content_without_h1

-                # html_content = pypandoc.convert_text(content, 'html', format='org')
+                if run_gemini:
+                    os.makedirs(destination_gmi, exist_ok=True)
+                    # convertir le contenu d'article org vers gmi pour la capsule gemini
+                    gemini_content = org_to_gmi(content_without_h1, slug)

                files_dict[f"{annee}/{slug}"] = {
                    'path': file_path,
@ -149,17 +201,55 @@ def generate_blog_index(json_file, template_file, output_file):

    articles_others = sorted(articles_info.values(), key=lambda x: x['date'], reverse=True)[10:]
    # Rendre le template avec les données
-    output_html = template.render(
+    output_index_html = template.render(
        template_content=configs_sites[args.blog],
        articles=sorted_articles[:global_config['posts_per_page']],
        articles_others=articles_others
    )

-    # Écrire le fichier de sortie
-    with open(output_file, 'w', encoding='utf-8') as f:
-        f.write(output_html)
+    gmi_list_articles = ''

-    print(f"Page d'index générée dans {output_file}")
+    for basename, article in files_dict.items():
+
+        # gmi_list_articles += f"\n=> article.gmi"
+        gmi_list_articles += f"\n=> {article.slug}.gmi {article.date_formatee} {article.title}"
+        
+    output_index_gmi = f"""
+# {global_config['args.blog']['BLOG_TITLE']}
+===============================================
+
+{global_config['args.blog']['BANNIERE_ENTETE']}
+Par {global_config['args.blog']['AUTHOR']}
+Dernière mise à jour: {datetime.now()}
+***********************************************
+
+{global_config['args.blog']['DESCRIPTION']}
+
+**************************************************************
+
+{global_config['args.blog']['SITE_ICON']}
+
+
+
+---------------------
+Index des {len(files_dict.items())} articles:
+
+{gmi_list_articles}
+
+---------------------
+Pages:
+=> index.gmi
+    """
+
+    gmi_index_file=destination_gmi+'index.gmi'
+
+    # Écrire le fichier de sortie en html et en gmi
+    with open(output_file, 'w', encoding='utf-8') as f:
+        f.write(output_index_html)
+        print(f"Page d'index générée dans {output_file}")
+    with open(gmi_index_file, 'w', encoding='utf-8') as f:
+        f.write(output_index_gmi)
+        print(f"Page d'index gemini générée dans {gmi_index_file}")

 # Appel de la fonction pour générer la page d'index
 generate_blog_index(destination_json + '/articles_info.json', 'templates/html/index.html.jinja', destination_html + '/index.html')
@ -194,10 +284,6 @@ def generate_article_pages(json_file, template_file, output_dir):
            # Construire le chemin de sortie en fonction du slug avec l'année
            output_subdir = os.path.join(output_dir, article['slug_with_year'])

-            # print('make subdir: ',output_subdir)
-
-
-
            os.makedirs(output_subdir, exist_ok=True)
            output_file = os.path.join(output_subdir ,"index.html")