convert to gemini avec md2gemini, conversion de plusieurs langues

2025-11-19 23:00:35 +01:00 · 2025-02-27 16:18:47 +01:00 · 2025-02-27 16:18:47 +01:00 · bba1df0377
commit bba1df0377
parent 255e8fdc04
10 changed files with 462 additions and 202 deletions
--- a/gather_tags_in_json.py
+++ b/gather_tags_in_json.py
@ -67,6 +67,9 @@ def group_files_by_tags(org_files, excluded_tags):
        for tag in tags:
            tag_to_files[tag].add(slug)
    # Sauvegarder les fichiers sans tags
    save_untagged_files(output_file=f"sources/{blog_folder}/build/articles_without_tags.json")
    return tag_to_files
--- a/install.sh
+++ b/install.sh
@ -1,4 +1,4 @@
 #!/bin/bash
 sudo apt install pandoc python3-pip npm
-pip install uuid argparse os md2gemini
+pip install uuid argparse os md2gemini pytest pypandoc
-npm install -g sass
+npm install -g sass
--- a/linking_articles_prev_next.py
+++ b/linking_articles_prev_next.py
@ -42,7 +42,12 @@ def get_basename(file_name):
    return os.path.splitext(file_name)[0]
 # Chemin du dossier contenant les fichiers orgmode
-directory = f'sources/{args.blog}/lang_fr'
+directory_pages = f'sources/{args.blog}/'
 directory_fr = f'sources/{args.blog}/lang_fr'
 directory_en = f'sources/{args.blog}/lang_en'
 directories_to_scan = [directory_pages, directory_fr, directory_en]
 destination_json = f'sources/{args.blog}/build'
 destination_html = f'html-websites/{args.blog}/'
 destination_gmi = f'gemini-capsules/{args.blog}/'
@ -64,188 +69,157 @@ else:
    files_dict = {}
 def get_first_picture_url(content):
    # Utiliser une expression régulière pour trouver la première URL d'image dans le contenu
    pattern = r'\[\[(.*?)\]\]'
    match = re.search(pattern, content)
    if match:
        return match.group(1)
    else:
        return None
-def org_to_gmi(org_text: str, output_filename_slug: str) -> str:
+count_articles = count_files_in_directories(directories_to_scan)
    """
    Convertit un texte au format Org en un fichier au format GMI (Gemini)
    en utilisant pypandoc.
    Args:
    - org_text (str): Le texte au format Org à convertir.
    - output_file (str): Chemin du fichier de sortie au format GMI, sans avoir à préciser l'extension.
    """
    output = """
 # mock land output
 ===========
 blah blah blah
 -----------------
 Tykayn blog mock content
 -----------------
 Navigation:
 => accueil.gmi Accueil 
 => a-propos.gmi à propos
    """
    # Conversion du texte Org en GMI via Pandoc
    try:
        output = pypandoc.convert_text(org_text, 'markdown', format='org')
    except RuntimeError as e:
        print(f"Erreur de conversion : {e}")
        return
    # Sauvegarde du contenu GMI dans un fichier
    try:
        with open(destination_gmi+'/'+output_filename_slug+'.gmi', 'w', encoding='utf-8') as f:
            f.write(output)
        print(f"Fichier GMI sauvegardé avec succès : {output_filename_slug}")
    except OSError as e:
        print(f"Erreur lors de la sauvegarde du fichier : {e}")
    return output
 count_articles = len(os.listdir(directory))
 counter=0
 rebuild_counter = 0
 pandoc_runs_counter = 0
 lang_folder = global_config.get('lang_default', 'fr')
 if generate_linkings_json :
    print(f"Génération des liens entre articles pour {count_articles} articles")
    print(f"run_pandoc: {run_pandoc}")
    print(f"run_gemini: {run_gemini}")
-
+    article_type = "article"
    # Parcourir les fichiers du dossier
-    for file_name in os.listdir(directory):
+    
-        if file_name.endswith('.org'):
+    for index, directory in enumerate(directories_to_scan):
-            counter+=1
+        # Déterminer le type d'article en fonction du chemin
-            if force_html_regen and counter % 10 == 0:
+        if directory == '/':
-                print(f"{time.strftime('%H:%M:%S')} : Articles traités : {counter}/{count_articles}")
+            article_type = "page"
-            file_path = os.path.join(directory, file_name)
+        else:
-            with open(file_path, "r", encoding="utf-8") as f:
+            article_type = "article"
-                content = f.read()
+        # Extraire la langue du dossier si elle commence par "lang_"
-                date_modified = time.ctime(os.path.getmtime(file_path))
+        if directory.split('/')[-1].startswith('lang_'):
            lang_folder = directory.split('/')[-1][5:]  # Prend les caractères après "lang_"
        for file_name in os.listdir(directory):
            if file_name.endswith('.org'):
                counter+=1
                if force_html_regen and counter % 10 == 0:
                    print(f"{time.strftime('%H:%M:%S')} : Articles traités : {counter}/{count_articles}")
                file_path = os.path.join(directory, file_name)
                with open(file_path, "r", encoding="utf-8") as f:
                    content = f.read()
                    date_modified = time.ctime(os.path.getmtime(file_path))
-                basename = get_basename(file_name)
+                    basename = get_basename(file_name)
-                date_str, annee, slug = find_year_and_slug_on_filename(basename)
+                    date_str, annee, slug = find_year_and_slug_on_filename(basename)
-                tags = extract_tags_from_file(file_path, global_config['excluded_tags'])
+                    tags = extract_tags_from_file(file_path, global_config['excluded_tags'])
                # Convertir les tags en liste si c'est un set
                if isinstance(tags, set):
                    tags = list(tags)
                boom = basename.split('__')
                # Convertir le contenu Org en HTML
                title = find_first_level1_title(content)
                # Désactiver les warning d'identifiant dupliqué dans la conversion pandoc
                content_without_h1 = re.sub(r'^\*.*?$', '', content, count=1, flags=re.MULTILINE)
                gemini_content = ''
                html_content = ''
                html_content_without_h1 = ''
                # Vérifier l'existence du fichier HTML pour déterminer last_html_build
                html_path = f"html-websites/{args.blog}/{annee}/{slug}/index.html"
                last_html_build_time = None
                if os.path.exists(html_path):
                    # Obtenir la date de création du fichier HTML
                    last_html_build_time = os.path.getctime(html_path)
                    # print(f"last_html_build: {last_html_build_time} : {html_path}")
                else:
                    print(f"----------- last_html_build html_path: {html_path} n'existe pas")
                # Vérifier l'existence du fichier Gemini pour déterminer last_gemini_build
                gemini_path = f"gemini-capsules/{args.blog}/{slug}.gmi"
                last_gemini_build = None
                rebuild_this_article_gemini = False
                if os.path.exists(gemini_path):
                    last_gemini_build = time.ctime(os.path.getmtime(gemini_path))
                # Vérifier si l'article doit être reconstruit en comparant les dates de modification
                if last_gemini_build:
                    file_modified_time = os.path.getmtime(file_path)
                    last_build_time = time.mktime(time.strptime(last_gemini_build))
                    rebuild_this_article_gemini = file_modified_time > last_build_time
                else:
-                    rebuild_this_article_gemini = True
+                    # Convertir les tags en liste si c'est un set
                    if isinstance(tags, set):
                        tags = list(tags)
                    boom = basename.split('__')
                    # Convertir le contenu Org en HTML
                    title = find_first_level1_title(content)
-                # Vérifier si l'article doit être reconstruit en comparant les dates de modification
+                    # Désactiver les warning d'identifiant dupliqué dans la conversion pandoc
-                rebuild_this_article_html = False
+                    content_without_h1 = re.sub(r'^\*.*?$', '', content, count=1, flags=re.MULTILINE)
                if last_html_build_time:
                    file_modified_time = os.path.getmtime(file_path)
                    # print(f"--------- file_modified_time: {file_path} : {file_modified_time}")
                    # Obtenir l'heure de dernière modification du fichier HTML
                    rebuild_this_article_html = file_modified_time > last_html_build_time
                    # print(f"--------- article modifié après le build de son rendu html: {file_path}, {rebuild_this_article_html}")
                else:
                    # si il n'y a pas de fichier html, on le construit pour la première fois
                    print('on reconstruit le html de l\'article', file_name)
                    rebuild_this_article_html = True
                if rebuild_this_article_html:
                    rebuild_counter += 1
                    gemini_content = ''
                    html_content = ''
                    html_content_without_h1 = ''
                    # Vérifier l'existence du fichier HTML pour déterminer last_html_build
                    html_path = f"html-websites/{args.blog}/{annee}/{slug}/index.html"
                    last_html_build_time = None
                    if os.path.exists(html_path):
                        # Obtenir la date de création du fichier HTML
                        last_html_build_time = os.path.getctime(html_path)
-                # Garder le contenu HTML existant si déjà présent
+                        # print(f"last_html_build: {last_html_build_time} : {html_path}")
                if f"{annee}/{slug}" in files_dict and 'html_content' in files_dict[f"{annee}/{slug}"]:
                    print('on reprend le contenu html existant')
                    if len(files_dict[f"{annee}/{slug}"]['html_content']) > 0:
                        html_content = files_dict[f"{annee}/{slug}"]['html_content']
                    if len(files_dict[f"{annee}/{slug}"]['html_content_without_h1']) > 0:
                        html_content_without_h1 = files_dict[f"{annee}/{slug}"]['html_content_without_h1']
                    else:
-                        html_content_without_h1 = re.sub(r'<h1>.*?</h1>', '', html_content)
+                        print(f"----------- last_html_build html_path: {html_path} n'existe pas")
                    # Vérifier l'existence du fichier Gemini pour déterminer last_gemini_build
                    gemini_path = f"gemini-capsules/{args.blog}/{slug}.gmi"
                    last_gemini_build = None
                    rebuild_this_article_gemini = False
                    if os.path.exists(gemini_path):
                        last_gemini_build = time.ctime(os.path.getmtime(gemini_path))
                    # Vérifier si l'article doit être reconstruit en comparant les dates de modification
                    if last_gemini_build:
                        file_modified_time = os.path.getmtime(file_path)
                        last_build_time = time.mktime(time.strptime(last_gemini_build))
                        rebuild_this_article_gemini = file_modified_time > last_build_time
                    else:
                        rebuild_this_article_gemini = True
-                if run_pandoc and rebuild_this_article_html or force_html_regen:
+                    # Vérifier si l'article doit être reconstruit en comparant les dates de modification
-                    # convertir le contenu d'article org vers html
+                    rebuild_this_article_html = False
-                    print(f"\033[91mBRRRRRRRRRRRRR pandoc time {time.strftime('%H:%M:%S')} : Conversion de {file_name} en html\033[0m")
+                    if last_html_build_time:
                        file_modified_time = os.path.getmtime(file_path)
                        # print(f"--------- file_modified_time: {file_path} : {file_modified_time}")
                        # Obtenir l'heure de dernière modification du fichier HTML
                        rebuild_this_article_html = file_modified_time > last_html_build_time
                        # print(f"--------- article modifié après le build de son rendu html: {file_path}, {rebuild_this_article_html}")
                    else:
                        # si il n'y a pas de fichier html, on le construit pour la première fois
                        print('on reconstruit le html de l\'article', file_name)
                        rebuild_this_article_html = True
-                    html_content = pypandoc.convert_text(content_without_h1, 'html', format='org')
+                    if rebuild_this_article_html:
-                    pandoc_runs_counter += 1
+                        rebuild_counter += 1
                else:
                    html_content = content_without_h1
-                if run_gemini and rebuild_this_article_gemini:
+
-                    os.makedirs(destination_gmi, exist_ok=True)
+                    # Garder le contenu HTML existant si déjà présent
-                    # convertir le contenu d'article org vers gmi pour la capsule gemini
+                    if f"{annee}/{slug}" in files_dict and 'html_content' in files_dict[f"{annee}/{slug}"]:
-                    gemini_content = org_to_gmi(content_without_h1, slug)
+                        print('on reprend le contenu html existant')
                        if len(files_dict[f"{annee}/{slug}"]['html_content']) > 0:
                            html_content = files_dict[f"{annee}/{slug}"]['html_content']
                        if len(files_dict[f"{annee}/{slug}"]['html_content_without_h1']) > 0:
                            html_content_without_h1 = files_dict[f"{annee}/{slug}"]['html_content_without_h1']
                        else:
                            html_content_without_h1 = re.sub(r'<h1>.*?</h1>', '', html_content)
                    if run_pandoc and rebuild_this_article_html or force_html_regen:
                        # convertir le contenu d'article org vers html
                        # print(f"\033[91mBRRRRRRRRRRRRR pandoc time {time.strftime('%H:%M:%S')} : Conversion de {file_name} en html\033[0m")
                        print(f"\033[91m.\033[0m", end='', flush=True)
                        html_content = pypandoc.convert_text(content_without_h1, 'html', format='org')
                        pandoc_runs_counter += 1
                    else:
                        html_content = content_without_h1
                    if run_gemini and rebuild_this_article_gemini:
                        os.makedirs(destination_gmi, exist_ok=True)
                        # convertir le contenu d'article org vers gmi pour la capsule gemini
                        print(f"Conversion de {file_name} en gemini")
                        gemini_content = org_to_gmi(content_without_h1, slug)
-                files_dict[f"{annee}/{slug}"] = {
+                    files_dict[f"{annee}/{slug}"] = {
-                    'path': file_path,
+                        'path': file_path,
-                    'basename': basename,
+                        'basename': basename,
-                    'roam_id': find_org_roam_id(content),
+                        'roam_id': find_org_roam_id(content),
-                    'slug': f"{slug}/",
+                        'slug': f"{slug}/",
-                    'slug_with_year': f"{annee}/{slug}",
+                        'slug_with_year': f"{annee}/{slug}",
-                    'date': boom[0],
+                        'date': boom[0],
-                    'date_modified' : date_modified,
+                        'lang': lang_folder,
-                    'first_picture_url' : get_first_picture_url(content),
+                        'article_type': article_type,
-                    'date_formattee': datetime.strptime(date_str, '%Y%m%d%H%M%S').strftime('%d %B %Y à %H:%M:%S') if len(date_str) == 14 else datetime.strptime(date_str, '%Y%m%dT%H%M%S').strftime('%d %B %Y à %H:%M:%S') if len(date_str) == 15 else datetime.strptime(date_str, '%Y-%m-%d').strftime('%d %B %Y'),
+                        'date_modified' : date_modified,
-                    'annee': annee,
+                        'first_picture_url' : get_first_picture_url(content),
-                    'tags': tags,
+                        'date_formattee': datetime.strptime(date_str, '%Y%m%d%H%M%S').strftime('%d %B %Y à %H:%M:%S') if len(date_str) == 14 else datetime.strptime(date_str, '%Y%m%dT%H%M%S').strftime('%d %B %Y à %H:%M:%S') if len(date_str) == 15 else datetime.strptime(date_str, '%Y-%m-%d').strftime('%d %B %Y'),
-                    'title': title,
+                        'annee': annee,
-                    'next': None,
+                        'tags': tags,
-                    'previous': None,
+                        'title': title,
-                    'last_html_build': last_html_build_time,
+                        'next': None,
-                    'last_gemini_build': last_gemini_build,
+                        'previous': None,
-                    'org_content': content,  # Contenu Org original
+                        'last_html_build': last_html_build_time,
-                    'html_content_without_h1': html_content_without_h1,  # Contenu HTML converti sans le titre de premier niveau
+                        'last_gemini_build': last_gemini_build,
-                    'html_content': html_content  # Contenu first_picture_urlHTML converti
+                        'org_content': content,  # Contenu Org original
-                }
+                        'html_content_without_h1': html_content_without_h1,  # Contenu HTML converti sans le titre de premier niveau
                        'html_content': html_content,  # Contenu first_picture_urlHTML converti
                        'gemini_content': gemini_content,  # Contenu gemini
                    }
    print(f"======= Nombre d'articles reconstruits: {rebuild_counter}")
    print(f"======= Nombre de runs de pandoc: {pandoc_runs_counter}")
--- a/new_article.py
+++ b/new_article.py
@ -89,34 +89,63 @@ def create_uuid_property():
    uuid_value = uuid.uuid4()
    return uuid_value
-# Écriture du fichier org
+def make_article(config):
-with open(filename, "w") as f: 
+    """
-    uuid = create_uuid_property()
+    Crée le contenu d'un nouvel article avec les propriétés spécifiées.
-    f.write(f"""
+
-:PROPERTIES:
+    Args:
-:ID:       {uuid}
+        config (dict): Dictionnaire contenant les paramètres de l'article:
            - uuid (str): Identifiant unique de l'article
            - slug (str): Slug de l'URL de l'article 
            - title (str): Titre de l'article
            - date_string_full (str): Date complète au format YYYY-MM-DD HH:MM:SS
            - date_string (str): Date au format YYYYMMDDHHMMSS
            - schema_slug (str): Slug avec ou sans préfixe année selon la config
            - blog_dir (str): Dossier du blog
    Returns:
        str: Contenu formaté de l'article avec les propriétés et métadonnées
    """
    return f""":PROPERTIES:
 :ID:       {config.get('uuid')}    
 :END:
-#+title: {args.title}
+#+title: {config.get('title')}
 #+post_ID: 
-#+post_slug: {slug}
+#+post_slug: {config.get('slug')}
-#+post_url: https://www.ciperbliss.com/{schema_slug}
+#+post_url: https://www.ciperbliss.com/{config.get('schema_slug')}
-#+post_title: {args.title}
+#+post_title: {config.get('title')}
 #+post_tags: 
 #+post_series: 
 #+post_type: post
 #+post_status: publish
 #+post_picture: 
-#+post_date_published: <{date_string_full}>
+#+post_date_published: <{config.get('date_string_full')}>
-#+post_date_modified: <{date_string_full}>
+#+post_date_modified: <{config.get('date_string_full')}>
-#+post_index_page_roam_id: {uuid}
+#+post_index_page_roam_id: {config.get('uuid')}
-#+BLOG: {args.blog_dir}
+#+BLOG: {config.get('blog_dir')}
-* {args.title}
+* {config.get('title')}
-""")
+"""
 # Écriture du fichier org
 with open(filename, "w") as f: 
    uuid = create_uuid_property()
    config={
        'uuid': uuid,
        'slug': slug,
        'title': args.title,
        'date_string_full': date_string_full,
        'date_string': date_string,
        'schema_slug': schema_slug,
        'blog_dir': args.blog_dir,
    }
    f.write(make_article(config))
 print(f"Le fichier '{filename}' a été créé avec succès.")
--- a/sources/dragonfeu_blog/lang_en/2024-11-02-hello-gemini.org
+++ b/sources/dragonfeu_blog/lang_en/2024-11-02-hello-gemini.org
@ -1,2 +0,0 @@
 * Hi, giminiciens
 hop hop hello in English
--- a/sources/dragonfeu_blog/lang_en/20250227154104__coucou-gemini-en-2025.org
+++ b/sources/dragonfeu_blog/lang_en/20250227154104__coucou-gemini-en-2025.org
@ -0,0 +1,25 @@
 :PROPERTIES:
 :ID:       41decd55-85b9-43a6-9c24-2da4985f2d87    
 :END:
 #+title: Coucou gemini en 2025
 #+post_ID: 
 #+post_slug: coucou-gemini-en-2025
 #+post_url: https://www.ciperbliss.com/2025/coucou-gemini-en-2025
 #+post_title: Coucou gemini en 2025
 #+post_tags: 
 #+post_series: 
 #+post_type: post
 #+post_status: publish
 #+post_picture: 
 #+post_date_published: <2025-02-27 15:41:04>
 #+post_date_modified: <2025-02-27 15:41:04>
 #+post_index_page_roam_id: 41decd55-85b9-43a6-9c24-2da4985f2d87
 #+BLOG: dragonfeu_blog
 * Hey gemini in 2025
 hey yoooooooooooo
--- a/sources/dragonfeu_blog/lang_en/20250227154659__coucou-gemini-en-2025.org
+++ b/sources/dragonfeu_blog/lang_en/20250227154659__coucou-gemini-en-2025.org
@ -0,0 +1,24 @@
 :PROPERTIES:
 :ID:       7a77f219-b581-4a67-be7a-c66588b7e3f7    
 :END:
 #+title: Coucou gemini en 2025
 #+post_ID: 
 #+post_slug: coucou-gemini-en-2025
 #+post_url: https://www.ciperbliss.com/2025/coucou-gemini-en-2025
 #+post_title: Coucou gemini en 2025
 #+post_tags: 
 #+post_series: 
 #+post_type: post
 #+post_status: publish
 #+post_picture: 
 #+post_date_published: <2025-02-27 15:46:59>
 #+post_date_modified: <2025-02-27 15:46:59>
 #+post_index_page_roam_id: 7a77f219-b581-4a67-be7a-c66588b7e3f7
 #+BLOG: dragonfeu_blog
 * Coucou gemini en 2025
--- a/test_org_conversion.py
+++ b/test_org_conversion.py
@ -0,0 +1,72 @@
 import pytest
 from utils import convert_org_to_gemini
 def test_org_to_gemini_conversion():
    """Check that headings, list items and a link survive the Org -> Gemini conversion."""
    # Sample Org content: two level-1 headings, one level-2 heading,
    # a two-item list, a described link and inline emphasis.
    org_content = """#+TITLE: Test Article
 #+AUTHOR: John Doe
 #+DATE: 2024-03-14
 * Premier titre
 Voici du texte simple.
 ** Sous-titre
 - Liste item 1
 - Liste item 2
 * Deuxième titre
 Un lien [[https://example.com][Example]]
 Et du *texte en gras* avec /italique/."""
    # Convert the content directly (no file on disk is passed by this test)
    result = convert_org_to_gemini(org_content)
    result = result.strip()
    # Printed so the converted text is visible when pytest shows captured output
    print(f"result: {result}")
    # Check the key elements of the conversion
    assert "# Premier titre" in result
    assert "## Sous-titre" in result
    assert "* Liste item 1" in result
    assert "* Liste item 2" in result
    assert "=> https://example.com Example" in result
 def test_org_to_gemini_tags():
    """Test tag detection in Org content."""
    org_content = """#+TITLE: Test Article
 #+TAGS: chaton, mignon, félin
 * Un article sur les chatons
 Du contenu sur les chatons..."""
    # NOTE(review): `find_tags_in_org_content` is not among the names imported
    # at the top of this test module (only `pytest` and `convert_org_to_gemini`
    # are visible), so this call looks like it raises NameError -- confirm
    # where the helper lives and import it.
    result = find_tags_in_org_content(org_content)
    assert "chaton" in result, "Le tag 'chaton' devrait être présent dans le résultat"
 # def test_org_to_gemini_code_blocks():
 #     """Test de la conversion des blocs de code"""
 #     org_content = """#+BEGIN_SRC python
 # def hello():
 #     print("Hello, World!")
 # #+END_SRC"""
 #     result = convert_org_to_gemini(org_content)
 #     assert "```python" in result
 #     assert "def hello():" in result
 #     assert 'print("Hello, World!")' in result
 #     assert "```" in result
 # def test_org_to_gemini_tables():
 #     """Test de la conversion des tableaux"""
 #     org_content = """| Colonne 1 | Colonne 2 |
 # |-----------|-----------|
 # | Valeur 1  | Valeur 2  |
 # | Valeur 3  | Valeur 4  |"""
 #     result = convert_org_to_gemini(org_content)
 #     # Vérifier que le tableau est converti en texte lisible
 #     assert "Colonne 1" in result
 #     assert "Colonne 2" in result
 #     assert "Valeur 1" in result
 #     assert "Valeur 2" in result
 if __name__ == '__main__':
    pytest.main([__file__]) 
--- a/utils.py
+++ b/utils.py
@ -5,6 +5,9 @@ import shutil
 from datetime import datetime
 import unicodedata
 import pypandoc
 import subprocess
 import tempfile
 from md2gemini import md2gemini
 from website_config import *
@ -82,22 +85,23 @@ def get_blog_template_conf(blogname) -> dict:
    else:
        return configs_sites[blogname]
-def find_year_and_slug_on_filename(fichier):
+def find_year_and_slug_on_filename(filename):
-    fichier = fichier.replace('..', '.') 
+    print(f"Traitement du fichier: {filename}")  # Debug
-    slug = ''
+    try:
-    annee = datetime.now().year
+        # Supposons que le format attendu est "YYYYMMDDHHMMSS-slug.org"
-    date_str = f'{annee}-00-00'
+        date_str = filename[:14]  # Prend les 14 premiers caractères pour la date
-    date = f'{annee}-00-00'
+        annee = date_str[:4]      # Prend les 4 premiers caractères pour l'année
-    boom = fichier.split('__')
+        
-
+        # Gestion plus robuste du slug
-    if boom :
+        if '-' in filename:
-        date_str = boom[0]
+            slug = filename.split('-', 1)[1].replace('.org', '')
-        annee = date_str[:4]
+        else:
-        slug = boom[1].replace('.org', '')
+            slug = filename.replace('.org', '')
-        if "-" in date_str:
+            
-            slug = enlever_premier_tiret_ou_underscore(slug)
+        return date_str, annee, slug
-        return [date_str, annee, slug]
+    except Exception as e:
-    return [date_str, annee, fichier.replace(' ', '-').replace('.org', '')]
+        print(f"Format de fichier non standard: {filename}")
        return None, None, filename.replace('.org', '')
 def enlever_premier_tiret_ou_underscore(chaîne):
@ -212,21 +216,42 @@ def add_tags_from_content(tags=None, file_content="", words_to_check=None):
                tags.add(word)
    return tags
 # Variable globale pour stocker les fichiers sans tags
 untagged_files = []
-def extract_tags_from_file(file_path, excluded_tags):
+def save_untagged_files(output_file="sources/site_web/build/articles_without_tags.json"):
    """
    Sauvegarde la liste des fichiers sans tags dans un fichier JSON.
    :param output_file: Chemin du fichier JSON de sortie
    """
    import json
    import os
    # Créer le dossier de sortie si nécessaire
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    print('save_untagged_files', len(untagged_files))
    # Sauvegarder la liste dans le fichier JSON
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(untagged_files, f, ensure_ascii=False, indent=4)
 def extract_tags_from_file(file_path, excluded_tags, auto_detected_tags_list=global_config['auto_tag_terms']):
    tags = set()
    with open(file_path, 'r', encoding='utf-8') as file_content:
        tag_found = False
        for line in file_content:
-            if global_config['automatic_tagging_enabled']:
+            if global_config['automatic_tagging_enabled'] and len(auto_detected_tags_list) > 0:
-                tags = add_tags_from_content(tags, line, global_config['auto_tag_terms'])
+                tags = add_tags_from_content(tags, line, auto_detected_tags_list)
            # Check for orgmode tags :tag1:tag2:
-            if ':' in line:
+            if global_config.get('automatic_tagging_org_files', True):
-                for word in line.split():
+                if ':' in line:
-                    if len(word) and word.startswith(':') and word.endswith(':'):
+                    for word in line.split():
-                        tag = word[1:-1]
+                        if len(word) > 1 and word.startswith(':') and word.endswith(':'):
-                        if tag not in excluded_tags:
+                            tag = word[1:-1]
-                            tags.add(tag)
+                            if tag not in excluded_tags:
                                tags.add(tag)
                            tag_found = True
            # Check for #+tags: tag1,tag2
            if line.startswith('#+tags:'):
@ -236,7 +261,8 @@ def extract_tags_from_file(file_path, excluded_tags):
                        tags.add(tag)
                        tag_found = True
-    # if not tag_found:
+    if not tag_found:
        untagged_files.append(file_path)
    #     print('no tag in the article', file_path)
    return tags
@ -380,3 +406,110 @@ def convert_org_to_html(org_file, output_html_file):
        print(f"Conversion réussie : {org_file} -> {output_html_file}")
    except Exception as e:
        print(f"Erreur lors de la conversion de {org_file} : {e}")
 def get_first_picture_url(content):
    """Return the target of the first ``[[...]]`` link found in *content*.

    The text between the first ``[[`` and the nearest following ``]]`` is
    returned as-is; ``None`` is returned when no such link exists.
    """
    first_link = re.search(r'\[\[(.*?)\]\]', content)
    return first_link.group(1) if first_link else None
 def org_to_gmi(org_text: str, output_filename_slug: str) -> str:
    """
    Convert Org text with pypandoc and write the result to a ``.gmi`` file.

    Args:
    - org_text (str): Org-formatted text to convert.
    - output_filename_slug (str): Output file name without extension; the
      file is written to ``destination_gmi + '/' + output_filename_slug + '.gmi'``.

    Returns:
    The converted text, or ``None`` when pypandoc raises ``RuntimeError``.
    A failure to write the file is only printed; the text is still returned.

    NOTE(review): the pypandoc target format is 'markdown', so the saved
    ``.gmi`` file holds Markdown rather than Gemini markup -- confirm intended.
    NOTE(review): ``destination_gmi`` is not defined in this block; presumably
    a module-level name supplied elsewhere -- verify.
    """
    # Placeholder text. It is never returned: it is overwritten when the
    # conversion succeeds, and the RuntimeError path returns early.
    output = """
 # mock land output
 ===========
 blah blah blah
 -----------------
 Tykayn blog mock content
 -----------------
 Navigation:
 => accueil.gmi Accueil 
 => a-propos.gmi à propos
    """
    # Convert the Org text through Pandoc
    try:
        output = pypandoc.convert_text(org_text, 'markdown', format='org')
    except RuntimeError as e:
        print(f"Erreur de conversion : {e}")
        return
    # Save the converted content to the output file
    try:
        with open(destination_gmi+'/'+output_filename_slug+'.gmi', 'w', encoding='utf-8') as f:
            f.write(output)
        print(f"Fichier GMI sauvegardé avec succès : {output_filename_slug}")
    except OSError as e:
        print(f"Erreur lors de la sauvegarde du fichier : {e}")
    return output
 def count_files_in_directories(directories, extension=None):
    """Count the files found under each directory, walking subfolders too.

    Args:
        directories: Iterable of directory paths to walk with ``os.walk``.
        extension: Optional filename suffix (for example ``'.org'``). When
            given, only files whose name ends with it are counted; when
            ``None`` (the default) every file is counted.

    Returns:
        int: Total number of matching files. A directory that does not exist
        contributes 0 because ``os.walk`` yields nothing for it. A file
        reachable from several entries of *directories* (nested paths) is
        counted once per entry.
    """
    total_count = 0
    for directory in directories:
        for _root, _dirs, files in os.walk(directory):
            if extension is None:
                total_count += len(files)
            else:
                total_count += sum(1 for name in files if name.endswith(extension))
    return total_count
 def convert_org_to_gemini(org_content):
    """Convert Org-mode text to Gemini text via pandoc and md2gemini.

    The Org source is piped to ``pandoc`` on standard input and converted to
    Markdown, then the Markdown is handed to ``md2gemini``.

    Args:
        org_content (str): Content in Org format.

    Returns:
        str: Converted content, stripped of surrounding whitespace.

    Raises:
        subprocess.CalledProcessError: If pandoc exits with a non-zero status
            (its stderr is printed first).
        Exception: Any other failure is printed and re-raised unchanged.
    """
    try:
        # Step 1: Org -> Markdown. Feeding pandoc through stdin avoids the
        # temporary file, and the explicit UTF-8 encoding keeps accented
        # characters intact whatever the process locale is.
        pandoc_result = subprocess.run(
            ['pandoc', '-f', 'org', '-t', 'markdown'],
            input=org_content,
            capture_output=True,
            encoding='utf-8',
            check=True,
        )
        markdown_content = pandoc_result.stdout

        # Step 2: Markdown -> Gemini with md2gemini.
        gemini_content = md2gemini(
            markdown_content,
            frontmatter=True,
            links='inline',
        )
        return gemini_content.strip()
    except subprocess.CalledProcessError as e:
        print(f"Erreur lors de la conversion avec pandoc: {e.stderr}")
        raise
    except Exception as e:
        print(f"Erreur lors de la conversion: {str(e)}")
        raise
--- a/website_config.py
+++ b/website_config.py
@ -4,7 +4,9 @@ global_config = {
    "slug_with_year": True,
    # "show_logs": False,
    "show_logs": True,
    "lang_default": "fr",
    "automatic_tagging_enabled": True,
    "automatic_tagging_org_files": True,
    "rebuild_files_filter": 2024,
    "posts_per_page": 10,
    "source_files_extension": "org",
		`@ -1,2 +0,0 @@`
			`* Hi, giminiciens`
			`hop hop hello in English`