mirror of
https://forge.chapril.org/tykayn/orgmode-to-gemini-blog
synced 2025-06-20 09:04:42 +02:00
convert to gemini avec md2gemini, conversion de plusieurs langues
This commit is contained in:
parent
255e8fdc04
commit
bba1df0377
10 changed files with 462 additions and 202 deletions
|
@ -67,6 +67,9 @@ def group_files_by_tags(org_files, excluded_tags):
|
||||||
|
|
||||||
for tag in tags:
|
for tag in tags:
|
||||||
tag_to_files[tag].add(slug)
|
tag_to_files[tag].add(slug)
|
||||||
|
# Sauvegarder les fichiers sans tags
|
||||||
|
save_untagged_files(output_file=f"sources/{blog_folder}/build/articles_without_tags.json")
|
||||||
|
|
||||||
return tag_to_files
|
return tag_to_files
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
#!/bin/bash
# Install the system packages, Python packages and npm tools used by the blog generator.
sudo apt install pandoc python3-pip npm
# uuid, argparse and os are part of the Python standard library and must not be
# requested from pip: it cannot resolve a distribution named "os", which makes
# the whole command fail before any of the real dependencies gets installed.
pip install md2gemini pytest pypandoc
npm install -g sass
|
||||||
|
|
|
@ -42,7 +42,12 @@ def get_basename(file_name):
|
||||||
return os.path.splitext(file_name)[0]
|
return os.path.splitext(file_name)[0]
|
||||||
|
|
||||||
# Chemin du dossier contenant les fichiers orgmode
|
# Chemin du dossier contenant les fichiers orgmode
|
||||||
directory = f'sources/{args.blog}/lang_fr'
|
directory_pages = f'sources/{args.blog}/'
|
||||||
|
directory_fr = f'sources/{args.blog}/lang_fr'
|
||||||
|
directory_en = f'sources/{args.blog}/lang_en'
|
||||||
|
|
||||||
|
directories_to_scan = [directory_pages, directory_fr, directory_en]
|
||||||
|
|
||||||
destination_json = f'sources/{args.blog}/build'
|
destination_json = f'sources/{args.blog}/build'
|
||||||
destination_html = f'html-websites/{args.blog}/'
|
destination_html = f'html-websites/{args.blog}/'
|
||||||
destination_gmi = f'gemini-capsules/{args.blog}/'
|
destination_gmi = f'gemini-capsules/{args.blog}/'
|
||||||
|
@ -64,188 +69,157 @@ else:
|
||||||
files_dict = {}
|
files_dict = {}
|
||||||
|
|
||||||
|
|
||||||
def get_first_picture_url(content):
|
|
||||||
# Utiliser une expression régulière pour trouver la première URL d'image dans le contenu
|
|
||||||
pattern = r'\[\[(.*?)\]\]'
|
|
||||||
match = re.search(pattern, content)
|
|
||||||
if match:
|
|
||||||
return match.group(1)
|
|
||||||
else:
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def org_to_gmi(org_text: str, output_filename_slug: str) -> str:
|
count_articles = count_files_in_directories(directories_to_scan)
|
||||||
"""
|
|
||||||
Convertit un texte au format Org en un fichier au format GMI (Gemini)
|
|
||||||
en utilisant pypandoc.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
- org_text (str): Le texte au format Org à convertir.
|
|
||||||
- output_file (str): Chemin du fichier de sortie au format GMI, sans avoir à préciser l'extension.
|
|
||||||
"""
|
|
||||||
output = """
|
|
||||||
# mock land output
|
|
||||||
===========
|
|
||||||
|
|
||||||
blah blah blah
|
|
||||||
|
|
||||||
-----------------
|
|
||||||
Tykayn blog mock content
|
|
||||||
-----------------
|
|
||||||
|
|
||||||
Navigation:
|
|
||||||
|
|
||||||
=> accueil.gmi Accueil
|
|
||||||
=> a-propos.gmi à propos
|
|
||||||
"""
|
|
||||||
# Conversion du texte Org en GMI via Pandoc
|
|
||||||
try:
|
|
||||||
output = pypandoc.convert_text(org_text, 'markdown', format='org')
|
|
||||||
except RuntimeError as e:
|
|
||||||
print(f"Erreur de conversion : {e}")
|
|
||||||
return
|
|
||||||
|
|
||||||
# Sauvegarde du contenu GMI dans un fichier
|
|
||||||
try:
|
|
||||||
with open(destination_gmi+'/'+output_filename_slug+'.gmi', 'w', encoding='utf-8') as f:
|
|
||||||
f.write(output)
|
|
||||||
print(f"Fichier GMI sauvegardé avec succès : {output_filename_slug}")
|
|
||||||
except OSError as e:
|
|
||||||
print(f"Erreur lors de la sauvegarde du fichier : {e}")
|
|
||||||
return output
|
|
||||||
|
|
||||||
count_articles = len(os.listdir(directory))
|
|
||||||
counter=0
|
counter=0
|
||||||
rebuild_counter = 0
|
rebuild_counter = 0
|
||||||
pandoc_runs_counter = 0
|
pandoc_runs_counter = 0
|
||||||
|
lang_folder = global_config.get('lang_default', 'fr')
|
||||||
|
|
||||||
if generate_linkings_json :
|
if generate_linkings_json :
|
||||||
|
|
||||||
print(f"Génération des liens entre articles pour {count_articles} articles")
|
print(f"Génération des liens entre articles pour {count_articles} articles")
|
||||||
print(f"run_pandoc: {run_pandoc}")
|
print(f"run_pandoc: {run_pandoc}")
|
||||||
print(f"run_gemini: {run_gemini}")
|
print(f"run_gemini: {run_gemini}")
|
||||||
|
article_type = "article"
|
||||||
# Parcourir les fichiers du dossier
|
# Parcourir les fichiers du dossier
|
||||||
for file_name in os.listdir(directory):
|
|
||||||
if file_name.endswith('.org'):
|
for index, directory in enumerate(directories_to_scan):
|
||||||
counter+=1
|
# Déterminer le type d'article en fonction du chemin
|
||||||
if force_html_regen and counter % 10 == 0:
|
if directory == '/':
|
||||||
print(f"{time.strftime('%H:%M:%S')} : Articles traités : {counter}/{count_articles}")
|
article_type = "page"
|
||||||
file_path = os.path.join(directory, file_name)
|
else:
|
||||||
with open(file_path, "r", encoding="utf-8") as f:
|
article_type = "article"
|
||||||
content = f.read()
|
# Extraire la langue du dossier si elle commence par "lang_"
|
||||||
date_modified = time.ctime(os.path.getmtime(file_path))
|
if directory.split('/')[-1].startswith('lang_'):
|
||||||
|
lang_folder = directory.split('/')[-1][5:] # Prend les caractères après "lang_"
|
||||||
|
for file_name in os.listdir(directory):
|
||||||
|
if file_name.endswith('.org'):
|
||||||
|
counter+=1
|
||||||
|
if force_html_regen and counter % 10 == 0:
|
||||||
|
print(f"{time.strftime('%H:%M:%S')} : Articles traités : {counter}/{count_articles}")
|
||||||
|
file_path = os.path.join(directory, file_name)
|
||||||
|
with open(file_path, "r", encoding="utf-8") as f:
|
||||||
|
content = f.read()
|
||||||
|
date_modified = time.ctime(os.path.getmtime(file_path))
|
||||||
|
|
||||||
basename = get_basename(file_name)
|
basename = get_basename(file_name)
|
||||||
date_str, annee, slug = find_year_and_slug_on_filename(basename)
|
date_str, annee, slug = find_year_and_slug_on_filename(basename)
|
||||||
tags = extract_tags_from_file(file_path, global_config['excluded_tags'])
|
tags = extract_tags_from_file(file_path, global_config['excluded_tags'])
|
||||||
|
|
||||||
# Convertir les tags en liste si c'est un set
|
|
||||||
if isinstance(tags, set):
|
|
||||||
tags = list(tags)
|
|
||||||
boom = basename.split('__')
|
|
||||||
# Convertir le contenu Org en HTML
|
|
||||||
title = find_first_level1_title(content)
|
|
||||||
|
|
||||||
# Désactiver les warning d'identifiant dupliqué dans la conversion pandoc
|
|
||||||
content_without_h1 = re.sub(r'^\*.*?$', '', content, count=1, flags=re.MULTILINE)
|
|
||||||
|
|
||||||
gemini_content = ''
|
|
||||||
html_content = ''
|
|
||||||
html_content_without_h1 = ''
|
|
||||||
# Vérifier l'existence du fichier HTML pour déterminer last_html_build
|
|
||||||
html_path = f"html-websites/{args.blog}/{annee}/{slug}/index.html"
|
|
||||||
last_html_build_time = None
|
|
||||||
if os.path.exists(html_path):
|
|
||||||
# Obtenir la date de création du fichier HTML
|
|
||||||
last_html_build_time = os.path.getctime(html_path)
|
|
||||||
|
|
||||||
# print(f"last_html_build: {last_html_build_time} : {html_path}")
|
|
||||||
else:
|
|
||||||
print(f"----------- last_html_build html_path: {html_path} n'existe pas")
|
|
||||||
# Vérifier l'existence du fichier Gemini pour déterminer last_gemini_build
|
|
||||||
gemini_path = f"gemini-capsules/{args.blog}/{slug}.gmi"
|
|
||||||
last_gemini_build = None
|
|
||||||
rebuild_this_article_gemini = False
|
|
||||||
if os.path.exists(gemini_path):
|
|
||||||
last_gemini_build = time.ctime(os.path.getmtime(gemini_path))
|
|
||||||
# Vérifier si l'article doit être reconstruit en comparant les dates de modification
|
|
||||||
if last_gemini_build:
|
|
||||||
file_modified_time = os.path.getmtime(file_path)
|
|
||||||
last_build_time = time.mktime(time.strptime(last_gemini_build))
|
|
||||||
rebuild_this_article_gemini = file_modified_time > last_build_time
|
|
||||||
else:
|
|
||||||
|
|
||||||
rebuild_this_article_gemini = True
|
# Convertir les tags en liste si c'est un set
|
||||||
|
if isinstance(tags, set):
|
||||||
|
tags = list(tags)
|
||||||
|
boom = basename.split('__')
|
||||||
|
# Convertir le contenu Org en HTML
|
||||||
|
title = find_first_level1_title(content)
|
||||||
|
|
||||||
# Vérifier si l'article doit être reconstruit en comparant les dates de modification
|
# Désactiver les warning d'identifiant dupliqué dans la conversion pandoc
|
||||||
rebuild_this_article_html = False
|
content_without_h1 = re.sub(r'^\*.*?$', '', content, count=1, flags=re.MULTILINE)
|
||||||
if last_html_build_time:
|
|
||||||
file_modified_time = os.path.getmtime(file_path)
|
|
||||||
# print(f"--------- file_modified_time: {file_path} : {file_modified_time}")
|
|
||||||
# Obtenir l'heure de dernière modification du fichier HTML
|
|
||||||
|
|
||||||
rebuild_this_article_html = file_modified_time > last_html_build_time
|
|
||||||
# print(f"--------- article modifié après le build de son rendu html: {file_path}, {rebuild_this_article_html}")
|
|
||||||
else:
|
|
||||||
# si il n'y a pas de fichier html, on le construit pour la première fois
|
|
||||||
print('on reconstruit le html de l\'article', file_name)
|
|
||||||
|
|
||||||
rebuild_this_article_html = True
|
|
||||||
|
|
||||||
if rebuild_this_article_html:
|
|
||||||
rebuild_counter += 1
|
|
||||||
|
|
||||||
|
gemini_content = ''
|
||||||
|
html_content = ''
|
||||||
|
html_content_without_h1 = ''
|
||||||
|
# Vérifier l'existence du fichier HTML pour déterminer last_html_build
|
||||||
|
html_path = f"html-websites/{args.blog}/{annee}/{slug}/index.html"
|
||||||
|
last_html_build_time = None
|
||||||
|
if os.path.exists(html_path):
|
||||||
|
# Obtenir la date de création du fichier HTML
|
||||||
|
last_html_build_time = os.path.getctime(html_path)
|
||||||
|
|
||||||
# Garder le contenu HTML existant si déjà présent
|
# print(f"last_html_build: {last_html_build_time} : {html_path}")
|
||||||
if f"{annee}/{slug}" in files_dict and 'html_content' in files_dict[f"{annee}/{slug}"]:
|
|
||||||
print('on reprend le contenu html existant')
|
|
||||||
if len(files_dict[f"{annee}/{slug}"]['html_content']) > 0:
|
|
||||||
html_content = files_dict[f"{annee}/{slug}"]['html_content']
|
|
||||||
if len(files_dict[f"{annee}/{slug}"]['html_content_without_h1']) > 0:
|
|
||||||
html_content_without_h1 = files_dict[f"{annee}/{slug}"]['html_content_without_h1']
|
|
||||||
else:
|
else:
|
||||||
html_content_without_h1 = re.sub(r'<h1>.*?</h1>', '', html_content)
|
print(f"----------- last_html_build html_path: {html_path} n'existe pas")
|
||||||
|
# Vérifier l'existence du fichier Gemini pour déterminer last_gemini_build
|
||||||
|
gemini_path = f"gemini-capsules/{args.blog}/{slug}.gmi"
|
||||||
|
last_gemini_build = None
|
||||||
|
rebuild_this_article_gemini = False
|
||||||
|
if os.path.exists(gemini_path):
|
||||||
|
last_gemini_build = time.ctime(os.path.getmtime(gemini_path))
|
||||||
|
# Vérifier si l'article doit être reconstruit en comparant les dates de modification
|
||||||
|
if last_gemini_build:
|
||||||
|
file_modified_time = os.path.getmtime(file_path)
|
||||||
|
last_build_time = time.mktime(time.strptime(last_gemini_build))
|
||||||
|
rebuild_this_article_gemini = file_modified_time > last_build_time
|
||||||
|
else:
|
||||||
|
|
||||||
|
rebuild_this_article_gemini = True
|
||||||
|
|
||||||
if run_pandoc and rebuild_this_article_html or force_html_regen:
|
# Vérifier si l'article doit être reconstruit en comparant les dates de modification
|
||||||
# convertir le contenu d'article org vers html
|
rebuild_this_article_html = False
|
||||||
print(f"\033[91mBRRRRRRRRRRRRR pandoc time {time.strftime('%H:%M:%S')} : Conversion de {file_name} en html\033[0m")
|
if last_html_build_time:
|
||||||
|
file_modified_time = os.path.getmtime(file_path)
|
||||||
|
# print(f"--------- file_modified_time: {file_path} : {file_modified_time}")
|
||||||
|
# Obtenir l'heure de dernière modification du fichier HTML
|
||||||
|
|
||||||
|
rebuild_this_article_html = file_modified_time > last_html_build_time
|
||||||
|
# print(f"--------- article modifié après le build de son rendu html: {file_path}, {rebuild_this_article_html}")
|
||||||
|
else:
|
||||||
|
# si il n'y a pas de fichier html, on le construit pour la première fois
|
||||||
|
print('on reconstruit le html de l\'article', file_name)
|
||||||
|
|
||||||
|
rebuild_this_article_html = True
|
||||||
|
|
||||||
html_content = pypandoc.convert_text(content_without_h1, 'html', format='org')
|
if rebuild_this_article_html:
|
||||||
pandoc_runs_counter += 1
|
rebuild_counter += 1
|
||||||
else:
|
|
||||||
html_content = content_without_h1
|
|
||||||
|
|
||||||
if run_gemini and rebuild_this_article_gemini:
|
|
||||||
os.makedirs(destination_gmi, exist_ok=True)
|
# Garder le contenu HTML existant si déjà présent
|
||||||
# convertir le contenu d'article org vers gmi pour la capsule gemini
|
if f"{annee}/{slug}" in files_dict and 'html_content' in files_dict[f"{annee}/{slug}"]:
|
||||||
gemini_content = org_to_gmi(content_without_h1, slug)
|
print('on reprend le contenu html existant')
|
||||||
|
if len(files_dict[f"{annee}/{slug}"]['html_content']) > 0:
|
||||||
|
html_content = files_dict[f"{annee}/{slug}"]['html_content']
|
||||||
|
if len(files_dict[f"{annee}/{slug}"]['html_content_without_h1']) > 0:
|
||||||
|
html_content_without_h1 = files_dict[f"{annee}/{slug}"]['html_content_without_h1']
|
||||||
|
else:
|
||||||
|
html_content_without_h1 = re.sub(r'<h1>.*?</h1>', '', html_content)
|
||||||
|
|
||||||
|
if run_pandoc and rebuild_this_article_html or force_html_regen:
|
||||||
|
# convertir le contenu d'article org vers html
|
||||||
|
# print(f"\033[91mBRRRRRRRRRRRRR pandoc time {time.strftime('%H:%M:%S')} : Conversion de {file_name} en html\033[0m")
|
||||||
|
print(f"\033[91m.\033[0m", end='', flush=True)
|
||||||
|
|
||||||
|
html_content = pypandoc.convert_text(content_without_h1, 'html', format='org')
|
||||||
|
pandoc_runs_counter += 1
|
||||||
|
else:
|
||||||
|
html_content = content_without_h1
|
||||||
|
|
||||||
|
if run_gemini and rebuild_this_article_gemini:
|
||||||
|
os.makedirs(destination_gmi, exist_ok=True)
|
||||||
|
# convertir le contenu d'article org vers gmi pour la capsule gemini
|
||||||
|
print(f"Conversion de {file_name} en gemini")
|
||||||
|
gemini_content = org_to_gmi(content_without_h1, slug)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
files_dict[f"{annee}/{slug}"] = {
|
files_dict[f"{annee}/{slug}"] = {
|
||||||
'path': file_path,
|
'path': file_path,
|
||||||
'basename': basename,
|
'basename': basename,
|
||||||
'roam_id': find_org_roam_id(content),
|
'roam_id': find_org_roam_id(content),
|
||||||
'slug': f"{slug}/",
|
'slug': f"{slug}/",
|
||||||
'slug_with_year': f"{annee}/{slug}",
|
'slug_with_year': f"{annee}/{slug}",
|
||||||
'date': boom[0],
|
'date': boom[0],
|
||||||
'date_modified' : date_modified,
|
'lang': lang_folder,
|
||||||
'first_picture_url' : get_first_picture_url(content),
|
'article_type': article_type,
|
||||||
'date_formattee': datetime.strptime(date_str, '%Y%m%d%H%M%S').strftime('%d %B %Y à %H:%M:%S') if len(date_str) == 14 else datetime.strptime(date_str, '%Y%m%dT%H%M%S').strftime('%d %B %Y à %H:%M:%S') if len(date_str) == 15 else datetime.strptime(date_str, '%Y-%m-%d').strftime('%d %B %Y'),
|
'date_modified' : date_modified,
|
||||||
'annee': annee,
|
'first_picture_url' : get_first_picture_url(content),
|
||||||
'tags': tags,
|
'date_formattee': datetime.strptime(date_str, '%Y%m%d%H%M%S').strftime('%d %B %Y à %H:%M:%S') if len(date_str) == 14 else datetime.strptime(date_str, '%Y%m%dT%H%M%S').strftime('%d %B %Y à %H:%M:%S') if len(date_str) == 15 else datetime.strptime(date_str, '%Y-%m-%d').strftime('%d %B %Y'),
|
||||||
'title': title,
|
'annee': annee,
|
||||||
'next': None,
|
'tags': tags,
|
||||||
'previous': None,
|
'title': title,
|
||||||
'last_html_build': last_html_build_time,
|
'next': None,
|
||||||
'last_gemini_build': last_gemini_build,
|
'previous': None,
|
||||||
'org_content': content, # Contenu Org original
|
'last_html_build': last_html_build_time,
|
||||||
'html_content_without_h1': html_content_without_h1, # Contenu HTML converti sans le titre de premier niveau
|
'last_gemini_build': last_gemini_build,
|
||||||
'html_content': html_content # Contenu first_picture_urlHTML converti
|
'org_content': content, # Contenu Org original
|
||||||
}
|
'html_content_without_h1': html_content_without_h1, # Contenu HTML converti sans le titre de premier niveau
|
||||||
|
'html_content': html_content, # Contenu first_picture_urlHTML converti
|
||||||
|
'gemini_content': gemini_content, # Contenu gemini
|
||||||
|
}
|
||||||
|
|
||||||
print(f"======= Nombre d'articles reconstruits: {rebuild_counter}")
|
print(f"======= Nombre d'articles reconstruits: {rebuild_counter}")
|
||||||
print(f"======= Nombre de runs de pandoc: {pandoc_runs_counter}")
|
print(f"======= Nombre de runs de pandoc: {pandoc_runs_counter}")
|
||||||
|
|
|
@ -89,34 +89,63 @@ def create_uuid_property():
|
||||||
uuid_value = uuid.uuid4()
|
uuid_value = uuid.uuid4()
|
||||||
return uuid_value
|
return uuid_value
|
||||||
|
|
||||||
# Écriture du fichier org
|
def make_article(config):
|
||||||
with open(filename, "w") as f:
|
"""
|
||||||
uuid = create_uuid_property()
|
Crée le contenu d'un nouvel article avec les propriétés spécifiées.
|
||||||
f.write(f"""
|
|
||||||
:PROPERTIES:
|
Args:
|
||||||
:ID: {uuid}
|
config (dict): Dictionnaire contenant les paramètres de l'article:
|
||||||
|
- uuid (str): Identifiant unique de l'article
|
||||||
|
- slug (str): Slug de l'URL de l'article
|
||||||
|
- title (str): Titre de l'article
|
||||||
|
- date_string_full (str): Date complète au format YYYY-MM-DD HH:MM:SS
|
||||||
|
- date_string (str): Date au format YYYYMMDDHHMMSS
|
||||||
|
- schema_slug (str): Slug avec ou sans préfixe année selon la config
|
||||||
|
- blog_dir (str): Dossier du blog
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str: Contenu formaté de l'article avec les propriétés et métadonnées
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
return f""":PROPERTIES:
|
||||||
|
:ID: {config.get('uuid')}
|
||||||
:END:
|
:END:
|
||||||
|
|
||||||
#+title: {args.title}
|
#+title: {config.get('title')}
|
||||||
#+post_ID:
|
#+post_ID:
|
||||||
#+post_slug: {slug}
|
#+post_slug: {config.get('slug')}
|
||||||
|
|
||||||
|
|
||||||
#+post_url: https://www.ciperbliss.com/{schema_slug}
|
#+post_url: https://www.ciperbliss.com/{config.get('schema_slug')}
|
||||||
#+post_title: {args.title}
|
#+post_title: {config.get('title')}
|
||||||
#+post_tags:
|
#+post_tags:
|
||||||
#+post_series:
|
#+post_series:
|
||||||
#+post_type: post
|
#+post_type: post
|
||||||
#+post_status: publish
|
#+post_status: publish
|
||||||
#+post_picture:
|
#+post_picture:
|
||||||
#+post_date_published: <{date_string_full}>
|
#+post_date_published: <{config.get('date_string_full')}>
|
||||||
#+post_date_modified: <{date_string_full}>
|
#+post_date_modified: <{config.get('date_string_full')}>
|
||||||
#+post_index_page_roam_id: {uuid}
|
#+post_index_page_roam_id: {config.get('uuid')}
|
||||||
#+BLOG: {args.blog_dir}
|
#+BLOG: {config.get('blog_dir')}
|
||||||
|
|
||||||
* {args.title}
|
* {config.get('title')}
|
||||||
|
|
||||||
|
|
||||||
""")
|
"""
|
||||||
|
|
||||||
|
# Écriture du fichier org
|
||||||
|
with open(filename, "w") as f:
|
||||||
|
uuid = create_uuid_property()
|
||||||
|
config={
|
||||||
|
'uuid': uuid,
|
||||||
|
'slug': slug,
|
||||||
|
'title': args.title,
|
||||||
|
'date_string_full': date_string_full,
|
||||||
|
'date_string': date_string,
|
||||||
|
'schema_slug': schema_slug,
|
||||||
|
'blog_dir': args.blog_dir,
|
||||||
|
}
|
||||||
|
f.write(make_article(config))
|
||||||
|
|
||||||
print(f"Le fichier '{filename}' a été créé avec succès.")
|
print(f"Le fichier '{filename}' a été créé avec succès.")
|
||||||
|
|
|
@ -1,2 +0,0 @@
|
||||||
* Hi, giminiciens
|
|
||||||
hop hop hello in English
|
|
|
@ -0,0 +1,25 @@
|
||||||
|
:PROPERTIES:
|
||||||
|
:ID: 41decd55-85b9-43a6-9c24-2da4985f2d87
|
||||||
|
:END:
|
||||||
|
|
||||||
|
#+title: Coucou gemini en 2025
|
||||||
|
#+post_ID:
|
||||||
|
#+post_slug: coucou-gemini-en-2025
|
||||||
|
|
||||||
|
|
||||||
|
#+post_url: https://www.ciperbliss.com/2025/coucou-gemini-en-2025
|
||||||
|
#+post_title: Coucou gemini en 2025
|
||||||
|
#+post_tags:
|
||||||
|
#+post_series:
|
||||||
|
#+post_type: post
|
||||||
|
#+post_status: publish
|
||||||
|
#+post_picture:
|
||||||
|
#+post_date_published: <2025-02-27 15:41:04>
|
||||||
|
#+post_date_modified: <2025-02-27 15:41:04>
|
||||||
|
#+post_index_page_roam_id: 41decd55-85b9-43a6-9c24-2da4985f2d87
|
||||||
|
#+BLOG: dragonfeu_blog
|
||||||
|
|
||||||
|
* Hey gemini in 2025
|
||||||
|
|
||||||
|
hey yoooooooooooo
|
||||||
|
|
|
@ -0,0 +1,24 @@
|
||||||
|
:PROPERTIES:
|
||||||
|
:ID: 7a77f219-b581-4a67-be7a-c66588b7e3f7
|
||||||
|
:END:
|
||||||
|
|
||||||
|
#+title: Coucou gemini en 2025
|
||||||
|
#+post_ID:
|
||||||
|
#+post_slug: coucou-gemini-en-2025
|
||||||
|
|
||||||
|
|
||||||
|
#+post_url: https://www.ciperbliss.com/2025/coucou-gemini-en-2025
|
||||||
|
#+post_title: Coucou gemini en 2025
|
||||||
|
#+post_tags:
|
||||||
|
#+post_series:
|
||||||
|
#+post_type: post
|
||||||
|
#+post_status: publish
|
||||||
|
#+post_picture:
|
||||||
|
#+post_date_published: <2025-02-27 15:46:59>
|
||||||
|
#+post_date_modified: <2025-02-27 15:46:59>
|
||||||
|
#+post_index_page_roam_id: 7a77f219-b581-4a67-be7a-c66588b7e3f7
|
||||||
|
#+BLOG: dragonfeu_blog
|
||||||
|
|
||||||
|
* Coucou gemini en 2025
|
||||||
|
|
||||||
|
|
72
test_org_conversion.py
Normal file
72
test_org_conversion.py
Normal file
|
@ -0,0 +1,72 @@
|
||||||
|
import pytest
|
||||||
|
from utils import convert_org_to_gemini
|
||||||
|
|
||||||
|
def test_org_to_gemini_conversion():
|
||||||
|
# Exemple de contenu org
|
||||||
|
org_content = """#+TITLE: Test Article
|
||||||
|
#+AUTHOR: John Doe
|
||||||
|
#+DATE: 2024-03-14
|
||||||
|
|
||||||
|
* Premier titre
|
||||||
|
Voici du texte simple.
|
||||||
|
|
||||||
|
** Sous-titre
|
||||||
|
- Liste item 1
|
||||||
|
- Liste item 2
|
||||||
|
|
||||||
|
* Deuxième titre
|
||||||
|
Un lien [[https://example.com][Example]]
|
||||||
|
Et du *texte en gras* avec /italique/."""
|
||||||
|
|
||||||
|
# Convertir le contenu directement
|
||||||
|
result = convert_org_to_gemini(org_content)
|
||||||
|
result = result.strip()
|
||||||
|
print(f"result: {result}")
|
||||||
|
# Vérifier les éléments clés de la conversion
|
||||||
|
assert "# Premier titre" in result
|
||||||
|
assert "## Sous-titre" in result
|
||||||
|
assert "* Liste item 1" in result
|
||||||
|
assert "* Liste item 2" in result
|
||||||
|
assert "=> https://example.com Example" in result
|
||||||
|
|
||||||
|
def test_org_to_gemini_tags():
|
||||||
|
"""Test de la détection des tags"""
|
||||||
|
org_content = """#+TITLE: Test Article
|
||||||
|
#+TAGS: chaton, mignon, félin
|
||||||
|
|
||||||
|
* Un article sur les chatons
|
||||||
|
Du contenu sur les chatons..."""
|
||||||
|
|
||||||
|
result = find_tags_in_org_content(org_content)
|
||||||
|
assert "chaton" in result, "Le tag 'chaton' devrait être présent dans le résultat"
|
||||||
|
|
||||||
|
|
||||||
|
# def test_org_to_gemini_code_blocks():
|
||||||
|
# """Test de la conversion des blocs de code"""
|
||||||
|
# org_content = """#+BEGIN_SRC python
|
||||||
|
# def hello():
|
||||||
|
# print("Hello, World!")
|
||||||
|
# #+END_SRC"""
|
||||||
|
|
||||||
|
# result = convert_org_to_gemini(org_content)
|
||||||
|
# assert "```python" in result
|
||||||
|
# assert "def hello():" in result
|
||||||
|
# assert 'print("Hello, World!")' in result
|
||||||
|
# assert "```" in result
|
||||||
|
|
||||||
|
# def test_org_to_gemini_tables():
|
||||||
|
# """Test de la conversion des tableaux"""
|
||||||
|
# org_content = """| Colonne 1 | Colonne 2 |
|
||||||
|
# |-----------|-----------|
|
||||||
|
# | Valeur 1 | Valeur 2 |
|
||||||
|
# | Valeur 3 | Valeur 4 |"""
|
||||||
|
|
||||||
|
# result = convert_org_to_gemini(org_content)
|
||||||
|
# # Vérifier que le tableau est converti en texte lisible
|
||||||
|
# assert "Colonne 1" in result
|
||||||
|
# assert "Colonne 2" in result
|
||||||
|
# assert "Valeur 1" in result
|
||||||
|
# assert "Valeur 2" in result
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
pytest.main([__file__])
|
185
utils.py
185
utils.py
|
@ -5,6 +5,9 @@ import shutil
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import unicodedata
|
import unicodedata
|
||||||
import pypandoc
|
import pypandoc
|
||||||
|
import subprocess
|
||||||
|
import tempfile
|
||||||
|
from md2gemini import md2gemini
|
||||||
|
|
||||||
from website_config import *
|
from website_config import *
|
||||||
|
|
||||||
|
@ -82,22 +85,23 @@ def get_blog_template_conf(blogname) -> dict:
|
||||||
else:
|
else:
|
||||||
return configs_sites[blogname]
|
return configs_sites[blogname]
|
||||||
|
|
||||||
def find_year_and_slug_on_filename(fichier):
|
def find_year_and_slug_on_filename(filename):
|
||||||
fichier = fichier.replace('..', '.')
|
print(f"Traitement du fichier: {filename}") # Debug
|
||||||
slug = ''
|
try:
|
||||||
annee = datetime.now().year
|
# Supposons que le format attendu est "YYYYMMDDHHMMSS-slug.org"
|
||||||
date_str = f'{annee}-00-00'
|
date_str = filename[:14] # Prend les 14 premiers caractères pour la date
|
||||||
date = f'{annee}-00-00'
|
annee = date_str[:4] # Prend les 4 premiers caractères pour l'année
|
||||||
boom = fichier.split('__')
|
|
||||||
|
# Gestion plus robuste du slug
|
||||||
if boom :
|
if '-' in filename:
|
||||||
date_str = boom[0]
|
slug = filename.split('-', 1)[1].replace('.org', '')
|
||||||
annee = date_str[:4]
|
else:
|
||||||
slug = boom[1].replace('.org', '')
|
slug = filename.replace('.org', '')
|
||||||
if "-" in date_str:
|
|
||||||
slug = enlever_premier_tiret_ou_underscore(slug)
|
return date_str, annee, slug
|
||||||
return [date_str, annee, slug]
|
except Exception as e:
|
||||||
return [date_str, annee, fichier.replace(' ', '-').replace('.org', '')]
|
print(f"Format de fichier non standard: {filename}")
|
||||||
|
return None, None, filename.replace('.org', '')
|
||||||
|
|
||||||
|
|
||||||
def enlever_premier_tiret_ou_underscore(chaîne):
|
def enlever_premier_tiret_ou_underscore(chaîne):
|
||||||
|
@ -212,21 +216,42 @@ def add_tags_from_content(tags=None, file_content="", words_to_check=None):
|
||||||
tags.add(word)
|
tags.add(word)
|
||||||
|
|
||||||
return tags
|
return tags
|
||||||
|
# Module-level accumulator: paths of the files in which no tag was found.
untagged_files = []


def save_untagged_files(output_file="sources/site_web/build/articles_without_tags.json"):
    """
    Write the list of untagged files to a JSON file.

    :param output_file: Path of the JSON file to write. Missing parent
        directories are created first.
    """
    import json
    import os

    # A bare file name has an empty dirname and os.makedirs('') raises
    # FileNotFoundError, so the parent is only created when there is one.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    print('save_untagged_files', len(untagged_files))
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(untagged_files, f, ensure_ascii=False, indent=4)
|
||||||
|
|
||||||
|
|
||||||
|
def extract_tags_from_file(file_path, excluded_tags, auto_detected_tags_list=global_config['auto_tag_terms']):
|
||||||
tags = set()
|
tags = set()
|
||||||
with open(file_path, 'r', encoding='utf-8') as file_content:
|
with open(file_path, 'r', encoding='utf-8') as file_content:
|
||||||
tag_found = False
|
tag_found = False
|
||||||
for line in file_content:
|
for line in file_content:
|
||||||
if global_config['automatic_tagging_enabled']:
|
if global_config['automatic_tagging_enabled'] and len(auto_detected_tags_list) > 0:
|
||||||
tags = add_tags_from_content(tags, line, global_config['auto_tag_terms'])
|
tags = add_tags_from_content(tags, line, auto_detected_tags_list)
|
||||||
# Check for orgmode tags :tag1:tag2:
|
# Check for orgmode tags :tag1:tag2:
|
||||||
if ':' in line:
|
if global_config.get('automatic_tagging_org_files', True):
|
||||||
for word in line.split():
|
if ':' in line:
|
||||||
if len(word) and word.startswith(':') and word.endswith(':'):
|
for word in line.split():
|
||||||
tag = word[1:-1]
|
if len(word) > 1 and word.startswith(':') and word.endswith(':'):
|
||||||
if tag not in excluded_tags:
|
tag = word[1:-1]
|
||||||
tags.add(tag)
|
if tag not in excluded_tags:
|
||||||
|
tags.add(tag)
|
||||||
tag_found = True
|
tag_found = True
|
||||||
# Check for #+tags: tag1,tag2
|
# Check for #+tags: tag1,tag2
|
||||||
if line.startswith('#+tags:'):
|
if line.startswith('#+tags:'):
|
||||||
|
@ -236,7 +261,8 @@ def extract_tags_from_file(file_path, excluded_tags):
|
||||||
tags.add(tag)
|
tags.add(tag)
|
||||||
tag_found = True
|
tag_found = True
|
||||||
|
|
||||||
# if not tag_found:
|
if not tag_found:
|
||||||
|
untagged_files.append(file_path)
|
||||||
# print('no tag in the article', file_path)
|
# print('no tag in the article', file_path)
|
||||||
return tags
|
return tags
|
||||||
|
|
||||||
|
@ -380,3 +406,110 @@ def convert_org_to_html(org_file, output_html_file):
|
||||||
print(f"Conversion réussie : {org_file} -> {output_html_file}")
|
print(f"Conversion réussie : {org_file} -> {output_html_file}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Erreur lors de la conversion de {org_file} : {e}")
|
print(f"Erreur lors de la conversion de {org_file} : {e}")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def get_first_picture_url(content):
    """
    Return the target of the first Org link found in the content.

    Both the bare ``[[target]]`` form and the described
    ``[[target][description]]`` form are recognised; only the target is
    returned.

    NOTE(review): any Org link matches, not only links to image files.

    :param content: Org-mode text to search.
    :return: The link target, or None when the content holds no link.
    """
    # The target stops at the first bracket so that a described link does not
    # leak "][description" into the returned value.
    pattern = r'\[\[([^\[\]]+)\](?:\[.*?\])?\]'
    match = re.search(pattern, content)
    return match.group(1) if match else None
|
||||||
|
|
||||||
|
|
||||||
|
def org_to_gmi(org_text: str, output_filename_slug: str, destination_dir=None) -> str:
    """
    Convert Org text with pypandoc and save the result as a ``.gmi`` file.

    NOTE(review): the pandoc target format is 'markdown', so the saved file
    holds Markdown rather than Gemini markup. convert_org_to_gemini() in this
    module chains pandoc with md2gemini; confirm which output is wanted here.

    Args:
        org_text (str): Org-formatted text to convert.
        output_filename_slug (str): Name of the output file, without extension.
        destination_dir (str, optional): Folder receiving the file. Defaults
            to the global ``destination_gmi``.

    Returns:
        str: The converted text, or an empty string when the conversion fails.
    """
    try:
        output = pypandoc.convert_text(org_text, 'markdown', format='org')
    except RuntimeError as e:
        print(f"Erreur de conversion : {e}")
        # Honour the declared ``str`` return type instead of returning None.
        return ''

    if destination_dir is None:
        # presumably supplied by the surrounding configuration; verify that
        # ``destination_gmi`` is actually defined in this module.
        destination_dir = destination_gmi

    # A write failure is reported but not fatal: the converted text is still
    # handed back to the caller.
    try:
        with open(destination_dir + '/' + output_filename_slug + '.gmi', 'w', encoding='utf-8') as f:
            f.write(output)
        print(f"Fichier GMI sauvegardé avec succès : {output_filename_slug}")
    except OSError as e:
        print(f"Erreur lors de la sauvegarde du fichier : {e}")
    return output
|
||||||
|
|
||||||
|
def count_files_in_directories(directories, extension=None):
    """
    Count the files found under the given directories, sub-folders included.

    Each file is counted once even when the directories overlap (for instance
    a folder passed together with one of its own sub-folders).

    Args:
        directories (list): Paths of the folders to walk. Folders that do not
            exist contribute nothing.
        extension (str, optional): When given, only file names ending with
            this suffix are counted.

    Returns:
        int: Number of distinct files found.
    """
    seen_paths = set()
    for directory in directories:
        for root, _dirs, files in os.walk(directory):
            for file_name in files:
                if extension is not None and not file_name.endswith(extension):
                    continue
                # Absolute paths make the same file reached through two
                # different starting folders collapse into one entry.
                seen_paths.add(os.path.abspath(os.path.join(root, file_name)))
    return len(seen_paths)
|
||||||
|
|
||||||
|
|
||||||
|
def convert_org_to_gemini(org_content):
    """
    Convert Org content to Gemini markup using pandoc, then md2gemini.

    Args:
        org_content (str): Org-formatted content.

    Returns:
        str: The converted content, stripped of surrounding whitespace.

    Raises:
        subprocess.CalledProcessError: When pandoc exits with a non-zero status.
        Exception: Any other conversion error is printed, then re-raised.
    """
    # Step 1: Org -> Markdown with pandoc. The text goes through stdin/stdout
    # rather than a NamedTemporaryFile, whose name cannot portably be opened a
    # second time while the file is still open. UTF-8 is forced because pandoc
    # reads and writes UTF-8 whatever the locale is.
    pandoc_cmd = ['pandoc', '-f', 'org', '-t', 'markdown']
    try:
        completed = subprocess.run(
            pandoc_cmd,
            input=org_content,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            encoding='utf-8',
            check=True,
        )

        # Step 2: Markdown -> Gemini with md2gemini.
        gemini_content = md2gemini(
            completed.stdout,
            frontmatter=True,
            links='inline',
        )
        return gemini_content.strip()

    except subprocess.CalledProcessError as e:
        print(f"Erreur lors de la conversion avec pandoc: {e.stderr}")
        raise
    except Exception as e:
        print(f"Erreur lors de la conversion: {str(e)}")
        raise
|
||||||
|
|
|
@ -4,7 +4,9 @@ global_config = {
|
||||||
"slug_with_year": True,
|
"slug_with_year": True,
|
||||||
# "show_logs": False,
|
# "show_logs": False,
|
||||||
"show_logs": True,
|
"show_logs": True,
|
||||||
|
"lang_default": "fr",
|
||||||
"automatic_tagging_enabled": True,
|
"automatic_tagging_enabled": True,
|
||||||
|
"automatic_tagging_org_files": True,
|
||||||
"rebuild_files_filter": 2024,
|
"rebuild_files_filter": 2024,
|
||||||
"posts_per_page": 10,
|
"posts_per_page": 10,
|
||||||
"source_files_extension": "org",
|
"source_files_extension": "org",
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue