mirror of
https://forge.chapril.org/tykayn/orgmode-to-gemini-blog
synced 2025-06-20 09:04:42 +02:00
convert to gemini avec md2gemini, conversion de plusieurs langues
This commit is contained in:
parent
255e8fdc04
commit
bba1df0377
10 changed files with 462 additions and 202 deletions
|
@ -67,6 +67,9 @@ def group_files_by_tags(org_files, excluded_tags):
|
|||
|
||||
for tag in tags:
|
||||
tag_to_files[tag].add(slug)
|
||||
# Sauvegarder les fichiers sans tags
|
||||
save_untagged_files(output_file=f"sources/{blog_folder}/build/articles_without_tags.json")
|
||||
|
||||
return tag_to_files
|
||||
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#!/bin/bash
|
||||
sudo apt install pandoc python3-pip npm
|
||||
pip install uuid argparse os md2gemini
|
||||
pip install uuid argparse os md2gemini pytest pypandoc
|
||||
npm install -g sass
|
|
@ -42,7 +42,12 @@ def get_basename(file_name):
|
|||
return os.path.splitext(file_name)[0]
|
||||
|
||||
# Chemin du dossier contenant les fichiers orgmode
|
||||
directory = f'sources/{args.blog}/lang_fr'
|
||||
directory_pages = f'sources/{args.blog}/'
|
||||
directory_fr = f'sources/{args.blog}/lang_fr'
|
||||
directory_en = f'sources/{args.blog}/lang_en'
|
||||
|
||||
directories_to_scan = [directory_pages, directory_fr, directory_en]
|
||||
|
||||
destination_json = f'sources/{args.blog}/build'
|
||||
destination_html = f'html-websites/{args.blog}/'
|
||||
destination_gmi = f'gemini-capsules/{args.blog}/'
|
||||
|
@ -64,188 +69,157 @@ else:
|
|||
files_dict = {}
|
||||
|
||||
|
||||
def get_first_picture_url(content):
|
||||
# Utiliser une expression régulière pour trouver la première URL d'image dans le contenu
|
||||
pattern = r'\[\[(.*?)\]\]'
|
||||
match = re.search(pattern, content)
|
||||
if match:
|
||||
return match.group(1)
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
def org_to_gmi(org_text: str, output_filename_slug: str) -> str:
|
||||
"""
|
||||
Convertit un texte au format Org en un fichier au format GMI (Gemini)
|
||||
en utilisant pypandoc.
|
||||
count_articles = count_files_in_directories(directories_to_scan)
|
||||
|
||||
Args:
|
||||
- org_text (str): Le texte au format Org à convertir.
|
||||
- output_file (str): Chemin du fichier de sortie au format GMI, sans avoir à préciser l'extension.
|
||||
"""
|
||||
output = """
|
||||
# mock land output
|
||||
===========
|
||||
|
||||
blah blah blah
|
||||
|
||||
-----------------
|
||||
Tykayn blog mock content
|
||||
-----------------
|
||||
|
||||
Navigation:
|
||||
|
||||
=> accueil.gmi Accueil
|
||||
=> a-propos.gmi à propos
|
||||
"""
|
||||
# Conversion du texte Org en GMI via Pandoc
|
||||
try:
|
||||
output = pypandoc.convert_text(org_text, 'markdown', format='org')
|
||||
except RuntimeError as e:
|
||||
print(f"Erreur de conversion : {e}")
|
||||
return
|
||||
|
||||
# Sauvegarde du contenu GMI dans un fichier
|
||||
try:
|
||||
with open(destination_gmi+'/'+output_filename_slug+'.gmi', 'w', encoding='utf-8') as f:
|
||||
f.write(output)
|
||||
print(f"Fichier GMI sauvegardé avec succès : {output_filename_slug}")
|
||||
except OSError as e:
|
||||
print(f"Erreur lors de la sauvegarde du fichier : {e}")
|
||||
return output
|
||||
|
||||
count_articles = len(os.listdir(directory))
|
||||
counter=0
|
||||
rebuild_counter = 0
|
||||
pandoc_runs_counter = 0
|
||||
lang_folder = global_config.get('lang_default', 'fr')
|
||||
|
||||
if generate_linkings_json :
|
||||
|
||||
print(f"Génération des liens entre articles pour {count_articles} articles")
|
||||
print(f"run_pandoc: {run_pandoc}")
|
||||
print(f"run_gemini: {run_gemini}")
|
||||
|
||||
article_type = "article"
|
||||
# Parcourir les fichiers du dossier
|
||||
for file_name in os.listdir(directory):
|
||||
if file_name.endswith('.org'):
|
||||
counter+=1
|
||||
if force_html_regen and counter % 10 == 0:
|
||||
print(f"{time.strftime('%H:%M:%S')} : Articles traités : {counter}/{count_articles}")
|
||||
file_path = os.path.join(directory, file_name)
|
||||
with open(file_path, "r", encoding="utf-8") as f:
|
||||
content = f.read()
|
||||
date_modified = time.ctime(os.path.getmtime(file_path))
|
||||
|
||||
basename = get_basename(file_name)
|
||||
date_str, annee, slug = find_year_and_slug_on_filename(basename)
|
||||
tags = extract_tags_from_file(file_path, global_config['excluded_tags'])
|
||||
for index, directory in enumerate(directories_to_scan):
|
||||
# Déterminer le type d'article en fonction du chemin
|
||||
if directory == '/':
|
||||
article_type = "page"
|
||||
else:
|
||||
article_type = "article"
|
||||
# Extraire la langue du dossier si elle commence par "lang_"
|
||||
if directory.split('/')[-1].startswith('lang_'):
|
||||
lang_folder = directory.split('/')[-1][5:] # Prend les caractères après "lang_"
|
||||
for file_name in os.listdir(directory):
|
||||
if file_name.endswith('.org'):
|
||||
counter+=1
|
||||
if force_html_regen and counter % 10 == 0:
|
||||
print(f"{time.strftime('%H:%M:%S')} : Articles traités : {counter}/{count_articles}")
|
||||
file_path = os.path.join(directory, file_name)
|
||||
with open(file_path, "r", encoding="utf-8") as f:
|
||||
content = f.read()
|
||||
date_modified = time.ctime(os.path.getmtime(file_path))
|
||||
|
||||
# Convertir les tags en liste si c'est un set
|
||||
if isinstance(tags, set):
|
||||
tags = list(tags)
|
||||
boom = basename.split('__')
|
||||
# Convertir le contenu Org en HTML
|
||||
title = find_first_level1_title(content)
|
||||
basename = get_basename(file_name)
|
||||
date_str, annee, slug = find_year_and_slug_on_filename(basename)
|
||||
tags = extract_tags_from_file(file_path, global_config['excluded_tags'])
|
||||
|
||||
# Désactiver les warning d'identifiant dupliqué dans la conversion pandoc
|
||||
content_without_h1 = re.sub(r'^\*.*?$', '', content, count=1, flags=re.MULTILINE)
|
||||
# Convertir les tags en liste si c'est un set
|
||||
if isinstance(tags, set):
|
||||
tags = list(tags)
|
||||
boom = basename.split('__')
|
||||
# Convertir le contenu Org en HTML
|
||||
title = find_first_level1_title(content)
|
||||
|
||||
gemini_content = ''
|
||||
html_content = ''
|
||||
html_content_without_h1 = ''
|
||||
# Vérifier l'existence du fichier HTML pour déterminer last_html_build
|
||||
html_path = f"html-websites/{args.blog}/{annee}/{slug}/index.html"
|
||||
last_html_build_time = None
|
||||
if os.path.exists(html_path):
|
||||
# Obtenir la date de création du fichier HTML
|
||||
last_html_build_time = os.path.getctime(html_path)
|
||||
# Désactiver les warning d'identifiant dupliqué dans la conversion pandoc
|
||||
content_without_h1 = re.sub(r'^\*.*?$', '', content, count=1, flags=re.MULTILINE)
|
||||
|
||||
# print(f"last_html_build: {last_html_build_time} : {html_path}")
|
||||
else:
|
||||
print(f"----------- last_html_build html_path: {html_path} n'existe pas")
|
||||
# Vérifier l'existence du fichier Gemini pour déterminer last_gemini_build
|
||||
gemini_path = f"gemini-capsules/{args.blog}/{slug}.gmi"
|
||||
last_gemini_build = None
|
||||
rebuild_this_article_gemini = False
|
||||
if os.path.exists(gemini_path):
|
||||
last_gemini_build = time.ctime(os.path.getmtime(gemini_path))
|
||||
# Vérifier si l'article doit être reconstruit en comparant les dates de modification
|
||||
if last_gemini_build:
|
||||
file_modified_time = os.path.getmtime(file_path)
|
||||
last_build_time = time.mktime(time.strptime(last_gemini_build))
|
||||
rebuild_this_article_gemini = file_modified_time > last_build_time
|
||||
else:
|
||||
gemini_content = ''
|
||||
html_content = ''
|
||||
html_content_without_h1 = ''
|
||||
# Vérifier l'existence du fichier HTML pour déterminer last_html_build
|
||||
html_path = f"html-websites/{args.blog}/{annee}/{slug}/index.html"
|
||||
last_html_build_time = None
|
||||
if os.path.exists(html_path):
|
||||
# Obtenir la date de création du fichier HTML
|
||||
last_html_build_time = os.path.getctime(html_path)
|
||||
|
||||
rebuild_this_article_gemini = True
|
||||
|
||||
# Vérifier si l'article doit être reconstruit en comparant les dates de modification
|
||||
rebuild_this_article_html = False
|
||||
if last_html_build_time:
|
||||
file_modified_time = os.path.getmtime(file_path)
|
||||
# print(f"--------- file_modified_time: {file_path} : {file_modified_time}")
|
||||
# Obtenir l'heure de dernière modification du fichier HTML
|
||||
|
||||
rebuild_this_article_html = file_modified_time > last_html_build_time
|
||||
# print(f"--------- article modifié après le build de son rendu html: {file_path}, {rebuild_this_article_html}")
|
||||
else:
|
||||
# si il n'y a pas de fichier html, on le construit pour la première fois
|
||||
print('on reconstruit le html de l\'article', file_name)
|
||||
|
||||
rebuild_this_article_html = True
|
||||
|
||||
if rebuild_this_article_html:
|
||||
rebuild_counter += 1
|
||||
|
||||
|
||||
# Garder le contenu HTML existant si déjà présent
|
||||
if f"{annee}/{slug}" in files_dict and 'html_content' in files_dict[f"{annee}/{slug}"]:
|
||||
print('on reprend le contenu html existant')
|
||||
if len(files_dict[f"{annee}/{slug}"]['html_content']) > 0:
|
||||
html_content = files_dict[f"{annee}/{slug}"]['html_content']
|
||||
if len(files_dict[f"{annee}/{slug}"]['html_content_without_h1']) > 0:
|
||||
html_content_without_h1 = files_dict[f"{annee}/{slug}"]['html_content_without_h1']
|
||||
# print(f"last_html_build: {last_html_build_time} : {html_path}")
|
||||
else:
|
||||
print(f"----------- last_html_build html_path: {html_path} n'existe pas")
|
||||
# Vérifier l'existence du fichier Gemini pour déterminer last_gemini_build
|
||||
gemini_path = f"gemini-capsules/{args.blog}/{slug}.gmi"
|
||||
last_gemini_build = None
|
||||
rebuild_this_article_gemini = False
|
||||
if os.path.exists(gemini_path):
|
||||
last_gemini_build = time.ctime(os.path.getmtime(gemini_path))
|
||||
# Vérifier si l'article doit être reconstruit en comparant les dates de modification
|
||||
if last_gemini_build:
|
||||
file_modified_time = os.path.getmtime(file_path)
|
||||
last_build_time = time.mktime(time.strptime(last_gemini_build))
|
||||
rebuild_this_article_gemini = file_modified_time > last_build_time
|
||||
else:
|
||||
html_content_without_h1 = re.sub(r'<h1>.*?</h1>', '', html_content)
|
||||
|
||||
if run_pandoc and rebuild_this_article_html or force_html_regen:
|
||||
# convertir le contenu d'article org vers html
|
||||
print(f"\033[91mBRRRRRRRRRRRRR pandoc time {time.strftime('%H:%M:%S')} : Conversion de {file_name} en html\033[0m")
|
||||
rebuild_this_article_gemini = True
|
||||
|
||||
html_content = pypandoc.convert_text(content_without_h1, 'html', format='org')
|
||||
pandoc_runs_counter += 1
|
||||
else:
|
||||
html_content = content_without_h1
|
||||
# Vérifier si l'article doit être reconstruit en comparant les dates de modification
|
||||
rebuild_this_article_html = False
|
||||
if last_html_build_time:
|
||||
file_modified_time = os.path.getmtime(file_path)
|
||||
# print(f"--------- file_modified_time: {file_path} : {file_modified_time}")
|
||||
# Obtenir l'heure de dernière modification du fichier HTML
|
||||
|
||||
if run_gemini and rebuild_this_article_gemini:
|
||||
os.makedirs(destination_gmi, exist_ok=True)
|
||||
# convertir le contenu d'article org vers gmi pour la capsule gemini
|
||||
gemini_content = org_to_gmi(content_without_h1, slug)
|
||||
rebuild_this_article_html = file_modified_time > last_html_build_time
|
||||
# print(f"--------- article modifié après le build de son rendu html: {file_path}, {rebuild_this_article_html}")
|
||||
else:
|
||||
# si il n'y a pas de fichier html, on le construit pour la première fois
|
||||
print('on reconstruit le html de l\'article', file_name)
|
||||
|
||||
rebuild_this_article_html = True
|
||||
|
||||
if rebuild_this_article_html:
|
||||
rebuild_counter += 1
|
||||
|
||||
|
||||
# Garder le contenu HTML existant si déjà présent
|
||||
if f"{annee}/{slug}" in files_dict and 'html_content' in files_dict[f"{annee}/{slug}"]:
|
||||
print('on reprend le contenu html existant')
|
||||
if len(files_dict[f"{annee}/{slug}"]['html_content']) > 0:
|
||||
html_content = files_dict[f"{annee}/{slug}"]['html_content']
|
||||
if len(files_dict[f"{annee}/{slug}"]['html_content_without_h1']) > 0:
|
||||
html_content_without_h1 = files_dict[f"{annee}/{slug}"]['html_content_without_h1']
|
||||
else:
|
||||
html_content_without_h1 = re.sub(r'<h1>.*?</h1>', '', html_content)
|
||||
|
||||
if run_pandoc and rebuild_this_article_html or force_html_regen:
|
||||
# convertir le contenu d'article org vers html
|
||||
# print(f"\033[91mBRRRRRRRRRRRRR pandoc time {time.strftime('%H:%M:%S')} : Conversion de {file_name} en html\033[0m")
|
||||
print(f"\033[91m.\033[0m", end='', flush=True)
|
||||
|
||||
html_content = pypandoc.convert_text(content_without_h1, 'html', format='org')
|
||||
pandoc_runs_counter += 1
|
||||
else:
|
||||
html_content = content_without_h1
|
||||
|
||||
if run_gemini and rebuild_this_article_gemini:
|
||||
os.makedirs(destination_gmi, exist_ok=True)
|
||||
# convertir le contenu d'article org vers gmi pour la capsule gemini
|
||||
print(f"Conversion de {file_name} en gemini")
|
||||
gemini_content = org_to_gmi(content_without_h1, slug)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
files_dict[f"{annee}/{slug}"] = {
|
||||
'path': file_path,
|
||||
'basename': basename,
|
||||
'roam_id': find_org_roam_id(content),
|
||||
'slug': f"{slug}/",
|
||||
'slug_with_year': f"{annee}/{slug}",
|
||||
'date': boom[0],
|
||||
'date_modified' : date_modified,
|
||||
'first_picture_url' : get_first_picture_url(content),
|
||||
'date_formattee': datetime.strptime(date_str, '%Y%m%d%H%M%S').strftime('%d %B %Y à %H:%M:%S') if len(date_str) == 14 else datetime.strptime(date_str, '%Y%m%dT%H%M%S').strftime('%d %B %Y à %H:%M:%S') if len(date_str) == 15 else datetime.strptime(date_str, '%Y-%m-%d').strftime('%d %B %Y'),
|
||||
'annee': annee,
|
||||
'tags': tags,
|
||||
'title': title,
|
||||
'next': None,
|
||||
'previous': None,
|
||||
'last_html_build': last_html_build_time,
|
||||
'last_gemini_build': last_gemini_build,
|
||||
'org_content': content, # Contenu Org original
|
||||
'html_content_without_h1': html_content_without_h1, # Contenu HTML converti sans le titre de premier niveau
|
||||
'html_content': html_content # Contenu first_picture_urlHTML converti
|
||||
}
|
||||
files_dict[f"{annee}/{slug}"] = {
|
||||
'path': file_path,
|
||||
'basename': basename,
|
||||
'roam_id': find_org_roam_id(content),
|
||||
'slug': f"{slug}/",
|
||||
'slug_with_year': f"{annee}/{slug}",
|
||||
'date': boom[0],
|
||||
'lang': lang_folder,
|
||||
'article_type': article_type,
|
||||
'date_modified' : date_modified,
|
||||
'first_picture_url' : get_first_picture_url(content),
|
||||
'date_formattee': datetime.strptime(date_str, '%Y%m%d%H%M%S').strftime('%d %B %Y à %H:%M:%S') if len(date_str) == 14 else datetime.strptime(date_str, '%Y%m%dT%H%M%S').strftime('%d %B %Y à %H:%M:%S') if len(date_str) == 15 else datetime.strptime(date_str, '%Y-%m-%d').strftime('%d %B %Y'),
|
||||
'annee': annee,
|
||||
'tags': tags,
|
||||
'title': title,
|
||||
'next': None,
|
||||
'previous': None,
|
||||
'last_html_build': last_html_build_time,
|
||||
'last_gemini_build': last_gemini_build,
|
||||
'org_content': content, # Contenu Org original
|
||||
'html_content_without_h1': html_content_without_h1, # Contenu HTML converti sans le titre de premier niveau
|
||||
'html_content': html_content, # Contenu first_picture_urlHTML converti
|
||||
'gemini_content': gemini_content, # Contenu gemini
|
||||
}
|
||||
|
||||
print(f"======= Nombre d'articles reconstruits: {rebuild_counter}")
|
||||
print(f"======= Nombre de runs de pandoc: {pandoc_runs_counter}")
|
||||
|
|
|
@ -89,34 +89,63 @@ def create_uuid_property():
|
|||
uuid_value = uuid.uuid4()
|
||||
return uuid_value
|
||||
|
||||
# Écriture du fichier org
|
||||
with open(filename, "w") as f:
|
||||
uuid = create_uuid_property()
|
||||
f.write(f"""
|
||||
:PROPERTIES:
|
||||
:ID: {uuid}
|
||||
def make_article(config):
|
||||
"""
|
||||
Crée le contenu d'un nouvel article avec les propriétés spécifiées.
|
||||
|
||||
Args:
|
||||
config (dict): Dictionnaire contenant les paramètres de l'article:
|
||||
- uuid (str): Identifiant unique de l'article
|
||||
- slug (str): Slug de l'URL de l'article
|
||||
- title (str): Titre de l'article
|
||||
- date_string_full (str): Date complète au format YYYY-MM-DD HH:MM:SS
|
||||
- date_string (str): Date au format YYYYMMDDHHMMSS
|
||||
- schema_slug (str): Slug avec ou sans préfixe année selon la config
|
||||
- blog_dir (str): Dossier du blog
|
||||
|
||||
Returns:
|
||||
str: Contenu formaté de l'article avec les propriétés et métadonnées
|
||||
"""
|
||||
|
||||
|
||||
return f""":PROPERTIES:
|
||||
:ID: {config.get('uuid')}
|
||||
:END:
|
||||
|
||||
#+title: {args.title}
|
||||
#+title: {config.get('title')}
|
||||
#+post_ID:
|
||||
#+post_slug: {slug}
|
||||
#+post_slug: {config.get('slug')}
|
||||
|
||||
|
||||
#+post_url: https://www.ciperbliss.com/{schema_slug}
|
||||
#+post_title: {args.title}
|
||||
#+post_url: https://www.ciperbliss.com/{config.get('schema_slug')}
|
||||
#+post_title: {config.get('title')}
|
||||
#+post_tags:
|
||||
#+post_series:
|
||||
#+post_type: post
|
||||
#+post_status: publish
|
||||
#+post_picture:
|
||||
#+post_date_published: <{date_string_full}>
|
||||
#+post_date_modified: <{date_string_full}>
|
||||
#+post_index_page_roam_id: {uuid}
|
||||
#+BLOG: {args.blog_dir}
|
||||
#+post_date_published: <{config.get('date_string_full')}>
|
||||
#+post_date_modified: <{config.get('date_string_full')}>
|
||||
#+post_index_page_roam_id: {config.get('uuid')}
|
||||
#+BLOG: {config.get('blog_dir')}
|
||||
|
||||
* {args.title}
|
||||
* {config.get('title')}
|
||||
|
||||
|
||||
""")
|
||||
"""
|
||||
|
||||
# Écriture du fichier org
|
||||
with open(filename, "w") as f:
|
||||
uuid = create_uuid_property()
|
||||
config={
|
||||
'uuid': uuid,
|
||||
'slug': slug,
|
||||
'title': args.title,
|
||||
'date_string_full': date_string_full,
|
||||
'date_string': date_string,
|
||||
'schema_slug': schema_slug,
|
||||
'blog_dir': args.blog_dir,
|
||||
}
|
||||
f.write(make_article(config))
|
||||
|
||||
print(f"Le fichier '{filename}' a été créé avec succès.")
|
||||
|
|
|
@ -1,2 +0,0 @@
|
|||
* Hi, giminiciens
|
||||
hop hop hello in English
|
|
@ -0,0 +1,25 @@
|
|||
:PROPERTIES:
|
||||
:ID: 41decd55-85b9-43a6-9c24-2da4985f2d87
|
||||
:END:
|
||||
|
||||
#+title: Coucou gemini en 2025
|
||||
#+post_ID:
|
||||
#+post_slug: coucou-gemini-en-2025
|
||||
|
||||
|
||||
#+post_url: https://www.ciperbliss.com/2025/coucou-gemini-en-2025
|
||||
#+post_title: Coucou gemini en 2025
|
||||
#+post_tags:
|
||||
#+post_series:
|
||||
#+post_type: post
|
||||
#+post_status: publish
|
||||
#+post_picture:
|
||||
#+post_date_published: <2025-02-27 15:41:04>
|
||||
#+post_date_modified: <2025-02-27 15:41:04>
|
||||
#+post_index_page_roam_id: 41decd55-85b9-43a6-9c24-2da4985f2d87
|
||||
#+BLOG: dragonfeu_blog
|
||||
|
||||
* Hey gemini in 2025
|
||||
|
||||
hey yoooooooooooo
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
:PROPERTIES:
|
||||
:ID: 7a77f219-b581-4a67-be7a-c66588b7e3f7
|
||||
:END:
|
||||
|
||||
#+title: Coucou gemini en 2025
|
||||
#+post_ID:
|
||||
#+post_slug: coucou-gemini-en-2025
|
||||
|
||||
|
||||
#+post_url: https://www.ciperbliss.com/2025/coucou-gemini-en-2025
|
||||
#+post_title: Coucou gemini en 2025
|
||||
#+post_tags:
|
||||
#+post_series:
|
||||
#+post_type: post
|
||||
#+post_status: publish
|
||||
#+post_picture:
|
||||
#+post_date_published: <2025-02-27 15:46:59>
|
||||
#+post_date_modified: <2025-02-27 15:46:59>
|
||||
#+post_index_page_roam_id: 7a77f219-b581-4a67-be7a-c66588b7e3f7
|
||||
#+BLOG: dragonfeu_blog
|
||||
|
||||
* Coucou gemini en 2025
|
||||
|
||||
|
72
test_org_conversion.py
Normal file
72
test_org_conversion.py
Normal file
|
@ -0,0 +1,72 @@
|
|||
import pytest
|
||||
from utils import convert_org_to_gemini
|
||||
|
||||
def test_org_to_gemini_conversion():
|
||||
# Exemple de contenu org
|
||||
org_content = """#+TITLE: Test Article
|
||||
#+AUTHOR: John Doe
|
||||
#+DATE: 2024-03-14
|
||||
|
||||
* Premier titre
|
||||
Voici du texte simple.
|
||||
|
||||
** Sous-titre
|
||||
- Liste item 1
|
||||
- Liste item 2
|
||||
|
||||
* Deuxième titre
|
||||
Un lien [[https://example.com][Example]]
|
||||
Et du *texte en gras* avec /italique/."""
|
||||
|
||||
# Convertir le contenu directement
|
||||
result = convert_org_to_gemini(org_content)
|
||||
result = result.strip()
|
||||
print(f"result: {result}")
|
||||
# Vérifier les éléments clés de la conversion
|
||||
assert "# Premier titre" in result
|
||||
assert "## Sous-titre" in result
|
||||
assert "* Liste item 1" in result
|
||||
assert "* Liste item 2" in result
|
||||
assert "=> https://example.com Example" in result
|
||||
|
||||
def test_org_to_gemini_tags():
|
||||
"""Test de la détection des tags"""
|
||||
org_content = """#+TITLE: Test Article
|
||||
#+TAGS: chaton, mignon, félin
|
||||
|
||||
* Un article sur les chatons
|
||||
Du contenu sur les chatons..."""
|
||||
|
||||
result = find_tags_in_org_content(org_content)
|
||||
assert "chaton" in result, "Le tag 'chaton' devrait être présent dans le résultat"
|
||||
|
||||
|
||||
# def test_org_to_gemini_code_blocks():
|
||||
# """Test de la conversion des blocs de code"""
|
||||
# org_content = """#+BEGIN_SRC python
|
||||
# def hello():
|
||||
# print("Hello, World!")
|
||||
# #+END_SRC"""
|
||||
|
||||
# result = convert_org_to_gemini(org_content)
|
||||
# assert "```python" in result
|
||||
# assert "def hello():" in result
|
||||
# assert 'print("Hello, World!")' in result
|
||||
# assert "```" in result
|
||||
|
||||
# def test_org_to_gemini_tables():
|
||||
# """Test de la conversion des tableaux"""
|
||||
# org_content = """| Colonne 1 | Colonne 2 |
|
||||
# |-----------|-----------|
|
||||
# | Valeur 1 | Valeur 2 |
|
||||
# | Valeur 3 | Valeur 4 |"""
|
||||
|
||||
# result = convert_org_to_gemini(org_content)
|
||||
# # Vérifier que le tableau est converti en texte lisible
|
||||
# assert "Colonne 1" in result
|
||||
# assert "Colonne 2" in result
|
||||
# assert "Valeur 1" in result
|
||||
# assert "Valeur 2" in result
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
183
utils.py
183
utils.py
|
@ -5,6 +5,9 @@ import shutil
|
|||
from datetime import datetime
|
||||
import unicodedata
|
||||
import pypandoc
|
||||
import subprocess
|
||||
import tempfile
|
||||
from md2gemini import md2gemini
|
||||
|
||||
from website_config import *
|
||||
|
||||
|
@ -82,22 +85,23 @@ def get_blog_template_conf(blogname) -> dict:
|
|||
else:
|
||||
return configs_sites[blogname]
|
||||
|
||||
def find_year_and_slug_on_filename(fichier):
|
||||
fichier = fichier.replace('..', '.')
|
||||
slug = ''
|
||||
annee = datetime.now().year
|
||||
date_str = f'{annee}-00-00'
|
||||
date = f'{annee}-00-00'
|
||||
boom = fichier.split('__')
|
||||
def find_year_and_slug_on_filename(filename):
|
||||
print(f"Traitement du fichier: {filename}") # Debug
|
||||
try:
|
||||
# Supposons que le format attendu est "YYYYMMDDHHMMSS-slug.org"
|
||||
date_str = filename[:14] # Prend les 14 premiers caractères pour la date
|
||||
annee = date_str[:4] # Prend les 4 premiers caractères pour l'année
|
||||
|
||||
if boom :
|
||||
date_str = boom[0]
|
||||
annee = date_str[:4]
|
||||
slug = boom[1].replace('.org', '')
|
||||
if "-" in date_str:
|
||||
slug = enlever_premier_tiret_ou_underscore(slug)
|
||||
return [date_str, annee, slug]
|
||||
return [date_str, annee, fichier.replace(' ', '-').replace('.org', '')]
|
||||
# Gestion plus robuste du slug
|
||||
if '-' in filename:
|
||||
slug = filename.split('-', 1)[1].replace('.org', '')
|
||||
else:
|
||||
slug = filename.replace('.org', '')
|
||||
|
||||
return date_str, annee, slug
|
||||
except Exception as e:
|
||||
print(f"Format de fichier non standard: {filename}")
|
||||
return None, None, filename.replace('.org', '')
|
||||
|
||||
|
||||
def enlever_premier_tiret_ou_underscore(chaîne):
|
||||
|
@ -212,21 +216,42 @@ def add_tags_from_content(tags=None, file_content="", words_to_check=None):
|
|||
tags.add(word)
|
||||
|
||||
return tags
|
||||
# Variable globale pour stocker les fichiers sans tags
|
||||
untagged_files = []
|
||||
|
||||
def extract_tags_from_file(file_path, excluded_tags):
|
||||
def save_untagged_files(output_file="sources/site_web/build/articles_without_tags.json"):
    """
    Save the list of files that have no tags to a JSON file.

    The list is read from the module-level ``untagged_files`` variable.

    :param output_file: Path of the JSON file to write. Missing parent
        folders are created; a bare file name (no folder part) is written
        to the current working directory.
    """
    import json
    import os

    # os.path.dirname() returns '' for a bare file name and os.makedirs('')
    # raises FileNotFoundError, so only create the folder when there is one.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    print('save_untagged_files', len(untagged_files))
    # Dump the list as human-readable JSON, keeping non-ASCII characters as-is.
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(untagged_files, f, ensure_ascii=False, indent=4)
|
||||
|
||||
|
||||
def extract_tags_from_file(file_path, excluded_tags, auto_detected_tags_list=global_config['auto_tag_terms']):
|
||||
tags = set()
|
||||
with open(file_path, 'r', encoding='utf-8') as file_content:
|
||||
tag_found = False
|
||||
for line in file_content:
|
||||
if global_config['automatic_tagging_enabled']:
|
||||
tags = add_tags_from_content(tags, line, global_config['auto_tag_terms'])
|
||||
if global_config['automatic_tagging_enabled'] and len(auto_detected_tags_list) > 0:
|
||||
tags = add_tags_from_content(tags, line, auto_detected_tags_list)
|
||||
# Check for orgmode tags :tag1:tag2:
|
||||
if ':' in line:
|
||||
for word in line.split():
|
||||
if len(word) and word.startswith(':') and word.endswith(':'):
|
||||
tag = word[1:-1]
|
||||
if tag not in excluded_tags:
|
||||
tags.add(tag)
|
||||
if global_config.get('automatic_tagging_org_files', True):
|
||||
if ':' in line:
|
||||
for word in line.split():
|
||||
if len(word) > 1 and word.startswith(':') and word.endswith(':'):
|
||||
tag = word[1:-1]
|
||||
if tag not in excluded_tags:
|
||||
tags.add(tag)
|
||||
tag_found = True
|
||||
# Check for #+tags: tag1,tag2
|
||||
if line.startswith('#+tags:'):
|
||||
|
@ -236,7 +261,8 @@ def extract_tags_from_file(file_path, excluded_tags):
|
|||
tags.add(tag)
|
||||
tag_found = True
|
||||
|
||||
# if not tag_found:
|
||||
if not tag_found:
|
||||
untagged_files.append(file_path)
|
||||
# print('no tag in the article', file_path)
|
||||
return tags
|
||||
|
||||
|
@ -380,3 +406,110 @@ def convert_org_to_html(org_file, output_html_file):
|
|||
print(f"Conversion réussie : {org_file} -> {output_html_file}")
|
||||
except Exception as e:
|
||||
print(f"Erreur lors de la conversion de {org_file} : {e}")
|
||||
|
||||
|
||||
|
||||
def get_first_picture_url(content):
    """
    Return the target of the first Org link (``[[...]]``) found in content.

    No check is made that the target is an image: the first double-bracket
    link wins, whatever it points to.

    :param content: Org text to search.
    :return: The link target as a string, or None when content has no link.
    """
    match = re.search(r'\[\[(.*?)\]\]', content)
    if match is None:
        return None
    # A described link is written [[target][description]]; the capture is
    # then 'target][description', so keep only the part before the first ']['.
    return match.group(1).split('][', 1)[0]
|
||||
|
||||
|
||||
def org_to_gmi(org_text: str, output_filename_slug: str) -> str:
    """
    Convert Org text with pypandoc and save the result as a ``.gmi`` file.

    The text is converted from the 'org' format to pandoc's 'markdown'
    output and written to ``<destination_gmi>/<output_filename_slug>.gmi``.

    Args:
        org_text (str): Org-formatted text to convert.
        output_filename_slug (str): Name of the output file, without folder
            and without the ``.gmi`` extension.

    Returns:
        The converted text. None is returned when pypandoc raises
        RuntimeError (the error is printed). When writing the file fails
        with OSError the error is printed and the text is still returned.
    """
    try:
        output = pypandoc.convert_text(org_text, 'markdown', format='org')
    except RuntimeError as e:
        print(f"Erreur de conversion : {e}")
        return None

    # NOTE(review): destination_gmi is read as a global name; confirm it is
    # defined in this module's scope.
    gmi_path = os.path.join(destination_gmi, output_filename_slug + '.gmi')
    try:
        with open(gmi_path, 'w', encoding='utf-8') as f:
            f.write(output)
        print(f"Fichier GMI sauvegardé avec succès : {output_filename_slug}")
    except OSError as e:
        print(f"Erreur lors de la sauvegarde du fichier : {e}")
    return output
|
||||
|
||||
def count_files_in_directories(directories, extension=None):
    """
    Count the files found under the given directories, recursively.

    Each file is counted once even when the directories overlap (for
    example a folder passed together with one of its sub-folders).

    :param directories: Iterable of directory paths to walk.
    :param extension: Optional file-name suffix (e.g. '.org'); when given,
        only files whose name ends with it are counted.
    :return: Number of distinct files found.
    """
    seen_paths = set()
    for directory in directories:
        for root, _dirs, files in os.walk(directory):
            for name in files:
                if extension is None or name.endswith(extension):
                    # Absolute paths make the same file reached through two
                    # overlapping roots collapse into a single entry.
                    seen_paths.add(os.path.abspath(os.path.join(root, name)))
    return len(seen_paths)
|
||||
|
||||
|
||||
def convert_org_to_gemini(org_content):
    """
    Convert Org content to Gemini text using pandoc and md2gemini.

    The Org text is piped to the ``pandoc`` executable to get Markdown,
    which is then passed to ``md2gemini``.

    Args:
        org_content (str): Content in Org format.

    Returns:
        str: The converted Gemini content, stripped of surrounding whitespace.

    Raises:
        subprocess.CalledProcessError: When pandoc exits with a non-zero
            status (its stderr is printed first).
        Exception: Any other error is printed and re-raised unchanged.
    """
    # With no input file on the command line pandoc reads stdin, so no
    # temporary file is needed (reopening an open NamedTemporaryFile by name
    # fails on Windows).
    pandoc_cmd = ['pandoc', '-f', 'org', '-t', 'markdown']
    try:
        # Step 1: Org -> Markdown. UTF-8 is forced in both directions so the
        # result does not depend on the locale's default encoding.
        completed = subprocess.run(
            pandoc_cmd,
            input=org_content,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            encoding='utf-8',
            check=True,
        )

        # Step 2: Markdown -> Gemini.
        gemini_content = md2gemini(
            completed.stdout,
            frontmatter=True,
            links='inline',
        )

        return gemini_content.strip()

    except subprocess.CalledProcessError as e:
        print(f"Erreur lors de la conversion avec pandoc: {e.stderr}")
        raise
    except Exception as e:
        print(f"Erreur lors de la conversion: {str(e)}")
        raise
|
||||
|
|
|
@ -4,7 +4,9 @@ global_config = {
|
|||
"slug_with_year": True,
|
||||
# "show_logs": False,
|
||||
"show_logs": True,
|
||||
"lang_default": "fr",
|
||||
"automatic_tagging_enabled": True,
|
||||
"automatic_tagging_org_files": True,
|
||||
"rebuild_files_filter": 2024,
|
||||
"posts_per_page": 10,
|
||||
"source_files_extension": "org",
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue