mirror of
https://forge.chapril.org/tykayn/orgmode-to-gemini-blog
synced 2025-11-19 23:00:35 +01:00
396 lines
12 KiB
Python
Executable file
396 lines
12 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""
|
|
Script pour exporter un ou plusieurs blogs au format EPUB en utilisant Calibre.
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
import argparse
|
|
import subprocess
|
|
import shutil
|
|
import tempfile
|
|
from datetime import datetime
|
|
import locale
|
|
|
|
try:
|
|
import pypandoc
|
|
HAS_PYPANDOC = True
|
|
except ImportError:
|
|
HAS_PYPANDOC = False
|
|
print("Attention: pypandoc n'est pas installé. Installation requise: pip install pypandoc")
|
|
|
|
from utils.utils import (
|
|
find_extract_in_content_org,
|
|
find_first_level1_title,
|
|
find_year_and_slug_on_filename,
|
|
get_blog_template_conf
|
|
)
|
|
from website_config import configs_sites
|
|
|
|
|
|
def verifier_calibre():
    """Check whether Calibre's ``ebook-convert`` command can be run.

    Executes ``ebook-convert --version`` and discards its output.

    Returns:
        bool: True when the command was launched and exited with status 0,
        False when it could not be launched or exited with a non-zero status.
    """
    try:
        subprocess.run(
            ['ebook-convert', '--version'],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # OSError covers FileNotFoundError (command not on PATH) and also
        # PermissionError (file found but not executable), which previously
        # escaped this check and crashed the caller.
        return False
    return True
|
|
|
|
|
|
def extraire_date_du_contenu(content):
    """Extract the creation date declared in an org document.

    The ``#+CREATED:`` keyword is looked up first, then
    ``#+post_date_published:``. Each must be followed by a ``YYYY-MM-DD``
    date; a match that is not a valid calendar date is ignored and the next
    keyword is tried.

    Args:
        content: Full text of the org file.

    Returns:
        datetime | None: the parsed date at midnight, or None when no keyword
        carries a valid date.
    """
    # Order matters: #+CREATED wins over #+post_date_published.
    motifs = (
        r'#\+CREATED:\s*(\d{4}-\d{2}-\d{2})',
        r'#\+post_date_published:\s*(\d{4}-\d{2}-\d{2})',
    )
    for motif in motifs:
        trouve = re.search(motif, content)
        if trouve is None:
            continue
        try:
            return datetime.strptime(trouve.group(1), '%Y-%m-%d')
        except ValueError:
            # Right shape but impossible date (e.g. month 13): try the next keyword.
            continue
    return None
|
|
|
|
|
|
def extraire_date_du_fichier(filename):
    """Derive an article date from its file name.

    Parsing of the name is delegated to ``find_year_and_slug_on_filename``;
    the first eight characters of the date string it returns are read as
    ``YYYYMMDD``.

    Args:
        filename: Name of the article file.

    Returns:
        datetime | None: the parsed day at midnight, or None when the helper
        yields no usable date string or the digits do not form a valid date.
    """
    try:
        # Only the date part is needed; year and slug are ignored here.
        date_str, _annee, _slug = find_year_and_slug_on_filename(filename)
        if date_str and len(date_str) >= 8:
            return datetime(
                int(date_str[:4]),
                int(date_str[4:6]),
                int(date_str[6:8]),
            )
    except Exception:
        # Best effort: any parsing problem means "no date in the file name".
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        return None
    return None
|
|
|
|
|
|
def collecter_articles(blog_path):
    """Collect every ``.org`` article of a blog.

    The ``lang_fr`` and ``lang_en`` sub-directories of *blog_path* are
    scanned (non-recursively). Files without a first-level title are skipped.
    The date of an article comes from, in order: its org metadata, its file
    name, and finally the file's modification time.

    Args:
        blog_path: Path of the blog's source directory.

    Returns:
        list[dict]: one dict per article with the keys ``title``, ``content``,
        ``date`` (datetime) and ``filename``, sorted from oldest to newest.
        Files that cannot be read or parsed are reported with ``print`` and
        left out.
    """
    articles = []

    for lang_dir in ('lang_fr', 'lang_en'):
        lang_path = os.path.join(blog_path, lang_dir)
        # isdir rather than exists: a stray *file* with a language-directory
        # name must be skipped, not handed to os.listdir (NotADirectoryError).
        if not os.path.isdir(lang_path):
            continue

        # os.listdir returns entries in arbitrary order. Sorting them makes
        # the result reproducible, because the stable sort below keeps this
        # order for articles that share the same date.
        for filename in sorted(os.listdir(lang_path)):
            if not filename.endswith('.org'):
                continue

            filepath = os.path.join(lang_path, filename)
            try:
                with open(filepath, 'r', encoding='utf-8') as f:
                    content = f.read()

                title = find_first_level1_title(content)
                if not title:
                    continue

                # Drop org heading/bold markers from the title.
                title = title.replace('*', '').strip()

                # First available source wins; mtime is the last resort.
                date = (
                    extraire_date_du_contenu(content)
                    or extraire_date_du_fichier(filename)
                    or datetime.fromtimestamp(os.path.getmtime(filepath))
                )

                articles.append({
                    'title': title,
                    'content': content,
                    'date': date,
                    'filename': filename,
                })
            except Exception as e:
                # One broken file must not abort the whole collection.
                print(f" Erreur lors de la lecture de {filepath}: {e}")

    # Oldest first.
    articles.sort(key=lambda x: x['date'])

    return articles
|
|
|
|
|
|
def convertir_org_en_html(org_content):
    """Convert org-mode text to an HTML fragment with pandoc.

    The text is first passed through ``find_extract_in_content_org`` and the
    result is converted from the ``org`` format to ``html``.

    Args:
        org_content: Raw content of an org file.

    Returns:
        str: the HTML produced by pandoc, or a ``<p>`` element carrying the
        error message when the conversion fails.

    Raises:
        RuntimeError: if pypandoc could not be imported.
    """
    if not HAS_PYPANDOC:
        raise RuntimeError("pypandoc n'est pas installé")

    corps_org = find_extract_in_content_org(org_content)

    try:
        return pypandoc.convert_text(corps_org, 'html', format='org')
    except Exception as erreur:
        # A failing article is replaced by an inline error paragraph so the
        # rest of the export can continue.
        print(f" Erreur lors de la conversion: {erreur}")
        return f"<p>Erreur lors de la conversion: {erreur}</p>"
|
|
|
|
|
|
def generer_html_epub(articles, blog_config):
    """Build the single HTML document that is later converted to EPUB.

    The document is made of a header (blog title, subtitle, author and
    generation time), then for each article a title, a publication date, the
    converted body and a separator, and finally a footer.

    Args:
        articles: Iterable of article dicts providing ``title`` (str),
            ``date`` (datetime) and ``content`` (org text).
        blog_config: Mapping read with ``.get`` for ``BLOG_TITLE``,
            ``BLOG_SUBTITLE``, ``AUTHOR``, ``NDD`` and ``DESCRIPTION``.

    Returns:
        str: the complete HTML document.
    """
    # Local import: keeps this function self-contained without touching the
    # module's import block.
    from html import escape

    blog_title = blog_config.get('BLOG_TITLE', 'Blog')

    # NOTE(review): configuration values are inserted verbatim, as before;
    # they may intentionally contain markup -- confirm before escaping them.
    entete = '''<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="fr">
<head>
<meta charset="UTF-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
<title>{title}</title>
<style>
body {{
font-family: serif;
max-width: 800px;
margin: 0 auto;
padding: 20px;
line-height: 1.6;
}}
h1 {{
border-bottom: 2px solid #333;
padding-bottom: 10px;
margin-top: 40px;
}}
h2 {{
color: #555;
margin-top: 30px;
}}
.article-date {{
color: #666;
font-size: 0.9em;
margin-bottom: 20px;
}}
img {{
max-width: 100%;
height: auto;
}}
a {{
color: #0066cc;
}}
code {{
background-color: #f4f4f4;
padding: 2px 4px;
border-radius: 3px;
}}
pre {{
background-color: #f4f4f4;
padding: 10px;
border-radius: 5px;
overflow-x: auto;
}}
</style>
</head>
<body>
<h1>{title}</h1>
<p><em>{subtitle}</em></p>
<p>Auteur: {author}</p>
<p>Export généré le: {generated}</p>
'''.format(
        title=blog_title,
        subtitle=blog_config.get('BLOG_SUBTITLE', ''),
        author=blog_config.get('AUTHOR', 'Auteur inconnu'),
        generated=datetime.now().strftime('%d/%m/%Y à %H:%M:%S'),
    )

    html_parts = [entete]

    for article in articles:
        # Titles are plain text taken from the org file: escape &, < and >
        # so a title such as "Trucs & astuces" cannot break the XHTML.
        titre_html = escape(article['title'], quote=False)
        date_txt = article['date'].strftime('%d/%m/%Y')
        html_parts.append(f'<h1>{titre_html}</h1>')
        html_parts.append(f'<div class="article-date">Publié le: {date_txt}</div>')
        html_parts.append(convertir_org_en_html(article['content']))
        html_parts.append('<hr/>')

    html_parts.append('''
<div style="margin-top: 50px; padding-top: 20px; border-top: 1px solid #ccc;">
<p><em>Export généré depuis: {domain}</em></p>
<p>{description}</p>
</div>
</body>
</html>'''.format(
        domain=blog_config.get('NDD', ''),
        description=blog_config.get('DESCRIPTION', ''),
    ))

    return '\n'.join(html_parts)
|
|
|
|
|
|
def exporter_epub(blogs, output_dir='exports/epub', titre=None):
    """Export one or more blogs into a single EPUB file with Calibre.

    The articles of every requested blog found under ``sources/`` are
    collected, merged in chronological order, rendered into one temporary
    HTML file and handed to ``ebook-convert``. Progress and errors are
    reported with ``print``.

    Args:
        blogs: Names of the blog directories (inside ``sources/``) to export.
        output_dir: Directory receiving the ``.epub`` file; created if needed.
        titre: Optional EPUB title. Defaults to the titles of the exported
            blogs joined with `` - ``.

    Returns:
        None. The function returns early (after printing a message) when a
        required tool or the sources directory is missing, when no article
        is found, or when the conversion fails.
    """
    # Both external tools are mandatory: pandoc renders the articles,
    # Calibre builds the EPUB.
    if not verifier_calibre():
        print("Erreur: Calibre n'est pas installé ou ebook-convert n'est pas dans le PATH.")
        print("Installation: https://calibre-ebook.com/download")
        return

    if not HAS_PYPANDOC:
        print("Erreur: pypandoc n'est pas installé.")
        print("Installation: pip install pypandoc")
        return

    sources_dir = "sources"
    if not os.path.exists(sources_dir):
        print(f"Erreur: Le dossier {sources_dir} n'existe pas")
        return

    os.makedirs(output_dir, exist_ok=True)

    # Gather the articles of every blog that actually exists.
    all_articles = []
    blog_configs = {}
    blog_titles = []

    for blog_name in blogs:
        blog_path = os.path.join(sources_dir, blog_name)
        if not os.path.exists(blog_path):
            print(f"⚠️ Blog '{blog_name}' introuvable dans {sources_dir}")
            continue

        print(f"📚 Collecte des articles de {blog_name}...")
        blog_config = get_blog_template_conf(blog_name)
        blog_configs[blog_name] = blog_config

        articles = collecter_articles(blog_path)
        print(f" {len(articles)} articles trouvés")

        for article in articles:
            article['blog_name'] = blog_name
            all_articles.append(article)

        blog_titles.append(blog_config.get('BLOG_TITLE', blog_name))

    if not all_articles:
        print("Aucun article trouvé.")
        return

    # Merge all blogs into one chronological sequence.
    all_articles.sort(key=lambda x: x['date'])

    print(f"\n📖 Total: {len(all_articles)} articles")

    # Title and author are derived only from the blogs that were found:
    # indexing blog_configs with the requested names raised KeyError as soon
    # as one requested blog was missing.
    epub_title = titre or ' - '.join(blog_titles)

    # dict.fromkeys de-duplicates while keeping the order in which the blogs
    # were given (a set gave an arbitrary order); empty authors are dropped.
    auteurs = list(dict.fromkeys(
        cfg.get('AUTHOR') for cfg in blog_configs.values() if cfg.get('AUTHOR')
    ))
    epub_author = ' & '.join(auteurs) if auteurs else 'Auteur inconnu'

    print("\n🔄 Génération du fichier HTML...")

    # With several blogs, the first one found provides the header, footer
    # and description.
    main_config = next(iter(blog_configs.values()))

    html_content = generer_html_epub(all_articles, main_config)

    # Build a file-system friendly name from the title.
    safe_title = re.sub(r'[^\w\s-]', '', epub_title).strip()
    safe_title = re.sub(r'[-\s]+', '-', safe_title)
    if not safe_title:
        # A title made only of punctuation/symbols would otherwise yield the
        # hidden file ".epub".
        safe_title = 'export'
    epub_path = os.path.join(output_dir, f"{safe_title}.epub")

    print("\n📦 Conversion en EPUB avec Calibre...")
    print(f" Titre: {epub_title}")
    print(f" Auteur: {epub_author}")
    print(f" Fichier: {epub_path}")

    tmp_html_path = None
    try:
        # delete=False: the file must outlive the ``with`` block so that
        # ebook-convert can read it; it is removed in ``finally``.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.html',
                                         encoding='utf-8', delete=False) as tmp_html:
            tmp_html_path = tmp_html.name
            tmp_html.write(html_content)

        cmd = [
            'ebook-convert',
            tmp_html_path,
            epub_path,
            '--title', epub_title,
            '--authors', epub_author,
            '--language', 'fr',
            # Per Calibre's documentation, the XPath "/" disables the
            # automatic page breaks.
            '--page-breaks-before', '/',
            '--insert-blank-line',
            '--smarten-punctuation',
            '--margin-top', '50',
            '--margin-bottom', '50',
            '--margin-left', '50',
            '--margin-right', '50',
        ]

        # Add the description when one is configured.
        description = main_config.get('DESCRIPTION')
        if description:
            cmd.extend(['--comments', description])

        subprocess.run(cmd, capture_output=True, text=True, check=True)

        print(f"\n✅ EPUB généré avec succès: {epub_path}")

    except subprocess.CalledProcessError as e:
        print("\n❌ Erreur lors de la conversion:")
        print(f" {e.stderr}")
    except Exception as e:
        print(f"\n❌ Erreur: {e}")
    finally:
        # Single cleanup point for success and every failure path.
        if tmp_html_path and os.path.exists(tmp_html_path):
            os.unlink(tmp_html_path)
|
|
|
|
|
|
def main():
    """Command-line entry point: parse the arguments and run the EPUB export.

    Positional arguments are the blog names (at least one). ``--output``/``-o``
    selects the output directory and ``--titre``/``-t`` overrides the EPUB title.
    """
    cli = argparse.ArgumentParser(
        description='Exporte un ou plusieurs blogs au format EPUB avec Calibre',
    )
    cli.add_argument(
        'blogs',
        nargs='+',
        help='Noms des blogs à exporter (ex: tykayn_blog cipherbliss_blog)',
    )
    cli.add_argument(
        '--output', '-o',
        default='exports/epub',
        help='Dossier de sortie pour les fichiers EPUB (défaut: exports/epub)',
    )
    cli.add_argument(
        '--titre', '-t',
        help="Titre personnalisé pour l'EPUB (par défaut: titre du blog ou combinaison)",
    )

    options = cli.parse_args()
    exporter_epub(options.blogs, options.output, options.titre)
|
|
|
|
|
|
# Run the exporter only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|
|
|