unify slugs

This commit is contained in:
Tykayn 2024-11-15 23:55:20 +01:00 committed by tykayn
parent a0bb742d4c
commit 63d51307d5
22 changed files with 90 additions and 2177 deletions

View file

@ -3,6 +3,7 @@ import os
import re
import shutil
from datetime import datetime
import unicodedata
from website_config import *
@ -183,19 +184,47 @@ def remove_hint_html(text):
return re.sub(pattern, replacement, text, flags=re.DOTALL)
def detect_slug_in_file_basename(file_basename):
# Letters that Unicode NFKD normalization cannot decompose into ASCII.
# Without this table the ASCII encoding step below would drop them outright
# (e.g. "cœur" would become "cur" and "Straße" would become "strae").
_SLUG_TRANSLITERATIONS = str.maketrans({
    'œ': 'oe', 'Œ': 'OE',
    'æ': 'ae', 'Æ': 'AE',
    'ß': 'ss', 'ẞ': 'SS',
    'ø': 'o', 'Ø': 'O',
    'đ': 'd', 'Đ': 'D',
    'ł': 'l', 'Ł': 'L',
})


def slugify_title(title_text: str) -> str:
    """
    Turn a title into a slug.

    Ligatures and other letters without a Unicode decomposition are first
    transliterated to ASCII, accents are then stripped, the text is
    lowercased, every character other than a-z, 0-9, whitespace and hyphen is
    removed, and runs of whitespace or hyphens collapse to a single hyphen.

    :param title_text: Title text (str).
    :return: Lowercase, hyphen-separated slug (str); empty when the title
        contains no usable character.
    """
    # Transliterate before normalizing: NFKD has no mapping for these letters.
    transliterated = title_text.translate(_SLUG_TRANSLITERATIONS)
    # Split accented letters into base letter + combining mark, then drop
    # everything that is not plain ASCII (the combining marks included).
    decomposed = unicodedata.normalize('NFKD', transliterated)
    ascii_text = decomposed.encode('ascii', 'ignore').decode('ascii')
    lowered = ascii_text.lower()
    # Punctuation (underscores and apostrophes included) is removed, not
    # turned into a separator.
    cleaned = re.sub(r'[^a-z0-9\s-]', '', lowered)
    hyphenated = re.sub(r'\s+', '-', cleaned)
    collapsed = re.sub(r'-+', '-', hyphenated)
    return collapsed.strip('-')
def detect_slug_in_file_basename(file_basename) -> str:
"""
Extract the year and the slug from a file name and join them as "year/slug".

The name must start with four digits (captured as the year), continue with
more digits, and end in ".org"; the text in between is the raw slug.

:param file_basename: File name (str).
:return: A "<year>/<slug>" string, or None when the name does not match the
    pattern (so the `-> str` annotation is narrower than the actual result).
"""
# NOTE(review): `pattern` is assigned twice in a row, so only the second value
# (four-digit year followed by ten more digits) reaches re.match.
pattern = r'^(\d{4})\d{8}(.+)\.org$'
pattern = r'^(\d{4})\d{10}(.+)\.org$'
match = re.match(pattern, file_basename)
if match:
year = match.group(1)
slug = match.group(2)
# keep the last part of the file name
splitted = slug.split('_')
# print('len(splitted)', len(splitted), splitted)
if len(splitted) > 1:
slug = splitted[len(splitted)-1]
final_slug = f"{year}/{slug}"
return final_slug
# NOTE(review): the statements below come after a return. This region looks
# like the old and the new version of the same branch shown together, and the
# indentation is lost here — confirm which version is live.
# final_slug=slug.replace("_cipherbliss_blog_","")
# final_slug=final_slug.replace("_blog_cil_gometz_","")
# presumably strips one leading '-' or '_' from the slug; the helper is
# defined outside this view — verify.
slug=enlever_premier_tiret_ou_underscore(slug)
slug = f"{year}/{slug}"
return slug
return None