mirror of
https://forge.chapril.org/tykayn/orgmode-to-gemini-blog
synced 2025-06-20 09:04:42 +02:00
up paths
This commit is contained in:
parent
895996cf6b
commit
8daf1e23e4
2055 changed files with 119377 additions and 31850 deletions
103
find_correspondances.py
Normal file
103
find_correspondances.py
Normal file
|
@ -0,0 +1,103 @@
|
|||
import os
|
||||
import re
|
||||
from collections import defaultdict
|
||||
# on recherche les anciens ID org roam, on les relie au titre d'un article,
|
||||
# et on met cet ID dans les fichiers d'export wp2org.
|
||||
|
||||
# Directory paths: the org-roam export (source of the IDs) and the wp2org
# output (files whose IDs get rewritten).
# NOTE(review): in the visible code these two module-level names appear unused;
# find_correspondances takes parameters with the same names and the calls at
# the bottom of the file pass literal paths. Confirm before relying on them.
|
||||
previous_dir = 'sources/org-roam-export/qzine_blog'
|
||||
after_dir = 'sources/generated_wp2org_output/qzine'
|
||||
|
||||
# Read an org-mode file and extract its first level-2 heading ("** ...") as the title, plus the :ID: of its properties drawer
|
||||
def read_org_file(file_path):
    """Return ``(title, id_value)`` extracted from an org-mode file.

    The title is the text of the first level-2 heading (a line starting with
    ``"** "``). The ID is the value of the first properties drawer of the form
    ``:PROPERTIES: / :ID: <value> / :END:``. Either element is ``None`` when
    the corresponding pattern is not found.

    Args:
        file_path: Path of the UTF-8 encoded org file to read.

    Returns:
        A ``(title, id_value)`` tuple of ``str`` or ``None``.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        text = handle.read()

    heading = re.search(r'^\*\* (.+)', text, re.MULTILINE)
    drawer = re.search(r'^:PROPERTIES:\s*:ID:\s*([^\s]+)\s*:END:', text, re.MULTILINE)

    found_title = None if heading is None else heading.group(1)
    found_id = None if drawer is None else drawer.group(1)
    return found_title, found_id
|
||||
|
||||
def read_org_file_exported(file_path):
    """Return ``(title, id_value)`` extracted from a wp2org-exported org file.

    The title is the text of the first level-1 heading (a line starting with
    ``"* "``). The ID is the value of the first properties drawer of the form
    ``:PROPERTIES: / :ID: <value> / :END:``. Either element is ``None`` when
    the corresponding pattern is not found.

    Args:
        file_path: Path of the UTF-8 encoded org file to read.

    Returns:
        A ``(title, id_value)`` tuple of ``str`` or ``None``.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        text = handle.read()

    heading = re.search(r'^\* (.+)', text, re.MULTILINE)
    drawer = re.search(r'^:PROPERTIES:\s*:ID:\s*([^\s]+)\s*:END:', text, re.MULTILINE)

    found_title = None if heading is None else heading.group(1)
    found_id = None if drawer is None else drawer.group(1)
    return found_title, found_id
|
||||
|
||||
# écrire un fichier orgmode avec un nouvel ID
|
||||
def write_org_file(file_path, title, new_id):
    """Rewrite the ``:ID:`` of the properties drawer(s) in an org-mode file.

    Every drawer of the form ``:PROPERTIES: / :ID: <value> / :END:`` found in
    the file is replaced by a normalised drawer carrying ``new_id``. The file
    is rewritten in place even when nothing matched.

    Args:
        file_path: Path of the UTF-8 encoded org file to update.
        title: Unused; kept so existing callers keep working.
        new_id: Identifier to store in the drawer. It is inserted verbatim.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    new_drawer = f':PROPERTIES:\n:ID: {new_id}\n:END:'
    # Use a callable replacement: a plain string would be parsed as a regex
    # template, so a backslash in new_id (e.g. "\1" or "\d") would raise
    # re.error or be silently rewritten instead of being written as-is.
    updated_content = re.sub(
        r'^:PROPERTIES:\s*:ID:\s*[^\s]+\s*:END:',
        lambda _match: new_drawer,
        content,
        flags=re.MULTILINE,
    )

    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(updated_content)
|
||||
|
||||
|
||||
def find_correspondances(previous_dir, after_dir):
    """Copy org-roam IDs onto the same-titled files of a wp2org export.

    Each ``.org`` file in ``previous_dir`` is read with ``read_org_file`` and
    indexed by title. Each ``.org`` file in ``after_dir`` is read with
    ``read_org_file_exported``; when its title exists in the index, the file
    is rewritten through ``write_org_file`` with the indexed ID. A summary of
    matched and unmatched files is printed.

    Args:
        previous_dir: Directory holding the org-roam export (source of IDs).
        after_dir: Directory holding the wp2org output (files to update).

    Returns:
        None. Results are reported on stdout and files are edited in place.
    """
    # Index the previous export by title. Files without a title are reported;
    # files with a title but no ID are skipped because there is nothing to copy.
    # When two files share a title, the one listed last wins.
    previous_files = {}
    for file_name in os.listdir(previous_dir):
        if not file_name.endswith('.org'):
            continue
        # Unpack the tuple: testing the tuple itself is always truthy, which
        # previously kept the "title not found" message from ever printing.
        title, id_value = read_org_file(os.path.join(previous_dir, file_name))
        if not title:
            print(f"Fichier: {previous_dir}/{file_name}, Titre non trouvé")
        elif id_value:
            previous_files[title] = {'id': id_value, 'file_name': file_name}

    # Map each title of the new export to its file name.
    after_files = {}
    for file_name in os.listdir(after_dir):
        if not file_name.endswith('.org'):
            continue
        title, _ = read_org_file_exported(os.path.join(after_dir, file_name))
        if title:
            after_files[title] = file_name

    # Match on exact title and rewrite the ID of every matched file.
    matched_count = 0
    unmatched_after_files = []
    for title, file_name in after_files.items():
        previous = previous_files.get(title)
        if previous is None:
            unmatched_after_files.append(file_name)
            continue
        matched_count += 1
        write_org_file(os.path.join(after_dir, file_name), title, previous['id'])

    # Share of files in after_dir that received an ID, as a percentage.
    total_after_files = len(after_files)
    match_proportion = matched_count / total_after_files * 100 if total_after_files else 0

    print(f"matched_count: {matched_count}")
    print(f"Proportion de correspondances trouvées : {match_proportion:.2f}% , {len(unmatched_after_files)} fichiers non trouvés")
    if unmatched_after_files:
        print("Fichiers sans correspondance dans le dossier after :")
        for file_name in unmatched_after_files:
            # These files live in after_dir, so report them under that path.
            print(f" {after_dir}/{file_name}")
    else:
        print("Tous les fichiers dans le dossier after ont été mis en correspondance.")
|
||||
|
||||
|
||||
|
||||
# Reconcile IDs for every blog: (org-roam export dir, wp2org output dir).
for _roam_export_dir, _wp2org_output_dir in (
    ('sources/org-roam-export/qzine_blog', 'sources/generated_wp2org_output/qzine'),
    ('sources/org-roam-export/tykayn_blog', 'sources/generated_wp2org_output/tkblog'),
    ('sources/org-roam-export/cipherbliss_blog', 'sources/generated_wp2org_output/cipherbliss'),
    ('sources/org-roam-export/cil_gometz', 'sources/generated_wp2org_output/cil'),
    ('sources/org-roam-export/helia_blog', 'sources/generated_wp2org_output/helia'),
):
    find_correspondances(_roam_export_dir, _wp2org_output_dir)
|
Loading…
Add table
Add a link
Reference in a new issue