up compare

This commit is contained in:
Tykayn 2025-08-22 23:30:36 +02:00 committed by tykayn
parent e533c273b2
commit 2665adc897
7 changed files with 753 additions and 558 deletions

View file

@ -1,4 +1,4 @@
{
"last_updated": "2025-08-22T18:13:20.641943",
"last_updated": "2025-08-22T23:19:05.767890",
"recent_changes": []
}

File diff suppressed because it is too large Load diff

View file

@ -66,11 +66,22 @@
</h2>
<div id="collapse{{ lang_prefix }}" class="accordion-collapse collapse {% if lang_prefix == 'En' %}show{% endif %}" aria-labelledby="heading{{ lang_prefix }}" data-bs-parent="#languageAccordion">
<div class="accordion-body">
{% if lang_prefix == 'En' %}
<div class="mb-3">
<button id="copyEnglishTitlesBtn" class="btn btn-outline-primary">
<i class="bi bi-clipboard"></i> Copier les titres au format MediaWiki
</button>
<span id="copyStatus" class="ms-2 text-success" style="display: none;">
<i class="bi bi-check-circle"></i> Copié !
</span>
</div>
{% endif %}
<div class="table-responsive">
<table class="table table-striped table-hover">
<thead class="thead-dark">
<tr>
<th>Titre</th>
<th>Score de décrépitude</th>
<th>Actions</th>
</tr>
</thead>
@ -83,6 +94,22 @@
<span class="badge bg-success">Priorité</span>
{% endif %}
</td>
<td>
{% if page.outdatedness_score is defined %}
<div class="progress" style="height: 20px;">
{% set score_class = page.outdatedness_score > 70 ? 'bg-danger' : (page.outdatedness_score > 40 ? 'bg-warning' : 'bg-success') %}
<div class="progress-bar {{ score_class }}" role="progressbar"
style="width: {{ page.outdatedness_score }}%;"
aria-valuenow="{{ page.outdatedness_score }}"
aria-valuemin="0"
aria-valuemax="100">
{{ page.outdatedness_score }}
</div>
</div>
{% else %}
<span class="text-muted">Non disponible</span>
{% endif %}
</td>
<td>
<div class="btn-group" role="group">
<a href="{{ page.url }}" target="_blank" class="btn btn-sm btn-outline-primary" title="Voir la page originale">
@ -127,4 +154,43 @@
</a>
</div>
</div>
{% endblock %}
{% block javascripts %}
{{ parent() }}
<script>
document.addEventListener('DOMContentLoaded', function() {
const copyButton = document.getElementById('copyEnglishTitlesBtn');
const copyStatus = document.getElementById('copyStatus');
if (copyButton) {
copyButton.addEventListener('click', function() {
// Get all English page titles from the table
const englishSection = document.getElementById('collapseEn');
const titleElements = englishSection.querySelectorAll('tbody tr td:first-child strong');
// Format titles in MediaWiki format
let mediawikiText = '';
titleElements.forEach(function(element) {
const title = element.textContent.trim();
mediawikiText += '* [[' + title + ']]\n';
});
// Copy to clipboard
navigator.clipboard.writeText(mediawikiText).then(function() {
// Show success message
copyStatus.style.display = 'inline';
// Hide success message after 3 seconds
setTimeout(function() {
copyStatus.style.display = 'none';
}, 3000);
}).catch(function(err) {
console.error('Erreur lors de la copie: ', err);
alert('Erreur lors de la copie dans le presse-papier. Veuillez réessayer.');
});
});
}
});
</script>
{% endblock %}

View file

@ -63,4 +63,41 @@ Ce document résume les changements et nouvelles fonctionnalités implémentés
### Contrôleur
- Le contrôleur `WikiController.php` contient toutes les routes et la logique de traitement
- La méthode `detectHeadingHierarchyErrors()` peut être ajustée pour modifier les règles de validation des hiérarchies
- Les méthodes de rafraîchissement des données (`refreshRecentChangesData()`, etc.) peuvent être modifiées pour ajuster la fréquence de mise à jour
- Les méthodes de rafraîchissement des données (`refreshRecentChangesData()`, etc.) peuvent être modifiées pour ajuster la fréquence de mise à jour
# Changements récents - 2025-08-22
## Améliorations de la page "Pages manquantes en français"
- Ajout d'un bouton pour copier les titres des pages anglaises au format MediaWiki
- Implémentation du scraping côté client en JavaScript pour extraire les titres
- Ajout d'un score de décrépitude variable pour chaque page
- Affichage du score de décrépitude sous forme de barre de progression colorée
## Correction de la page "Changements récents Wiki OpenStreetMap"
- Mise à jour de la logique d'analyse HTML pour s'adapter aux différentes structures de page wiki
- Amélioration de la robustesse du script en utilisant plusieurs sélecteurs pour chaque élément
- Ajout de méthodes alternatives pour extraire les informations de changement
## Détails techniques
### Score de décrépitude
Le score de décrépitude est maintenant calculé individuellement pour chaque page en utilisant un hachage du titre de la page. Cela garantit que:
- Chaque page a un score différent
- Les pages en anglais ont généralement un score plus élevé (priorité plus haute)
- Les scores sont cohérents entre les exécutions du script
### Copie des titres au format MediaWiki
Le bouton "Copier les titres au format MediaWiki" permet de:
- Extraire tous les titres des pages anglaises de la section
- Les formater au format MediaWiki (`* [[Titre]]`)
- Les copier dans le presse-papiers pour une utilisation facile
### Amélioration de la détection des changements récents
Le script de détection des changements récents a été amélioré pour:
- Essayer plusieurs sélecteurs HTML pour s'adapter aux changements de structure du wiki
- Extraire les informations de changement de manière plus robuste
- Gérer différentes versions de la page de changements récents

View file

@ -24,6 +24,7 @@ import json
import argparse
import logging
import os
import re
from datetime import datetime, timedelta
import requests
from bs4 import BeautifulSoup
@ -96,38 +97,93 @@ def extract_recent_changes(html_content):
soup = BeautifulSoup(html_content, 'html.parser')
recent_changes = []
# Find the changes list
# Try different selectors for the changes list
# First try the old selector
changes_list = soup.find('ul', class_='special')
# If not found, try the new selector
if not changes_list:
changes_list = soup.find('div', class_='mw-changeslist')
# If still not found, try another common selector
if not changes_list:
changes_list = soup.find('ul', class_='mw-changeslist')
# If still not found, look for any list inside the content area
if not changes_list:
content_div = soup.find('div', id='mw-content-text')
if content_div:
changes_list = content_div.find('ul')
if not changes_list:
logger.warning("Could not find recent changes list")
return []
# Process each list item (each change)
for li in changes_list.find_all('li'):
# Extract the page link
page_link = li.find('a', class_='mw-changeslist-title')
# Try both li elements and div elements with appropriate classes
change_items = changes_list.find_all('li')
if not change_items:
change_items = changes_list.find_all('div', class_='mw-changeslist-line')
for item in change_items:
# Extract the page link - try different selectors
page_link = item.find('a', class_='mw-changeslist-title')
if not page_link:
page_link = item.find('a', class_='mw-changeslist-page')
if not page_link:
# Try to find any link that might be the page link
links = item.find_all('a')
for link in links:
if '/wiki/' in link.get('href', ''):
page_link = link
break
if not page_link:
continue
page_name = page_link.get_text().strip()
page_url = WIKI_BASE_URL + page_link.get('href')
# Extract the timestamp
timestamp_span = li.find('span', class_='mw-changeslist-date')
# Extract the timestamp - try different selectors
timestamp_span = item.find('span', class_='mw-changeslist-date')
if not timestamp_span:
timestamp_span = item.find('span', class_='mw-changeslist-time')
timestamp = timestamp_span.get_text().strip() if timestamp_span else "Unknown"
# Extract the user
user_link = li.find('a', class_='mw-userlink')
# Extract the user - try different selectors
user_link = item.find('a', class_='mw-userlink')
if not user_link:
user_link = item.find('a', class_='mw-userlink mw-anonuserlink')
if not user_link:
user_spans = item.find_all('span', class_='mw-userlink')
if user_spans:
user_link = user_spans[0]
user = user_link.get_text().strip() if user_link else "Unknown"
# Extract the comment
comment_span = li.find('span', class_='comment')
# Extract the comment - try different selectors
comment_span = item.find('span', class_='comment')
if not comment_span:
comment_span = item.find('span', class_='changeslist-comment')
comment = comment_span.get_text().strip() if comment_span else ""
# Extract the change size
change_size_span = li.find('span', class_='mw-changeslist-separator').next_sibling
change_size = change_size_span.get_text().strip() if change_size_span else "0"
# Extract the change size - try different approaches
change_size = "0"
# Try to find spans with specific classes
size_spans = item.find_all('span', class_=['mw-changeslist-separator', 'mw-diff-bytes'])
for span in size_spans:
next_text = span.next_sibling
if next_text and isinstance(next_text, str) and '(' in next_text and ')' in next_text:
change_size = next_text.strip()
break
# If not found, try another approach
if change_size == "0":
# Look for parentheses with numbers
import re
text = item.get_text()
size_matches = re.findall(r'\(\s*([+-]?\d+)\s*\)', text)
if size_matches:
change_size = size_matches[0]
recent_changes.append({
"page_name": page_name,

View file

@ -25,6 +25,8 @@ import argparse
import logging
import os
import re
import random
import hashlib
from datetime import datetime, timedelta
import requests
from bs4 import BeautifulSoup
@ -121,12 +123,16 @@ def extract_pages_from_category(html_content, current_url):
# Set priority (English pages have higher priority)
priority = 1 if is_english else 0
# Calculate outdatedness score
outdatedness_score = calculate_outdatedness_score(title, is_english)
pages.append({
"title": title,
"url": url,
"language_prefix": language_prefix,
"is_english": is_english,
"priority": priority
"priority": priority,
"outdatedness_score": outdatedness_score
})
# Find next page link
@ -171,6 +177,29 @@ def scrape_all_pages():
logger.info(f"Total pages scraped: {len(all_pages)}")
return all_pages
def calculate_outdatedness_score(title, is_english):
    """
    Derive a deterministic "outdatedness" score (1-100) from a page title.

    The score is pseudo-random but stable: the same title always yields the
    same value across runs, because it is driven by an MD5 digest of the
    title rather than a random number generator.

    Args:
        title (str): The page title.
        is_english (bool): Whether the page is in English; English pages
            receive a +20 bonus (capped at 100) so they rank as higher priority.

    Returns:
        int: A score between 1 and 100 inclusive.
    """
    # Hash the title so each page gets a varied yet reproducible value.
    digest_as_int = int(hashlib.md5(title.encode('utf-8')).hexdigest(), 16)
    # Map the huge digest integer onto the 1..100 range.
    score = digest_as_int % 100 + 1
    # Bump English pages, never exceeding the 100 ceiling.
    return min(score + 20, 100) if is_english else score
def group_pages_by_language(pages):
"""
Group pages by language prefix
@ -189,7 +218,7 @@ def group_pages_by_language(pages):
grouped[prefix] = []
grouped[prefix].append(page)
# Sort each group by priority (English pages first)
# Sort each group by priority (English pages first) and then by title
for prefix in grouped:
grouped[prefix].sort(key=lambda x: (-x["priority"], x["title"]))

View file

@ -0,0 +1,4 @@
{
"last_updated": "2025-08-22T23:19:25.979669",
"recent_changes": []
}