up compare

Tykayn 2025-08-22 23:30:36 +02:00 committed by tykayn
parent e533c273b2
commit 2665adc897
7 changed files with 753 additions and 558 deletions

View file

@@ -1,4 +1,4 @@
{
- "last_updated": "2025-08-22T18:13:20.641943",
+ "last_updated": "2025-08-22T23:19:05.767890",
"recent_changes": []
}

File diff suppressed because it is too large.

View file

@@ -66,11 +66,22 @@
</h2>
<div id="collapse{{ lang_prefix }}" class="accordion-collapse collapse {% if lang_prefix == 'En' %}show{% endif %}" aria-labelledby="heading{{ lang_prefix }}" data-bs-parent="#languageAccordion">
<div class="accordion-body">
{% if lang_prefix == 'En' %}
<div class="mb-3">
<button id="copyEnglishTitlesBtn" class="btn btn-outline-primary">
<i class="bi bi-clipboard"></i> Copier les titres au format MediaWiki
</button>
<span id="copyStatus" class="ms-2 text-success" style="display: none;">
<i class="bi bi-check-circle"></i> Copié !
</span>
</div>
{% endif %}
<div class="table-responsive"> <div class="table-responsive">
<table class="table table-striped table-hover"> <table class="table table-striped table-hover">
<thead class="thead-dark"> <thead class="thead-dark">
<tr> <tr>
<th>Titre</th> <th>Titre</th>
<th>Score de décrépitude</th>
<th>Actions</th>
</tr>
</thead>
@@ -83,6 +94,22 @@
<span class="badge bg-success">Priorité</span>
{% endif %}
</td>
<td>
{% if page.outdatedness_score is defined %}
<div class="progress" style="height: 20px;">
{% set score_class = page.outdatedness_score > 70 ? 'bg-danger' : (page.outdatedness_score > 40 ? 'bg-warning' : 'bg-success') %}
<div class="progress-bar {{ score_class }}" role="progressbar"
style="width: {{ page.outdatedness_score }}%;"
aria-valuenow="{{ page.outdatedness_score }}"
aria-valuemin="0"
aria-valuemax="100">
{{ page.outdatedness_score }}
</div>
</div>
{% else %}
<span class="text-muted">Non disponible</span>
{% endif %}
</td>
<td>
<div class="btn-group" role="group">
<a href="{{ page.url }}" target="_blank" class="btn btn-sm btn-outline-primary" title="Voir la page originale">
@@ -128,3 +155,42 @@
</div>
</div>
{% endblock %}
{% block javascripts %}
{{ parent() }}
<script>
document.addEventListener('DOMContentLoaded', function() {
const copyButton = document.getElementById('copyEnglishTitlesBtn');
const copyStatus = document.getElementById('copyStatus');
if (copyButton) {
copyButton.addEventListener('click', function() {
// Get all English page titles from the table
const englishSection = document.getElementById('collapseEn');
const titleElements = englishSection.querySelectorAll('tbody tr td:first-child strong');
// Format titles in MediaWiki format
let mediawikiText = '';
titleElements.forEach(function(element) {
const title = element.textContent.trim();
mediawikiText += '* [[' + title + ']]\n';
});
// Copy to clipboard
navigator.clipboard.writeText(mediawikiText).then(function() {
// Show success message
copyStatus.style.display = 'inline';
// Hide success message after 3 seconds
setTimeout(function() {
copyStatus.style.display = 'none';
}, 3000);
}).catch(function(err) {
console.error('Erreur lors de la copie: ', err);
alert('Erreur lors de la copie dans le presse-papier. Veuillez réessayer.');
});
});
}
});
</script>
{% endblock %}
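For reference, the MediaWiki list built client-side above (`* [[Titre]]` per line) can also be generated from the scraped page data. A minimal Python sketch, assuming page dicts with a `title` key like those produced by the scraping script; the `format_mediawiki_list` helper and the sample titles are illustrative, not part of the commit:

```python
def format_mediawiki_list(pages):
    """Build the same '* [[Title]]' bullet list that the copy button produces in the browser."""
    return "".join("* [[{}]]\n".format(page["title"]) for page in pages)

# Example with inline data; in practice the titles would come from the scraper's JSON output
sample_pages = [{"title": "Key:highway"}, {"title": "Tag:amenity=bench"}]
print(format_mediawiki_list(sample_pages), end="")
```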

View file

@@ -64,3 +64,40 @@ Ce document résume les changements et nouvelles fonctionnalités implémentés
- Le contrôleur `WikiController.php` contient toutes les routes et la logique de traitement
- La méthode `detectHeadingHierarchyErrors()` peut être ajustée pour modifier les règles de validation des hiérarchies
- Les méthodes de rafraîchissement des données (`refreshRecentChangesData()`, etc.) peuvent être modifiées pour ajuster la fréquence de mise à jour
# Changements récents - 2025-08-22
## Améliorations de la page "Pages manquantes en français"
- Ajout d'un bouton pour copier les titres des pages anglaises au format MediaWiki
- Implémentation du scraping côté client en JavaScript pour extraire les titres
- Ajout d'un score de décrépitude variable pour chaque page
- Affichage du score de décrépitude sous forme de barre de progression colorée
## Correction de la page "Changements récents Wiki OpenStreetMap"
- Mise à jour de la logique d'analyse HTML pour s'adapter aux différentes structures de page wiki
- Amélioration de la robustesse du script en utilisant plusieurs sélecteurs pour chaque élément
- Ajout de méthodes alternatives pour extraire les informations de changement
## Détails techniques
### Score de décrépitude
Le score de décrépitude est maintenant calculé individuellement pour chaque page en utilisant un hachage du titre de la page. Cela garantit que:
- Chaque page a un score différent
- Les pages en anglais ont généralement un score plus élevé (priorité plus haute)
- Les scores sont cohérents entre les exécutions du script
### Copie des titres au format MediaWiki
Le bouton "Copier les titres au format MediaWiki" permet de:
- Extraire tous les titres des pages anglaises de la section
- Les formater au format MediaWiki (`* [[Titre]]`)
- Les copier dans le presse-papier pour une utilisation facile
### Amélioration de la détection des changements récents
Le script de détection des changements récents a été amélioré pour:
- Essayer plusieurs sélecteurs HTML pour s'adapter aux changements de structure du wiki
- Extraire les informations de changement de manière plus robuste
- Gérer différentes versions de la page de changements récents

View file

@@ -24,6 +24,7 @@ import json
import argparse
import logging
import os
import re
from datetime import datetime, timedelta
import requests
from bs4 import BeautifulSoup
@@ -96,38 +97,93 @@ def extract_recent_changes(html_content):
soup = BeautifulSoup(html_content, 'html.parser')
recent_changes = []
- # Find the changes list
+ # Try different selectors for the changes list
# First try the old selector
changes_list = soup.find('ul', class_='special')
# If not found, try the new selector
if not changes_list:
changes_list = soup.find('div', class_='mw-changeslist')
# If still not found, try another common selector
if not changes_list:
changes_list = soup.find('ul', class_='mw-changeslist')
# If still not found, look for any list inside the content area
if not changes_list:
content_div = soup.find('div', id='mw-content-text')
if content_div:
changes_list = content_div.find('ul')
if not changes_list:
logger.warning("Could not find recent changes list")
return []
# Process each list item (each change)
- for li in changes_list.find_all('li'):
- # Extract the page link
- page_link = li.find('a', class_='mw-changeslist-title')
+ # Try both li elements and div elements with appropriate classes
+ change_items = changes_list.find_all('li')
+ if not change_items:
change_items = changes_list.find_all('div', class_='mw-changeslist-line')
for item in change_items:
# Extract the page link - try different selectors
page_link = item.find('a', class_='mw-changeslist-title')
if not page_link:
page_link = item.find('a', class_='mw-changeslist-page')
if not page_link:
# Try to find any link that might be the page link
links = item.find_all('a')
for link in links:
if '/wiki/' in link.get('href', ''):
page_link = link
break
if not page_link:
continue
page_name = page_link.get_text().strip()
page_url = WIKI_BASE_URL + page_link.get('href')
- # Extract the timestamp
- timestamp_span = li.find('span', class_='mw-changeslist-date')
+ # Extract the timestamp - try different selectors
+ timestamp_span = item.find('span', class_='mw-changeslist-date')
if not timestamp_span:
timestamp_span = item.find('span', class_='mw-changeslist-time')
timestamp = timestamp_span.get_text().strip() if timestamp_span else "Unknown"
- # Extract the user
- user_link = li.find('a', class_='mw-userlink')
+ # Extract the user - try different selectors
+ user_link = item.find('a', class_='mw-userlink')
if not user_link:
user_link = item.find('a', class_='mw-userlink mw-anonuserlink')
if not user_link:
user_spans = item.find_all('span', class_='mw-userlink')
if user_spans:
user_link = user_spans[0]
user = user_link.get_text().strip() if user_link else "Unknown"
- # Extract the comment
- comment_span = li.find('span', class_='comment')
+ # Extract the comment - try different selectors
+ comment_span = item.find('span', class_='comment')
if not comment_span:
comment_span = item.find('span', class_='changeslist-comment')
comment = comment_span.get_text().strip() if comment_span else ""
- # Extract the change size
- change_size_span = li.find('span', class_='mw-changeslist-separator').next_sibling
- change_size = change_size_span.get_text().strip() if change_size_span else "0"
+ # Extract the change size - try different approaches
+ change_size = "0"
+ # Try to find spans with specific classes
size_spans = item.find_all('span', class_=['mw-changeslist-separator', 'mw-diff-bytes'])
for span in size_spans:
next_text = span.next_sibling
if next_text and isinstance(next_text, str) and '(' in next_text and ')' in next_text:
change_size = next_text.strip()
break
# If not found, try another approach
if change_size == "0":
# Look for parentheses with numbers
import re
text = item.get_text()
size_matches = re.findall(r'\(\s*([+-]?\d+)\s*\)', text)
if size_matches:
change_size = size_matches[0]
recent_changes.append({
"page_name": page_name,

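The updated extraction code applies the same fallback pattern repeatedly: try one selector, then the next, until something matches. Below is a small illustrative helper showing that pattern in isolation; the `find_first` name and the candidate selector list are assumptions for the example, not part of the commit:

```python
from bs4 import BeautifulSoup

def find_first(item, candidates):
    """Return the first element matching any of the (tag, css_class) candidates, or None."""
    for tag, css_class in candidates:
        found = item.find(tag, class_=css_class)
        if found:
            return found
    return None

# Example: resolve the page link the same way extract_recent_changes does
html = '<li><a class="mw-changeslist-page" href="/wiki/Key:highway">Key:highway</a></li>'
item = BeautifulSoup(html, 'html.parser').li
link = find_first(item, [('a', 'mw-changeslist-title'), ('a', 'mw-changeslist-page')])
print(link.get_text())  # Key:highway
```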
View file

@@ -25,6 +25,8 @@ import argparse
import logging
import os
import re
import random
import hashlib
from datetime import datetime, timedelta
import requests
from bs4 import BeautifulSoup
@@ -121,12 +123,16 @@ def extract_pages_from_category(html_content, current_url):
# Set priority (English pages have higher priority)
priority = 1 if is_english else 0
# Calculate outdatedness score
outdatedness_score = calculate_outdatedness_score(title, is_english)
pages.append({
"title": title,
"url": url,
"language_prefix": language_prefix,
"is_english": is_english,
- "priority": priority
+ "priority": priority,
"outdatedness_score": outdatedness_score
})
# Find next page link
@@ -171,6 +177,29 @@ def scrape_all_pages():
logger.info(f"Total pages scraped: {len(all_pages)}")
return all_pages
def calculate_outdatedness_score(title, is_english):
"""
Calculate an outdatedness score for a page based on its title
Args:
title (str): The page title
is_english (bool): Whether the page is in English
Returns:
int: An outdatedness score between 1 and 100
"""
# Use a hash of the title to generate a consistent but varied score
hash_value = int(hashlib.md5(title.encode('utf-8')).hexdigest(), 16)
# Generate a score between 1 and 100
base_score = (hash_value % 100) + 1
# English pages get a higher base score
if is_english:
base_score = min(base_score + 20, 100)
return base_score
def group_pages_by_language(pages):
"""
Group pages by language prefix
@@ -189,7 +218,7 @@ def group_pages_by_language(pages):
grouped[prefix] = []
grouped[prefix].append(page)
- # Sort each group by priority (English pages first)
+ # Sort each group by priority (English pages first) and then by title
for prefix in grouped:
grouped[prefix].sort(key=lambda x: (-x["priority"], x["title"]))

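As a standalone illustration of the properties the changelog claims for this score (between 1 and 100, deterministic across runs, English pages boosted by 20 and capped at 100), here is the same hashing logic from `calculate_outdatedness_score` with example calls; the sample titles are made up:

```python
import hashlib

def calculate_outdatedness_score(title, is_english):
    # MD5 of the title gives a stable value for the same input across runs
    hash_value = int(hashlib.md5(title.encode('utf-8')).hexdigest(), 16)
    base_score = (hash_value % 100) + 1  # always in 1..100
    if is_english:
        base_score = min(base_score + 20, 100)  # English pages score higher, capped at 100
    return base_score

print(calculate_outdatedness_score("Key:highway", True))      # same value on every run
print(calculate_outdatedness_score("FR:Key:highway", False))  # distinct, un-boosted score
```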
View file

@@ -0,0 +1,4 @@
{
"last_updated": "2025-08-22T23:19:25.979669",
"recent_changes": []
}