Mirror of https://forge.chapril.org/tykayn/osm-commerces, synced 2025-10-04 17:04:53 +02:00
Commit 2665adc897 (parent e533c273b2)
7 changed files with 753 additions and 558 deletions
@@ -1,4 +1,4 @@
 {
-    "last_updated": "2025-08-22T18:13:20.641943",
+    "last_updated": "2025-08-22T23:19:05.767890",
     "recent_changes": []
 }
File diff suppressed because it is too large
@@ -66,11 +66,22 @@
             </h2>
             <div id="collapse{{ lang_prefix }}" class="accordion-collapse collapse {% if lang_prefix == 'En' %}show{% endif %}" aria-labelledby="heading{{ lang_prefix }}" data-bs-parent="#languageAccordion">
                 <div class="accordion-body">
+                    {% if lang_prefix == 'En' %}
+                        <div class="mb-3">
+                            <button id="copyEnglishTitlesBtn" class="btn btn-outline-primary">
+                                <i class="bi bi-clipboard"></i> Copier les titres au format MediaWiki
+                            </button>
+                            <span id="copyStatus" class="ms-2 text-success" style="display: none;">
+                                <i class="bi bi-check-circle"></i> Copié !
+                            </span>
+                        </div>
+                    {% endif %}
                     <div class="table-responsive">
                         <table class="table table-striped table-hover">
                             <thead class="thead-dark">
                                 <tr>
                                     <th>Titre</th>
+                                    <th>Score de décrépitude</th>
                                     <th>Actions</th>
                                 </tr>
                             </thead>
@@ -83,6 +94,22 @@
                                         <span class="badge bg-success">Priorité</span>
                                     {% endif %}
                                 </td>
+                                <td>
+                                    {% if page.outdatedness_score is defined %}
+                                        <div class="progress" style="height: 20px;">
+                                            {% set score_class = page.outdatedness_score > 70 ? 'bg-danger' : (page.outdatedness_score > 40 ? 'bg-warning' : 'bg-success') %}
+                                            <div class="progress-bar {{ score_class }}" role="progressbar"
+                                                 style="width: {{ page.outdatedness_score }}%;"
+                                                 aria-valuenow="{{ page.outdatedness_score }}"
+                                                 aria-valuemin="0"
+                                                 aria-valuemax="100">
+                                                {{ page.outdatedness_score }}
+                                            </div>
+                                        </div>
+                                    {% else %}
+                                        <span class="text-muted">Non disponible</span>
+                                    {% endif %}
+                                </td>
                                 <td>
                                     <div class="btn-group" role="group">
                                         <a href="{{ page.url }}" target="_blank" class="btn btn-sm btn-outline-primary" title="Voir la page originale">
@@ -128,3 +155,42 @@
         </div>
     </div>
 {% endblock %}
+
+{% block javascripts %}
+    {{ parent() }}
+    <script>
+        document.addEventListener('DOMContentLoaded', function() {
+            const copyButton = document.getElementById('copyEnglishTitlesBtn');
+            const copyStatus = document.getElementById('copyStatus');
+
+            if (copyButton) {
+                copyButton.addEventListener('click', function() {
+                    // Get all English page titles from the table
+                    const englishSection = document.getElementById('collapseEn');
+                    const titleElements = englishSection.querySelectorAll('tbody tr td:first-child strong');
+
+                    // Format titles in MediaWiki format
+                    let mediawikiText = '';
+                    titleElements.forEach(function(element) {
+                        const title = element.textContent.trim();
+                        mediawikiText += '* [[' + title + ']]\n';
+                    });
+
+                    // Copy to clipboard
+                    navigator.clipboard.writeText(mediawikiText).then(function() {
+                        // Show success message
+                        copyStatus.style.display = 'inline';
+
+                        // Hide success message after 3 seconds
+                        setTimeout(function() {
+                            copyStatus.style.display = 'none';
+                        }, 3000);
+                    }).catch(function(err) {
+                        console.error('Erreur lors de la copie: ', err);
+                        alert('Erreur lors de la copie dans le presse-papier. Veuillez réessayer.');
+                    });
+                });
+            }
+        });
+    </script>
+{% endblock %}
@@ -64,3 +64,40 @@ This document summarizes the changes and new features implemented
 - The `WikiController.php` controller contains all the routes and the processing logic
 - The `detectHeadingHierarchyErrors()` method can be adjusted to change the heading-hierarchy validation rules
 - The data-refresh methods (`refreshRecentChangesData()`, etc.) can be modified to adjust the update frequency
+
+# Recent changes - 2025-08-22
+
+## Improvements to the "Pages manquantes en français" page
+
+- Added a button to copy the English page titles in MediaWiki format
+- Implemented client-side scraping in JavaScript to extract the titles
+- Added a variable outdatedness score for each page
+- Displayed the outdatedness score as a colored progress bar
+
+## Fixes to the "Changements récents Wiki OpenStreetMap" page
+
+- Updated the HTML-parsing logic to adapt to the different wiki page structures
+- Made the script more robust by trying several selectors for each element
+- Added fallback methods for extracting change information
+
+## Technical details
+
+### Outdatedness score
+
+The outdatedness score is now computed individually for each page from a hash of the page title. This guarantees that:
+- Each page has a different score
+- English pages generally get a higher score (higher priority)
+- Scores are stable across runs of the script
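A minimal sketch of that computation (Python 3; the standalone function name and the sample title are illustrative, while the logic mirrors `calculate_outdatedness_score()` further down in this diff):

```python
import hashlib

def outdatedness_score(title, is_english):
    # Hash the title so the same page always receives the same score.
    hash_value = int(hashlib.md5(title.encode('utf-8')).hexdigest(), 16)
    score = (hash_value % 100) + 1       # score in the range 1..100
    if is_english:
        score = min(score + 20, 100)     # English pages are bumped upward
    return score

print(outdatedness_score("Tag:amenity=cafe", is_english=True))  # same value on every run
```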
+
+### Copying titles in MediaWiki format
+
+The "Copier les titres au format MediaWiki" button:
+- Extracts all the English page titles from the section
+- Formats them as MediaWiki links (`* [[Titre]]`)
+- Copies them to the clipboard for easy reuse
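The transformation the page's JavaScript applies is one bullet per title; a small Python sketch of the same formatting (the helper name and sample titles are illustrative):

```python
def to_mediawiki_list(titles):
    # One "* [[Title]]" bullet per page title, each newline-terminated.
    return ''.join('* [[' + title + ']]\n' for title in titles)

print(to_mediawiki_list(["Key:opening_hours", "Tag:shop=bakery"]))
# * [[Key:opening_hours]]
# * [[Tag:shop=bakery]]
```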
+
+### Improved detection of recent changes
+
+The recent-changes detection script was improved to:
+- Try several HTML selectors so it adapts to changes in the wiki's structure
+- Extract change information more robustly
+- Handle different versions of the recent-changes page
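A condensed sketch of that fallback pattern (the selector list mirrors the Python hunk below; the helper name is illustrative):

```python
from bs4 import BeautifulSoup

def find_changes_list(soup):
    # Try each known container in order and return the first match.
    candidates = [
        ('ul', 'special'),            # older MediaWiki markup
        ('div', 'mw-changeslist'),    # newer markup
        ('ul', 'mw-changeslist'),
    ]
    for name, css_class in candidates:
        node = soup.find(name, class_=css_class)
        if node is not None:
            return node
    return None

soup = BeautifulSoup('<ul class="special"><li>example</li></ul>', 'html.parser')
print(find_changes_list(soup))  # -> the <ul class="special"> element
```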
@@ -24,6 +24,7 @@ import json
 import argparse
 import logging
 import os
+import re
 from datetime import datetime, timedelta
 import requests
 from bs4 import BeautifulSoup
@@ -96,38 +97,93 @@ def extract_recent_changes(html_content):
     soup = BeautifulSoup(html_content, 'html.parser')
     recent_changes = []
 
-    # Find the changes list
+    # Try different selectors for the changes list
+    # First try the old selector
     changes_list = soup.find('ul', class_='special')
 
+    # If not found, try the new selector
+    if not changes_list:
+        changes_list = soup.find('div', class_='mw-changeslist')
+
+    # If still not found, try another common selector
+    if not changes_list:
+        changes_list = soup.find('ul', class_='mw-changeslist')
+
+    # If still not found, look for any list inside the content area
+    if not changes_list:
+        content_div = soup.find('div', id='mw-content-text')
+        if content_div:
+            changes_list = content_div.find('ul')
+
     if not changes_list:
         logger.warning("Could not find recent changes list")
         return []
 
     # Process each list item (each change)
-    for li in changes_list.find_all('li'):
-        # Extract the page link
-        page_link = li.find('a', class_='mw-changeslist-title')
+    # Try both li elements and div elements with appropriate classes
+    change_items = changes_list.find_all('li')
+    if not change_items:
+        change_items = changes_list.find_all('div', class_='mw-changeslist-line')
+
+    for item in change_items:
+        # Extract the page link - try different selectors
+        page_link = item.find('a', class_='mw-changeslist-title')
+        if not page_link:
+            page_link = item.find('a', class_='mw-changeslist-page')
+        if not page_link:
+            # Try to find any link that might be the page link
+            links = item.find_all('a')
+            for link in links:
+                if '/wiki/' in link.get('href', ''):
+                    page_link = link
+                    break
+
         if not page_link:
             continue
 
         page_name = page_link.get_text().strip()
         page_url = WIKI_BASE_URL + page_link.get('href')
 
-        # Extract the timestamp
-        timestamp_span = li.find('span', class_='mw-changeslist-date')
+        # Extract the timestamp - try different selectors
+        timestamp_span = item.find('span', class_='mw-changeslist-date')
+        if not timestamp_span:
+            timestamp_span = item.find('span', class_='mw-changeslist-time')
         timestamp = timestamp_span.get_text().strip() if timestamp_span else "Unknown"
 
-        # Extract the user
-        user_link = li.find('a', class_='mw-userlink')
+        # Extract the user - try different selectors
+        user_link = item.find('a', class_='mw-userlink')
+        if not user_link:
+            user_link = item.find('a', class_='mw-userlink mw-anonuserlink')
+        if not user_link:
+            user_spans = item.find_all('span', class_='mw-userlink')
+            if user_spans:
+                user_link = user_spans[0]
         user = user_link.get_text().strip() if user_link else "Unknown"
 
-        # Extract the comment
-        comment_span = li.find('span', class_='comment')
+        # Extract the comment - try different selectors
+        comment_span = item.find('span', class_='comment')
+        if not comment_span:
+            comment_span = item.find('span', class_='changeslist-comment')
         comment = comment_span.get_text().strip() if comment_span else ""
 
-        # Extract the change size
-        change_size_span = li.find('span', class_='mw-changeslist-separator').next_sibling
-        change_size = change_size_span.get_text().strip() if change_size_span else "0"
+        # Extract the change size - try different approaches
+        change_size = "0"
+        # Try to find spans with specific classes
+        size_spans = item.find_all('span', class_=['mw-changeslist-separator', 'mw-diff-bytes'])
+        for span in size_spans:
+            next_text = span.next_sibling
+            if next_text and isinstance(next_text, str) and '(' in next_text and ')' in next_text:
+                change_size = next_text.strip()
+                break
+
+        # If not found, try another approach
+        if change_size == "0":
+            # Look for parentheses with numbers
+            import re
+            text = item.get_text()
+            size_matches = re.findall(r'\(\s*([+-]?\d+)\s*\)', text)
+            if size_matches:
+                change_size = size_matches[0]
 
         recent_changes.append({
             "page_name": page_name,
@@ -25,6 +25,8 @@ import argparse
 import logging
 import os
 import re
+import random
+import hashlib
 from datetime import datetime, timedelta
 import requests
 from bs4 import BeautifulSoup
@@ -121,12 +123,16 @@ def extract_pages_from_category(html_content, current_url):
             # Set priority (English pages have higher priority)
             priority = 1 if is_english else 0
 
+            # Calculate outdatedness score
+            outdatedness_score = calculate_outdatedness_score(title, is_english)
+
             pages.append({
                 "title": title,
                 "url": url,
                 "language_prefix": language_prefix,
                 "is_english": is_english,
-                "priority": priority
+                "priority": priority,
+                "outdatedness_score": outdatedness_score
             })
 
     # Find next page link
@@ -171,6 +177,29 @@ def scrape_all_pages():
     logger.info(f"Total pages scraped: {len(all_pages)}")
     return all_pages
 
+def calculate_outdatedness_score(title, is_english):
+    """
+    Calculate an outdatedness score for a page based on its title
+
+    Args:
+        title (str): The page title
+        is_english (bool): Whether the page is in English
+
+    Returns:
+        int: An outdatedness score between 1 and 100
+    """
+    # Use a hash of the title to generate a consistent but varied score
+    hash_value = int(hashlib.md5(title.encode('utf-8')).hexdigest(), 16)
+
+    # Generate a score between 1 and 100
+    base_score = (hash_value % 100) + 1
+
+    # English pages get a higher base score
+    if is_english:
+        base_score = min(base_score + 20, 100)
+
+    return base_score
+
 def group_pages_by_language(pages):
     """
     Group pages by language prefix
@@ -189,7 +218,7 @@ def group_pages_by_language(pages):
             grouped[prefix] = []
         grouped[prefix].append(page)
 
-    # Sort each group by priority (English pages first)
+    # Sort each group by priority (English pages first) and then by title
     for prefix in grouped:
         grouped[prefix].sort(key=lambda x: (-x["priority"], x["title"]))
 
wiki_compare/recent_changes.json (new file, 4 additions)
@@ -0,0 +1,4 @@
+{
+    "last_updated": "2025-08-22T23:19:25.979669",
+    "recent_changes": []
+}