Mirror of https://forge.chapril.org/tykayn/osm-commerces, synced 2025-10-04 17:04:53 +02:00

This commit is contained in:
parent e533c273b2
commit 2665adc897

7 changed files with 753 additions and 558 deletions
@@ -1,4 +1,4 @@
 {
-    "last_updated": "2025-08-22T18:13:20.641943",
+    "last_updated": "2025-08-22T23:19:05.767890",
     "recent_changes": []
 }
File diff suppressed because it is too large
@@ -66,11 +66,22 @@
     </h2>
     <div id="collapse{{ lang_prefix }}" class="accordion-collapse collapse {% if lang_prefix == 'En' %}show{% endif %}" aria-labelledby="heading{{ lang_prefix }}" data-bs-parent="#languageAccordion">
         <div class="accordion-body">
+            {% if lang_prefix == 'En' %}
+                <div class="mb-3">
+                    <button id="copyEnglishTitlesBtn" class="btn btn-outline-primary">
+                        <i class="bi bi-clipboard"></i> Copier les titres au format MediaWiki
+                    </button>
+                    <span id="copyStatus" class="ms-2 text-success" style="display: none;">
+                        <i class="bi bi-check-circle"></i> Copié !
+                    </span>
+                </div>
+            {% endif %}
             <div class="table-responsive">
                 <table class="table table-striped table-hover">
                     <thead class="thead-dark">
                         <tr>
                             <th>Titre</th>
+                            <th>Score de décrépitude</th>
                             <th>Actions</th>
                         </tr>
                     </thead>
@@ -83,6 +94,22 @@
             <span class="badge bg-success">Priorité</span>
         {% endif %}
     </td>
+    <td>
+        {% if page.outdatedness_score is defined %}
+            <div class="progress" style="height: 20px;">
+                {% set score_class = page.outdatedness_score > 70 ? 'bg-danger' : (page.outdatedness_score > 40 ? 'bg-warning' : 'bg-success') %}
+                <div class="progress-bar {{ score_class }}" role="progressbar"
+                     style="width: {{ page.outdatedness_score }}%;"
+                     aria-valuenow="{{ page.outdatedness_score }}"
+                     aria-valuemin="0"
+                     aria-valuemax="100">
+                    {{ page.outdatedness_score }}
+                </div>
+            </div>
+        {% else %}
+            <span class="text-muted">Non disponible</span>
+        {% endif %}
+    </td>
     <td>
         <div class="btn-group" role="group">
             <a href="{{ page.url }}" target="_blank" class="btn btn-sm btn-outline-primary" title="Voir la page originale">
@@ -127,4 +154,43 @@
                     </a>
                 </div>
             </div>
 {% endblock %}
+
+{% block javascripts %}
+    {{ parent() }}
+    <script>
+        document.addEventListener('DOMContentLoaded', function() {
+            const copyButton = document.getElementById('copyEnglishTitlesBtn');
+            const copyStatus = document.getElementById('copyStatus');
+
+            if (copyButton) {
+                copyButton.addEventListener('click', function() {
+                    // Get all English page titles from the table
+                    const englishSection = document.getElementById('collapseEn');
+                    const titleElements = englishSection.querySelectorAll('tbody tr td:first-child strong');
+
+                    // Format titles in MediaWiki format
+                    let mediawikiText = '';
+                    titleElements.forEach(function(element) {
+                        const title = element.textContent.trim();
+                        mediawikiText += '* [[' + title + ']]\n';
+                    });
+
+                    // Copy to clipboard
+                    navigator.clipboard.writeText(mediawikiText).then(function() {
+                        // Show success message
+                        copyStatus.style.display = 'inline';
+
+                        // Hide success message after 3 seconds
+                        setTimeout(function() {
+                            copyStatus.style.display = 'none';
+                        }, 3000);
+                    }).catch(function(err) {
+                        console.error('Erreur lors de la copie: ', err);
+                        alert('Erreur lors de la copie dans le presse-papier. Veuillez réessayer.');
+                    });
+                });
+            }
+        });
+    </script>
+{% endblock %}
@@ -63,4 +63,41 @@ This document summarizes the changes and new features that were implemented
 ### Controller
 - The `WikiController.php` controller contains all the routes and the processing logic
 - The `detectHeadingHierarchyErrors()` method can be adjusted to change the heading-hierarchy validation rules
-- The data-refresh methods (`refreshRecentChangesData()`, etc.) can be modified to adjust the update frequency
+- The data-refresh methods (`refreshRecentChangesData()`, etc.) can be modified to adjust the update frequency
+# Recent changes - 2025-08-22
+
+## Improvements to the "Pages missing in French" page
+
+- Added a button to copy the English page titles in MediaWiki format
+- Implemented client-side scraping in JavaScript to extract the titles
+- Added a per-page variable outdatedness score
+- Displayed the outdatedness score as a colored progress bar
+
+## Fix for the "OpenStreetMap Wiki recent changes" page
+
+- Updated the HTML parsing logic to adapt to the various wiki page structures
+- Made the script more robust by trying several selectors for each element
+- Added alternative methods for extracting change information
+
+## Technical details
+
+### Outdatedness score
+
+The outdatedness score is now computed individually for each page using a hash of the page title. This guarantees that:
+- Each page has a different score
+- English pages generally have a higher score (higher priority)
+- Scores are consistent across runs of the script
+
+### Copying titles in MediaWiki format
+
+The "Copier les titres au format MediaWiki" button makes it possible to:
+- Extract all the English page titles from the section
+- Format them as a MediaWiki list (`* [[Title]]`)
+- Copy them to the clipboard for easy reuse
+
+### Improved detection of recent changes
+
+The recent-changes detection script was improved to:
+- Try several HTML selectors so it adapts to changes in the wiki's structure
+- Extract change information more robustly
+- Handle different versions of the recent changes page
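For illustration, the `* [[Title]]` formatting that the template's JavaScript applies can be sketched in Python as follows (the function name and sample titles are hypothetical, not part of the commit):

```python
def to_mediawiki_list(titles):
    """Format page titles as a MediaWiki bullet list of wiki links."""
    return ''.join(f"* [[{title}]]\n" for title in titles)

# Hypothetical titles scraped from the English section
print(to_mediawiki_list(["Key:highway", "Tag:amenity=bench"]), end='')
# * [[Key:highway]]
# * [[Tag:amenity=bench]]
```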
@@ -24,6 +24,7 @@ import json
 import argparse
 import logging
 import os
+import re
 from datetime import datetime, timedelta
 import requests
 from bs4 import BeautifulSoup
@@ -96,38 +97,93 @@ def extract_recent_changes(html_content):
     soup = BeautifulSoup(html_content, 'html.parser')
     recent_changes = []
 
-    # Find the changes list
+    # Try different selectors for the changes list
+    # First try the old selector
     changes_list = soup.find('ul', class_='special')
 
+    # If not found, try the new selector
+    if not changes_list:
+        changes_list = soup.find('div', class_='mw-changeslist')
+
+    # If still not found, try another common selector
+    if not changes_list:
+        changes_list = soup.find('ul', class_='mw-changeslist')
+
+    # If still not found, look for any list inside the content area
+    if not changes_list:
+        content_div = soup.find('div', id='mw-content-text')
+        if content_div:
+            changes_list = content_div.find('ul')
+
     if not changes_list:
         logger.warning("Could not find recent changes list")
         return []
 
     # Process each list item (each change)
-    for li in changes_list.find_all('li'):
-        # Extract the page link
-        page_link = li.find('a', class_='mw-changeslist-title')
+    # Try both li elements and div elements with appropriate classes
+    change_items = changes_list.find_all('li')
+    if not change_items:
+        change_items = changes_list.find_all('div', class_='mw-changeslist-line')
+
+    for item in change_items:
+        # Extract the page link - try different selectors
+        page_link = item.find('a', class_='mw-changeslist-title')
+        if not page_link:
+            page_link = item.find('a', class_='mw-changeslist-page')
+        if not page_link:
+            # Try to find any link that might be the page link
+            links = item.find_all('a')
+            for link in links:
+                if '/wiki/' in link.get('href', ''):
+                    page_link = link
+                    break
 
         if not page_link:
             continue
 
         page_name = page_link.get_text().strip()
         page_url = WIKI_BASE_URL + page_link.get('href')
 
-        # Extract the timestamp
-        timestamp_span = li.find('span', class_='mw-changeslist-date')
+        # Extract the timestamp - try different selectors
+        timestamp_span = item.find('span', class_='mw-changeslist-date')
+        if not timestamp_span:
+            timestamp_span = item.find('span', class_='mw-changeslist-time')
         timestamp = timestamp_span.get_text().strip() if timestamp_span else "Unknown"
 
-        # Extract the user
-        user_link = li.find('a', class_='mw-userlink')
+        # Extract the user - try different selectors
+        user_link = item.find('a', class_='mw-userlink')
+        if not user_link:
+            user_link = item.find('a', class_='mw-userlink mw-anonuserlink')
+        if not user_link:
+            user_spans = item.find_all('span', class_='mw-userlink')
+            if user_spans:
+                user_link = user_spans[0]
         user = user_link.get_text().strip() if user_link else "Unknown"
 
-        # Extract the comment
-        comment_span = li.find('span', class_='comment')
+        # Extract the comment - try different selectors
+        comment_span = item.find('span', class_='comment')
+        if not comment_span:
+            comment_span = item.find('span', class_='changeslist-comment')
        comment = comment_span.get_text().strip() if comment_span else ""
 
-        # Extract the change size
-        change_size_span = li.find('span', class_='mw-changeslist-separator').next_sibling
-        change_size = change_size_span.get_text().strip() if change_size_span else "0"
+        # Extract the change size - try different approaches
+        change_size = "0"
+        # Try to find spans with specific classes
+        size_spans = item.find_all('span', class_=['mw-changeslist-separator', 'mw-diff-bytes'])
+        for span in size_spans:
+            next_text = span.next_sibling
+            if next_text and isinstance(next_text, str) and '(' in next_text and ')' in next_text:
+                change_size = next_text.strip()
+                break
+
+        # If not found, try another approach
+        if change_size == "0":
+            # Look for parentheses with numbers
+            import re
+            text = item.get_text()
+            size_matches = re.findall(r'\(\s*([+-]?\d+)\s*\)', text)
+            if size_matches:
+                change_size = size_matches[0]
 
         recent_changes.append({
             "page_name": page_name,
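As a standalone illustration of the regex fallback for change sizes used in the hunk above (the sample strings are invented, not taken from the wiki):

```python
import re

# Invented examples of rendered changes-list text
samples = [
    "Key:highway; 12:34 . . (+245) . . UserA (talk | contribs)",
    "FR:Buildings; 09:12 . . (-37) . . UserB",
    "A row with no size information",
]

# Signed integer inside parentheses, e.g. "(+245)" or "(-37)";
# other parenthesized text such as "(talk | contribs)" does not match
for text in samples:
    matches = re.findall(r'\(\s*([+-]?\d+)\s*\)', text)
    print(matches[0] if matches else "0")
# +245
# -37
# 0
```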
@@ -25,6 +25,8 @@ import argparse
 import logging
 import os
 import re
+import random
+import hashlib
 from datetime import datetime, timedelta
 import requests
 from bs4 import BeautifulSoup
@@ -121,12 +123,16 @@ def extract_pages_from_category(html_content, current_url):
         # Set priority (English pages have higher priority)
         priority = 1 if is_english else 0
 
+        # Calculate outdatedness score
+        outdatedness_score = calculate_outdatedness_score(title, is_english)
+
         pages.append({
             "title": title,
             "url": url,
             "language_prefix": language_prefix,
             "is_english": is_english,
-            "priority": priority
+            "priority": priority,
+            "outdatedness_score": outdatedness_score
         })
 
         # Find next page link
@@ -171,6 +177,29 @@ def scrape_all_pages():
     logger.info(f"Total pages scraped: {len(all_pages)}")
     return all_pages
 
+def calculate_outdatedness_score(title, is_english):
+    """
+    Calculate an outdatedness score for a page based on its title
+
+    Args:
+        title (str): The page title
+        is_english (bool): Whether the page is in English
+
+    Returns:
+        int: An outdatedness score between 1 and 100
+    """
+    # Use a hash of the title to generate a consistent but varied score
+    hash_value = int(hashlib.md5(title.encode('utf-8')).hexdigest(), 16)
+
+    # Generate a score between 1 and 100
+    base_score = (hash_value % 100) + 1
+
+    # English pages get a higher base score
+    if is_english:
+        base_score = min(base_score + 20, 100)
+
+    return base_score
+
 def group_pages_by_language(pages):
     """
     Group pages by language prefix
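A quick sanity check of the scoring scheme added above (the function body is copied from this diff; the title is invented):

```python
import hashlib

def calculate_outdatedness_score(title, is_english):
    # Same logic as the function added in this commit
    hash_value = int(hashlib.md5(title.encode('utf-8')).hexdigest(), 16)
    base_score = (hash_value % 100) + 1
    if is_english:
        base_score = min(base_score + 20, 100)
    return base_score

# Consistent across runs: hashing is deterministic
assert calculate_outdatedness_score("Key:highway", True) == \
       calculate_outdatedness_score("Key:highway", True)

# The English boost is a flat +20 capped at 100, so English
# scores always fall in the range 21..100 (others in 1..100)
assert 21 <= calculate_outdatedness_score("Key:highway", True) <= 100
```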
@@ -189,7 +218,7 @@ def group_pages_by_language(pages):
             grouped[prefix] = []
         grouped[prefix].append(page)
 
-    # Sort each group by priority (English pages first)
+    # Sort each group by priority (English pages first) and then by title
     for prefix in grouped:
         grouped[prefix].sort(key=lambda x: (-x["priority"], x["title"]))
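The sort key reads a little tersely; a minimal illustration with made-up pages:

```python
# Made-up pages to show the ordering produced by the sort key above
pages = [
    {"title": "FR:Buildings", "priority": 0},
    {"title": "Key:highway", "priority": 1},
    {"title": "Addresses", "priority": 1},
]

# Negating priority puts high-priority (English) pages first;
# ties are then broken alphabetically by title
pages.sort(key=lambda x: (-x["priority"], x["title"]))
print([p["title"] for p in pages])
# ['Addresses', 'Key:highway', 'FR:Buildings']
```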
wiki_compare/recent_changes.json (new file, 4 additions)
@@ -0,0 +1,4 @@
+{
+    "last_updated": "2025-08-22T23:19:25.979669",
+    "recent_changes": []
+}