change template panel left, create dashboard

Tykayn 2025-09-08 18:40:08 +02:00 committed by tykayn
parent 381f378db4
commit 539b4c094f
24 changed files with 1367 additions and 166 deletions


@@ -61,10 +61,12 @@ WIKI_BASE_URL_EN = "https://wiki.openstreetmap.org/wiki/Key:"
WIKI_BASE_URL_FR = "https://wiki.openstreetmap.org/wiki/FR:Key:"
WIKI_BASE_URL = "https://wiki.openstreetmap.org/wiki/"
WIKI_CATEGORY_URL = "https://wiki.openstreetmap.org/wiki/Category:FR:Traductions_d%C3%A9synchronis%C3%A9es"
WIKI_DEADEND_PAGES_URL = "https://wiki.openstreetmap.org/w/index.php?title=Special:DeadendPages&limit=500&offset=1000"
TOP_KEYS_FILE = "top_keys.json"
KEYS_WITHOUT_WIKI_FILE = "keys_without_wiki.json"
WIKI_PAGES_CSV = "wiki_pages.csv"
OUTDATED_PAGES_FILE = "outdated_pages.json"
DEADEND_PAGES_FILE = "deadend_pages.json"
STALENESS_HISTOGRAM_FILE = "staleness_histogram.png"
# Number of wiki pages to examine
NUM_WIKI_PAGES = 2
@@ -154,6 +156,161 @@ def fetch_desynchronized_pages():
        logger.error(f"Error fetching category page: {e}")
        return []

def suggest_categories(page_title, page_url):
    """
    Suggest categories for an uncategorized page based on its title and content

    Args:
        page_title (str): Title of the page
        page_url (str): URL of the page

    Returns:
        list: List of suggested categories
    """
    logger.info(f"Suggesting categories for page: {page_title}")
    suggested_categories = []

    # Common categories for French OSM wiki pages
    common_categories = [
        "Documentation OSM en français",
        "Cartographie",
        "Contributeurs",
        "Développeurs",
        "Éléments cartographiés",
        "Imports",
        "Logiciels",
        "Projets",
        "Rencontres",
        "Utilisateurs"
    ]

    # Add geography-related categories for pages about France
    if "France" in page_title:
        suggested_categories.append("France")

        # Check for specific regions or departments
        regions = [
            "Auvergne-Rhône-Alpes", "Bourgogne-Franche-Comté", "Bretagne",
            "Centre-Val de Loire", "Corse", "Grand Est", "Hauts-de-France",
            "Île-de-France", "Normandie", "Nouvelle-Aquitaine",
            "Occitanie", "Pays de la Loire", "Provence-Alpes-Côte d'Azur"
        ]
        for region in regions:
            if region in page_title:
                suggested_categories.append(region)

    # Try to fetch the page content to make better suggestions
    try:
        response = requests.get(page_url)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Get the main content
        content = soup.select_one('#mw-content-text')
        if content:
            text = content.get_text(separator=' ', strip=True).lower()

            # Check for keywords related to common categories
            if any(keyword in text for keyword in ["carte", "cartographie", "mapper"]):
                suggested_categories.append("Cartographie")
            if any(keyword in text for keyword in ["contribuer", "contributeur", "éditer"]):
                suggested_categories.append("Contributeurs")
            if any(keyword in text for keyword in ["développeur", "programmer", "code", "api"]):
                suggested_categories.append("Développeurs")
            if any(keyword in text for keyword in ["tag", "clé", "valeur", "élément", "nœud", "way", "relation"]):
                suggested_categories.append("Éléments cartographiés")
            if any(keyword in text for keyword in ["import", "données", "dataset"]):
                suggested_categories.append("Imports")
            if any(keyword in text for keyword in ["logiciel", "application", "outil"]):
                suggested_categories.append("Logiciels")
            if any(keyword in text for keyword in ["projet", "initiative"]):
                suggested_categories.append("Projets")
            if any(keyword in text for keyword in ["rencontre", "réunion", "événement", "conférence"]):
                suggested_categories.append("Rencontres")
            if any(keyword in text for keyword in ["utiliser", "utilisateur", "usage"]):
                suggested_categories.append("Utilisateurs")
    except requests.exceptions.RequestException as e:
        logger.warning(f"Error fetching page content for category suggestions: {e}")
        # If we can't fetch the content, suggest common categories based on the title only
        if "projet" in page_title.lower():
            suggested_categories.append("Projets")
        elif "logiciel" in page_title.lower() or "application" in page_title.lower():
            suggested_categories.append("Logiciels")
        elif "rencontre" in page_title.lower() or "réunion" in page_title.lower():
            suggested_categories.append("Rencontres")

    # Always suggest the general French documentation category
    suggested_categories.append("Documentation OSM en français")

    # Remove duplicates while preserving order
    seen = set()
    unique_categories = []
    for cat in suggested_categories:
        if cat not in seen:
            seen.add(cat)
            unique_categories.append(cat)

    logger.info(f"Suggested {len(unique_categories)} categories for {page_title}: {', '.join(unique_categories)}")
    return unique_categories
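
A minimal usage sketch (the title, URL, and returned list below are illustrative, not taken from the commit):

    # Hypothetical call: a title containing both "France" and "Bretagne" trips the
    # geography rules; the content-keyword rules only fire if the page fetch succeeds.
    cats = suggest_categories(
        "France/Bretagne/Projet exemple",
        "https://wiki.openstreetmap.org/wiki/France/Bretagne/Projet_exemple"
    )
    # e.g. ['France', 'Bretagne', 'Projets', 'Documentation OSM en français']

The manual seen-set loop keeps first-occurrence order; on Python 3.7+, list(dict.fromkeys(suggested_categories)) would be an equivalent one-liner.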

def fetch_deadend_pages():
    """
    Fetch pages starting with "France" from the DeadendPages list

    Returns:
        list: List of dictionaries containing page information
    """
    logger.info(f"Fetching pages from DeadendPages list: {WIKI_DEADEND_PAGES_URL}")
    try:
        response = requests.get(WIKI_DEADEND_PAGES_URL)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Find all links in the DeadendPages list
        page_links = []
        for link in soup.select('.mw-spcontent li a'):
            href = link.get('href', '')
            title = link.get_text(strip=True)

            # Skip if it's not a wiki page or if it's a special page
            if not href.startswith('/wiki/') or 'Special:' in href:
                continue

            # Filter pages that start with "France"
            if title.startswith('France'):
                # Get the full URL
                full_url = 'https://wiki.openstreetmap.org' + href

                # Suggest categories for this page
                suggested_categories = suggest_categories(title, full_url)

                page_links.append({
                    'title': title,
                    'url': full_url,
                    'suggested_categories': suggested_categories
                })

        logger.info(f"Found {len(page_links)} pages starting with 'France' in the DeadendPages list")
        return page_links
    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching DeadendPages list: {e}")
        return []
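
Because Special:DeadendPages is paginated (the hard-coded URL asks for 500 entries starting at offset 1000), a single request can miss entries. A hedged sketch of a paginating variant, assuming the same markup and selector as above; this helper is not part of the commit:

    def fetch_deadend_links_paginated(step=500, max_offset=2000):
        # Hypothetical helper: walk the special page in fixed-size chunks.
        base = "https://wiki.openstreetmap.org/w/index.php?title=Special:DeadendPages"
        links = []
        for offset in range(0, max_offset, step):
            response = requests.get(f"{base}&limit={step}&offset={offset}")
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            batch = soup.select('.mw-spcontent li a')  # same selector as above
            if not batch:
                break  # ran past the end of the list
            links.extend(batch)
        return links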

def fetch_top_keys(limit=NUM_WIKI_PAGES):
    """
    Fetch the most used OSM keys from TagInfo API
@@ -1365,10 +1522,11 @@ def main():
    3. Fetches and processes wiki pages for these keys
    4. Processes specific wiki pages listed in SPECIFIC_PAGES
    5. Processes pages from the FR:Traductions_désynchronisées category
-   6. Calculates staleness scores for all pages
-   7. Generates a histogram of staleness scores
-   8. Saves the results to CSV and JSON files
-   9. Prints a list of pages that need updating
+   6. Processes pages starting with "France" from the DeadendPages list
+   7. Calculates staleness scores for all pages
+   8. Generates a histogram of staleness scores
+   9. Saves the results to CSV and JSON files
+   10. Prints a list of pages that need updating
    """
    # Parse command-line arguments
    parser = argparse.ArgumentParser(description='Compare OpenStreetMap wiki pages in English and French.')
@@ -1404,6 +1562,62 @@
        logger.info(f"Saved {len(keys_without_wiki)} keys without wiki pages to {KEYS_WITHOUT_WIKI_FILE}")
    else:
        logger.warning("No keys without wiki pages were fetched.")

    # Fetch pages starting with "France" from the DeadendPages list
    deadend_pages = fetch_deadend_pages()
    if deadend_pages:
        # Load existing deadend pages data to compare with history
        existing_data = load_json_data(DEADEND_PAGES_FILE)

        # Initialize history if it doesn't exist
        if 'history' not in existing_data:
            existing_data['history'] = {}

        # Get the most recent history entry
        sorted_timestamps = sorted(existing_data.get('history', {}).keys())
        previous_pages = []
        if sorted_timestamps:
            latest_timestamp = sorted_timestamps[-1]
            previous_pages = existing_data['history'][latest_timestamp].get('pages', [])

        # Find pages that were in the previous list but are no longer in the current list
        previous_urls = [page['url'] for page in previous_pages]
        current_urls = [page['url'] for page in deadend_pages]
        categorized_pages = []
        for url in previous_urls:
            if url not in current_urls:
                # Find the page in previous_pages
                for page in previous_pages:
                    if page['url'] == url:
                        # This page is no longer in the DeadendPages list, which means it has been categorized
                        categorized_pages.append(page)
                        break

        # Create a timestamp for the current data
        current_timestamp = datetime.now().isoformat()

        # Create the history entry
        history_entry = {
            'pages': deadend_pages,
            'categorized_pages': categorized_pages
        }

        # Add the entry to history with timestamp as key
        existing_data['history'][current_timestamp] = history_entry

        # Update the current data
        existing_data['pages'] = deadend_pages
        existing_data['categorized_pages'] = categorized_pages
        existing_data['last_updated'] = current_timestamp

        # Save the updated data
        save_to_json(existing_data, DEADEND_PAGES_FILE)
        logger.info(f"Saved {len(deadend_pages)} deadend pages to {DEADEND_PAGES_FILE}")
        logger.info(f"Found {len(categorized_pages)} pages that have been categorized since the last run")
    else:
        logger.warning("No deadend pages were fetched.")

    # Fetch wiki pages for each key
    wiki_pages = []