#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
wiki_compare.py

This script fetches the most used OpenStreetMap keys from TagInfo,
compares their English and French wiki pages, and identifies which pages
need updating based on modification dates and content analysis.

Usage:
    python wiki_compare.py

Output:
    - top_keys.json: JSON file containing the most used OSM keys
    - wiki_pages.csv: CSV file with information about each wiki page
    - outdated_pages.json: JSON file containing pages that need updating
    - A console output listing the wiki pages that need updating
"""

import json
import csv
import requests
import re
import os
from datetime import datetime
from bs4 import BeautifulSoup
import logging

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)

# Constants
TAGINFO_API_URL = "https://taginfo.openstreetmap.org/api/4/keys/all"
WIKI_BASE_URL_EN = "https://wiki.openstreetmap.org/wiki/Key:"
WIKI_BASE_URL_FR = "https://wiki.openstreetmap.org/wiki/FR:Key:"
TOP_KEYS_FILE = "top_keys.json"
WIKI_PAGES_CSV = "wiki_pages.csv"
OUTDATED_PAGES_FILE = "outdated_pages.json"
# Number of wiki pages to examine
NUM_WIKI_PAGES = 50
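
# For reference, the query that fetch_top_keys() builds below corresponds to a
# request of the form (same endpoint and parameters as in the code):
#   https://taginfo.openstreetmap.org/api/4/keys/all?page=1&rp=50&sortname=count_all&sortorder=desc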

def fetch_top_keys(limit=NUM_WIKI_PAGES):
    """
    Fetch the most used OSM keys from TagInfo API

    Args:
        limit (int): Number of keys to fetch

    Returns:
        list: List of dictionaries containing key information
    """
    logger.info(f"Fetching top {limit} OSM keys from TagInfo API...")

    params = {
        'page': 1,
        'rp': limit,
        'sortname': 'count_all',
        'sortorder': 'desc'
    }

    try:
        response = requests.get(TAGINFO_API_URL, params=params)
        response.raise_for_status()
        data = response.json()

        # Extract just the key names and counts
        top_keys = [{'key': item['key'], 'count': item['count_all']} for item in data['data']]

        logger.info(f"Successfully fetched {len(top_keys)} keys")
        return top_keys

    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching data from TagInfo API: {e}")
        return []
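
# Illustrative shape of the list returned above and later saved to top_keys.json
# (counts are invented for the example, not real TagInfo figures):
#   [{"key": "building", "count": 123456789}, {"key": "highway", "count": 98765432}, ...]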

def save_to_json(data, filename):
    """
    Save data to a JSON file

    Args:
        data: Data to save
        filename (str): Name of the file
    """
    try:
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        logger.info(f"Data saved to {filename}")
    except IOError as e:
        logger.error(f"Error saving data to {filename}: {e}")

def fetch_wiki_page(key, language='en'):
    """
    Fetch wiki page for a given key

    Args:
        key (str): OSM key
        language (str): Language code ('en' or 'fr')

    Returns:
        dict: Dictionary with page information or None if page doesn't exist
    """
    base_url = WIKI_BASE_URL_EN if language == 'en' else WIKI_BASE_URL_FR
    url = f"{base_url}{key}"

    logger.info(f"Fetching {language} wiki page for key '{key}': {url}")

    try:
        response = requests.get(url)

        # Check if page exists
        if response.status_code == 404:
            logger.warning(f"Wiki page for key '{key}' in {language} does not exist")
            return None

        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # Get last modification date
        last_modified = None
        footer_info = soup.select_one('#footer-info-lastmod')
        if footer_info:
            date_text = footer_info.text
            # Extract date using regex
            date_match = re.search(r'(\d{1,2} \w+ \d{4})', date_text)
            if date_match:
                date_str = date_match.group(1)
                try:
                    # Parse date (format may vary based on wiki language)
                    last_modified = datetime.strptime(date_str, '%d %B %Y').strftime('%Y-%m-%d')
                except ValueError:
                    logger.warning(f"Could not parse date: {date_str}")

        # Extract sections (h2, h3, h4)
        section_elements = soup.select('h2, h3, h4')
        sections = len(section_elements)

        # Extract section titles
        section_titles = []
        for section_elem in section_elements:
            # Skip sections that are part of the table of contents, navigation, or DescriptionBox
            if section_elem.parent and section_elem.parent.get('id') in ['toc', 'mw-navigation']:
                continue

            # Skip sections that are inside a table with class DescriptionBox
            if section_elem.find_parent('table', class_='DescriptionBox'):
                continue

            # Get the text of the section title, removing any edit links
            for edit_link in section_elem.select('.mw-editsection'):
                edit_link.extract()

            section_title = section_elem.get_text(strip=True)
            section_level = int(section_elem.name[1])  # h2 -> 2, h3 -> 3, h4 -> 4

            section_titles.append({
                'title': section_title,
                'level': section_level
            })

        # Count words in the content
        content = soup.select_one('#mw-content-text')
        if content:
            # Remove script and style elements
            for script in content.select('script, style'):
                script.extract()

            # Remove .languages elements
            for languages_elem in content.select('.languages'):
                languages_elem.extract()

            # Get text and count words
            text = content.get_text(separator=' ', strip=True)
            word_count = len(text.split())

            # Extract links
            links = content.select('a')
            link_count = len(links)

            # Get link details (text and href)
            link_details = []
            for link in links:
                href = link.get('href', '')
                # Skip edit section links and other non-content links
                if 'action=edit' in href or 'redlink=1' in href or not href:
                    continue

                # Make relative URLs absolute
                if href.startswith('/'):
                    href = 'https://wiki.openstreetmap.org' + href

                link_text = link.get_text(strip=True)
                if link_text:  # Only include links with text
                    link_details.append({
                        'text': link_text,
                        'href': href
                    })

            # Extract media (images)
            media_elements = content.select('img')
            media_count = len(media_elements)

            # Get media details (src and alt text)
            media_details = []
            for img in media_elements:
                src = img.get('src', '')
                if src:
                    # Make relative URLs absolute
                    if src.startswith('//'):
                        src = 'https:' + src
                    elif src.startswith('/'):
                        src = 'https://wiki.openstreetmap.org' + src

                    alt_text = img.get('alt', '')
                    media_details.append({
                        'src': src,
                        'alt': alt_text
                    })

            # Extract categories
            categories = []
            category_links = soup.select('#mw-normal-catlinks li a')
            for cat_link in category_links:
                categories.append(cat_link.get_text(strip=True))
        else:
            word_count = 0
            link_count = 0
            link_details = []
            media_count = 0
            media_details = []
            categories = []

        return {
            'key': key,
            'language': language,
            'url': url,
            'last_modified': last_modified,
            'sections': sections,
            'section_titles': section_titles,
            'word_count': word_count,
            'link_count': link_count,
            'link_details': link_details,
            'media_count': media_count,
            'media_details': media_details,
            'categories': categories
        }

    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching wiki page for key '{key}' in {language}: {e}")
        return None
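
# Illustrative sketch only (not wired into fetch_wiki_page above): a
# locale-independent way to turn a French footer date such as "12 août 2025"
# into the same 'YYYY-MM-DD' format, assuming a "day month year" layout.
FR_MONTHS = {
    'janvier': 1, 'février': 2, 'mars': 3, 'avril': 4, 'mai': 5, 'juin': 6,
    'juillet': 7, 'août': 8, 'septembre': 9, 'octobre': 10, 'novembre': 11,
    'décembre': 12
}


def parse_french_date(date_str):
    """Return a 'YYYY-MM-DD' string for a French 'day month year' date, or None."""
    try:
        day, month_name, year = date_str.split()
        return datetime(int(year), FR_MONTHS[month_name.lower()], int(day)).strftime('%Y-%m-%d')
    except (ValueError, KeyError):
        return None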

def analyze_wiki_pages(pages):
    """
    Analyze wiki pages to determine which ones need updating

    Args:
        pages (list): List of dictionaries containing page information

    Returns:
        list: List of pages that need updating, sorted by priority
    """
    logger.info("Analyzing wiki pages to identify those needing updates...")

    # Group pages by key
    pages_by_key = {}
    for page in pages:
        if page is None:
            continue

        key = page['key']
        if key not in pages_by_key:
            pages_by_key[key] = {}

        pages_by_key[key][page['language']] = page

    # Analyze each key's pages
    needs_update = []

    for key, lang_pages in pages_by_key.items():
        # Skip if either language is missing
        if 'en' not in lang_pages or 'fr' not in lang_pages:
            if 'en' in lang_pages:
                # French page is missing
                # For missing French pages, calculate a high staleness score
                # Use word count as the main factor (50% weight)
                missing_staleness_score = (
                    30 * 0.2 +  # Assume 30 days outdated (20%)
                    lang_pages['en']['word_count'] / 100 * 0.5 +  # Word count (50%)
                    lang_pages['en']['sections'] * 0.15 +  # Sections (15%)
                    lang_pages['en']['link_count'] / 10 * 0.15  # Links (15%)
                )

                # Round to 2 decimal places and ensure it's high
                missing_staleness_score = max(100, round(missing_staleness_score, 2))

                # Get media count or default to 0
                media_count = lang_pages['en'].get('media_count', 0)

                needs_update.append({
                    'key': key,
                    'reason': 'French page missing',
                    'en_page': lang_pages['en'],
                    'fr_page': None,
                    'date_diff': 0,
                    'word_diff': lang_pages['en']['word_count'],
                    'section_diff': lang_pages['en']['sections'],
                    'link_diff': lang_pages['en']['link_count'],
                    'media_diff': media_count,
                    'staleness_score': missing_staleness_score,
                    'priority': missing_staleness_score,  # Use staleness score as priority
                    'section_comparison': None,  # No comparison possible
                    'link_comparison': None,  # No comparison possible
                    'media_comparison': None,  # No comparison possible
                    'category_comparison': None  # No comparison possible
                })
            continue

        en_page = lang_pages['en']
        fr_page = lang_pages['fr']

        # Skip if dates are missing
        if not en_page['last_modified'] or not fr_page['last_modified']:
            continue

        # Calculate date difference in days
        en_date = datetime.strptime(en_page['last_modified'], '%Y-%m-%d')
        fr_date = datetime.strptime(fr_page['last_modified'], '%Y-%m-%d')
        date_diff = (en_date - fr_date).days

        # Calculate content differences
        word_diff = en_page['word_count'] - fr_page['word_count']
        section_diff = en_page['sections'] - fr_page['sections']
        link_diff = en_page['link_count'] - fr_page['link_count']
        media_diff = en_page.get('media_count', 0) - fr_page.get('media_count', 0)

        # Calculate staleness score (higher means more outdated/stale)
        # Weight factors adjusted to emphasize word count differences
        staleness_score = (
            abs(date_diff) * 0.2 +  # Date difference (20%)
            abs(word_diff) / 100 * 0.5 +  # Word count difference (normalized) (50%)
            abs(section_diff) * 0.15 +  # Section difference (15%)
            abs(link_diff) / 10 * 0.15  # Link count difference (normalized) (15%)
        )

        # Round to 2 decimal places for display
        staleness_score = round(staleness_score, 2)
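
        # Worked example (illustrative numbers): date_diff=60, word_diff=400,
        # section_diff=3, link_diff=40 would give
        # 60*0.2 + 400/100*0.5 + 3*0.15 + 40/10*0.15 = 12 + 2 + 0.45 + 0.6 = 15.05.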

        # Compare sections between English and French pages
        section_comparison = {
            'en_only': [],
            'fr_only': [],
            'common': []
        }

        # Extract section titles for comparison
        en_sections = {section['title'].lower(): section for section in en_page.get('section_titles', [])}
        fr_sections = {section['title'].lower(): section for section in fr_page.get('section_titles', [])}

        # Find sections only in English
        for title, section in en_sections.items():
            if title not in fr_sections:
                section_comparison['en_only'].append(section)

        # Find sections only in French
        for title, section in fr_sections.items():
            if title not in en_sections:
                section_comparison['fr_only'].append(section)

        # Find common sections
        for title in en_sections.keys():
            if title in fr_sections:
                section_comparison['common'].append({
                    'en': en_sections[title],
                    'fr': fr_sections[title]
                })

        # Compare links between English and French pages
        link_comparison = {
            'en_only': [],
            'fr_only': [],
            'common': []
        }

        # Extract link texts for comparison (case insensitive)
        en_links = {link['text'].lower(): link for link in en_page.get('link_details', [])}
        fr_links = {link['text'].lower(): link for link in fr_page.get('link_details', [])}

        # Find links only in English
        for text, link in en_links.items():
            if text not in fr_links:
                link_comparison['en_only'].append(link)

        # Find links only in French
        for text, link in fr_links.items():
            if text not in en_links:
                link_comparison['fr_only'].append(link)

        # Find common links
        for text in en_links.keys():
            if text in fr_links:
                link_comparison['common'].append({
                    'en': en_links[text],
                    'fr': fr_links[text]
                })

        # Compare media between English and French pages
        media_comparison = {
            'en_only': [],
            'fr_only': [],
            'common': []
        }

        # Extract media alt texts for comparison (case insensitive)
        en_media = {media['alt'].lower(): media for media in en_page.get('media_details', []) if media['alt']}
        fr_media = {media['alt'].lower(): media for media in fr_page.get('media_details', []) if media['alt']}

        # Find media only in English
        for alt, media in en_media.items():
            if alt not in fr_media:
                media_comparison['en_only'].append(media)

        # Find media only in French
        for alt, media in fr_media.items():
            if alt not in en_media:
                media_comparison['fr_only'].append(media)

        # Find common media
        for alt in en_media.keys():
            if alt in fr_media:
                media_comparison['common'].append({
                    'en': en_media[alt],
                    'fr': fr_media[alt]
                })

        # Add media without alt text to their respective language-only lists
        for media in en_page.get('media_details', []):
            if not media['alt'] or media['alt'].lower() not in en_media:
                media_comparison['en_only'].append(media)

        for media in fr_page.get('media_details', []):
            if not media['alt'] or media['alt'].lower() not in fr_media:
                media_comparison['fr_only'].append(media)

        # Compare categories between English and French pages
        category_comparison = {
            'en_only': [],
            'fr_only': [],
            'common': []
        }

        # Extract categories for comparison (case insensitive)
        en_categories = [cat.lower() for cat in en_page.get('categories', [])]
        fr_categories = [cat.lower() for cat in fr_page.get('categories', [])]

        # Find categories only in English
        for cat in en_page.get('categories', []):
            if cat.lower() not in fr_categories:
                category_comparison['en_only'].append(cat)

        # Find categories only in French
        for cat in fr_page.get('categories', []):
            if cat.lower() not in en_categories:
                category_comparison['fr_only'].append(cat)

        # Find common categories
        for cat in en_page.get('categories', []):
            if cat.lower() in fr_categories:
                category_comparison['common'].append(cat)

        if date_diff > 30 or word_diff > 200 or section_diff > 2 or link_diff > 20 or fr_page['word_count'] < en_page['word_count'] * 0.7:
            reason = []
            if date_diff > 30:
                reason.append(f"La version française est en retard de {date_diff} jours")
            if word_diff > 200:
                reason.append(f"La version anglaise a {word_diff} mots de plus")
            if section_diff > 2:
                reason.append(f"La version anglaise a {section_diff} sections de plus")
            if link_diff > 20:
                reason.append(f"La version anglaise a {link_diff} liens de plus")
            if media_diff > 5:
                reason.append(f"La version anglaise a {media_diff} images de plus")
            if fr_page['word_count'] < en_page['word_count'] * 0.7:
                reason.append(f"La version française ne contient que {fr_page['word_count'] / en_page['word_count']:.0%} du contenu de la version anglaise.")

            needs_update.append({
                'key': key,
                'reason': ', '.join(reason),
                'en_page': en_page,
                'fr_page': fr_page,
                'date_diff': date_diff,
                'word_diff': word_diff,
                'section_diff': section_diff,
                'link_diff': link_diff,
                'media_diff': media_diff,
                'staleness_score': staleness_score,
                'priority': staleness_score,  # Use staleness score as priority
                'section_comparison': section_comparison,
                'link_comparison': link_comparison,
                'media_comparison': media_comparison,
                'category_comparison': category_comparison
            })

    # Sort by priority (descending)
    needs_update.sort(key=lambda x: x['priority'], reverse=True)

    return needs_update

def main():
    """Main function to execute the script"""
    logger.info("Starting wiki_compare.py")

    # Ensure the script's directory exists (the output files themselves are
    # written to the current working directory)
    os.makedirs(os.path.dirname(os.path.abspath(__file__)), exist_ok=True)

    # Fetch top keys
    top_keys = fetch_top_keys(NUM_WIKI_PAGES)

    if not top_keys:
        logger.error("Failed to fetch top keys. Exiting.")
        return

    # Save top keys to JSON
    save_to_json(top_keys, TOP_KEYS_FILE)

    # Fetch wiki pages for each key
    wiki_pages = []

    for key_info in top_keys:
        key = key_info['key']

        # Fetch English page
        en_page = fetch_wiki_page(key, 'en')
        if en_page:
            wiki_pages.append(en_page)

        # Fetch French page
        fr_page = fetch_wiki_page(key, 'fr')
        if fr_page:
            wiki_pages.append(fr_page)

    # Process wiki pages to add staleness score
    processed_wiki_pages = []
    pages_by_key = {}

    # Group pages by key
    for page in wiki_pages:
        if page is None:
            continue

        key = page['key']
        if key not in pages_by_key:
            pages_by_key[key] = {}

        pages_by_key[key][page['language']] = page

    # Calculate staleness score for each pair of pages
    for key, lang_pages in pages_by_key.items():
        # Add English page with staleness score
        if 'en' in lang_pages:
            en_page = lang_pages['en'].copy()

            # If French page exists, calculate staleness score
            if 'fr' in lang_pages:
                fr_page = lang_pages['fr']

                # Only compute a score when both modification dates are known
                if en_page['last_modified'] and fr_page['last_modified']:
                    # Calculate date difference in days
                    en_date = datetime.strptime(en_page['last_modified'], '%Y-%m-%d')
                    fr_date = datetime.strptime(fr_page['last_modified'], '%Y-%m-%d')
                    date_diff = (en_date - fr_date).days

                    # Calculate content differences
                    word_diff = en_page['word_count'] - fr_page['word_count']
                    section_diff = en_page['sections'] - fr_page['sections']
                    link_diff = en_page['link_count'] - fr_page['link_count']

                    # Calculate staleness score
                    staleness_score = (
                        abs(date_diff) * 0.2 +
                        abs(word_diff) / 100 * 0.5 +
                        abs(section_diff) * 0.15 +
                        abs(link_diff) / 10 * 0.15
                    )

                    # Round to 2 decimal places
                    staleness_score = round(staleness_score, 2)

                    en_page['staleness_score'] = staleness_score
                    fr_page['staleness_score'] = staleness_score
                else:
                    en_page['staleness_score'] = 0
                    fr_page['staleness_score'] = 0

                processed_wiki_pages.append(en_page)
                processed_wiki_pages.append(fr_page)
            else:
                # French page is missing, calculate a high staleness score
                missing_staleness_score = (
                    30 * 0.2 +
                    en_page['word_count'] / 100 * 0.5 +
                    en_page['sections'] * 0.15 +
                    en_page['link_count'] / 10 * 0.15
                )

                # Round to 2 decimal places and ensure it's high
                missing_staleness_score = max(100, round(missing_staleness_score, 2))

                en_page['staleness_score'] = missing_staleness_score
                processed_wiki_pages.append(en_page)

        # Add French page without English counterpart (rare case)
        elif 'fr' in lang_pages:
            fr_page = lang_pages['fr'].copy()
            fr_page['staleness_score'] = 0
            processed_wiki_pages.append(fr_page)

    # Save processed wiki pages to CSV
    try:
        with open(WIKI_PAGES_CSV, 'w', newline='', encoding='utf-8') as f:
            # Basic fields for CSV (detailed content will be in JSON only)
            fieldnames = ['key', 'language', 'url', 'last_modified', 'sections', 'word_count', 'link_count', 'media_count', 'staleness_score']
            writer = csv.DictWriter(f, fieldnames=fieldnames)

            writer.writeheader()
            for page in processed_wiki_pages:
                if page:  # Skip None values
                    # Create a copy with only the CSV fields
                    csv_page = {field: page.get(field, '') for field in fieldnames if field in page}
                    writer.writerow(csv_page)

        logger.info(f"Wiki page data saved to {WIKI_PAGES_CSV}")

    except IOError as e:
        logger.error(f"Error saving data to {WIKI_PAGES_CSV}: {e}")
        return
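
    # The CSV starts with this header row, one data row per page following it:
    #   key,language,url,last_modified,sections,word_count,link_count,media_count,staleness_score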

    # Analyze pages to find those needing updates
    pages_to_update = analyze_wiki_pages(wiki_pages)

    # Save pages that need updating to JSON
    save_to_json(pages_to_update, OUTDATED_PAGES_FILE)

    # Print the top pages needing updates
    print(f"\n===== TOP {min(NUM_WIKI_PAGES, len(pages_to_update))} WIKI PAGES NEEDING UPDATES =====")

    for i, page in enumerate(pages_to_update[:NUM_WIKI_PAGES], 1):
        key = page['key']
        reason = page['reason']
        en_url = page['en_page']['url'] if page['en_page'] else "N/A"
        fr_url = page['fr_page']['url'] if page['fr_page'] else "N/A"

        print(f"{i}. Key: {key}")
        print(f"   Reason: {reason}")
        print(f"   English: {en_url}")
        print(f"   French: {fr_url}")
        print()

    logger.info("Script completed successfully")


if __name__ == "__main__":
    main()