change template panel left, create dashboard
This commit is contained in:
parent 381f378db4
commit 539b4c094f
24 changed files with 1367 additions and 166 deletions
@@ -61,10 +61,12 @@ WIKI_BASE_URL_EN = "https://wiki.openstreetmap.org/wiki/Key:"
 WIKI_BASE_URL_FR = "https://wiki.openstreetmap.org/wiki/FR:Key:"
 WIKI_BASE_URL = "https://wiki.openstreetmap.org/wiki/"
 WIKI_CATEGORY_URL = "https://wiki.openstreetmap.org/wiki/Category:FR:Traductions_d%C3%A9synchronis%C3%A9es"
+WIKI_DEADEND_PAGES_URL = "https://wiki.openstreetmap.org/w/index.php?title=Special:DeadendPages&limit=500&offset=1000"
 TOP_KEYS_FILE = "top_keys.json"
 KEYS_WITHOUT_WIKI_FILE = "keys_without_wiki.json"
 WIKI_PAGES_CSV = "wiki_pages.csv"
 OUTDATED_PAGES_FILE = "outdated_pages.json"
+DEADEND_PAGES_FILE = "deadend_pages.json"
 STALENESS_HISTOGRAM_FILE = "staleness_histogram.png"
 # Number of wiki pages to examine
 NUM_WIKI_PAGES = 2
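Note: the new WIKI_DEADEND_PAGES_URL constant hard-codes the Special page's paging parameters. A minimal sketch (not part of the commit) of building the same URL with a configurable limit/offset, using only the standard library:

```python
from urllib.parse import urlencode

def deadend_pages_url(limit=500, offset=1000):
    """Build the Special:DeadendPages URL with configurable paging."""
    params = {"title": "Special:DeadendPages", "limit": limit, "offset": offset}
    # safe=":" keeps the colon in "Special:DeadendPages" unescaped, so the
    # default output matches the hard-coded constant in the diff above
    return "https://wiki.openstreetmap.org/w/index.php?" + urlencode(params, safe=":")

print(deadend_pages_url())
# https://wiki.openstreetmap.org/w/index.php?title=Special:DeadendPages&limit=500&offset=1000
```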
@@ -154,6 +156,161 @@ def fetch_desynchronized_pages():
         logger.error(f"Error fetching category page: {e}")
         return []
 
+def suggest_categories(page_title, page_url):
+    """
+    Suggest categories for an uncategorized page based on its title and content
+
+    Args:
+        page_title (str): Title of the page
+        page_url (str): URL of the page
+
+    Returns:
+        list: List of suggested categories
+    """
+    logger.info(f"Suggesting categories for page: {page_title}")
+
+    suggested_categories = []
+
+    # Common categories for French OSM wiki pages
+    common_categories = [
+        "Documentation OSM en français",
+        "Cartographie",
+        "Contributeurs",
+        "Développeurs",
+        "Éléments cartographiés",
+        "Imports",
+        "Logiciels",
+        "Projets",
+        "Rencontres",
+        "Utilisateurs"
+    ]
+
+    # Add geography-related categories for pages about France
+    if "France" in page_title:
+        suggested_categories.append("France")
+
+    # Check for specific regions or departments
+    regions = [
+        "Auvergne-Rhône-Alpes", "Bourgogne-Franche-Comté", "Bretagne",
+        "Centre-Val de Loire", "Corse", "Grand Est", "Hauts-de-France",
+        "Île-de-France", "Normandie", "Nouvelle-Aquitaine",
+        "Occitanie", "Pays de la Loire", "Provence-Alpes-Côte d'Azur"
+    ]
+
+    for region in regions:
+        if region in page_title:
+            suggested_categories.append(region)
+
+    # Try to fetch the page content to make better suggestions
+    try:
+        response = requests.get(page_url)
+        response.raise_for_status()
+
+        soup = BeautifulSoup(response.text, 'html.parser')
+
+        # Get the main content
+        content = soup.select_one('#mw-content-text')
+        if content:
+            text = content.get_text(separator=' ', strip=True).lower()
+
+            # Check for keywords related to common categories
+            if any(keyword in text for keyword in ["carte", "cartographie", "mapper"]):
+                suggested_categories.append("Cartographie")
+
+            if any(keyword in text for keyword in ["contribuer", "contributeur", "éditer"]):
+                suggested_categories.append("Contributeurs")
+
+            if any(keyword in text for keyword in ["développeur", "programmer", "code", "api"]):
+                suggested_categories.append("Développeurs")
+
+            if any(keyword in text for keyword in ["tag", "clé", "valeur", "élément", "nœud", "way", "relation"]):
+                suggested_categories.append("Éléments cartographiés")
+
+            if any(keyword in text for keyword in ["import", "données", "dataset"]):
+                suggested_categories.append("Imports")
+
+            if any(keyword in text for keyword in ["logiciel", "application", "outil"]):
+                suggested_categories.append("Logiciels")
+
+            if any(keyword in text for keyword in ["projet", "initiative"]):
+                suggested_categories.append("Projets")
+
+            if any(keyword in text for keyword in ["rencontre", "réunion", "événement", "conférence"]):
+                suggested_categories.append("Rencontres")
+
+            if any(keyword in text for keyword in ["utiliser", "utilisateur", "usage"]):
+                suggested_categories.append("Utilisateurs")
+
+    except requests.exceptions.RequestException as e:
+        logger.warning(f"Error fetching page content for category suggestions: {e}")
+        # If we can't fetch the content, suggest common categories based on title only
+        if "projet" in page_title.lower():
+            suggested_categories.append("Projets")
+        elif "logiciel" in page_title.lower() or "application" in page_title.lower():
+            suggested_categories.append("Logiciels")
+        elif "rencontre" in page_title.lower() or "réunion" in page_title.lower():
+            suggested_categories.append("Rencontres")
+
+    # Always suggest the general French documentation category
+    suggested_categories.append("Documentation OSM en français")
+
+    # Remove duplicates while preserving order
+    seen = set()
+    unique_categories = []
+    for cat in suggested_categories:
+        if cat not in seen:
+            seen.add(cat)
+            unique_categories.append(cat)
+
+    logger.info(f"Suggested {len(unique_categories)} categories for {page_title}: {', '.join(unique_categories)}")
+    return unique_categories
+
+def fetch_deadend_pages():
+    """
+    Fetch pages starting with "France" from the DeadendPages list
+
+    Returns:
+        list: List of dictionaries containing page information
+    """
+    logger.info(f"Fetching pages from DeadendPages list: {WIKI_DEADEND_PAGES_URL}")
+
+    try:
+        response = requests.get(WIKI_DEADEND_PAGES_URL)
+        response.raise_for_status()
+
+        soup = BeautifulSoup(response.text, 'html.parser')
+
+        # Find all links in the DeadendPages list
+        page_links = []
+        for link in soup.select('.mw-spcontent li a'):
+            href = link.get('href', '')
+            title = link.get_text(strip=True)
+
+            # Skip if it's not a wiki page or if it's a special page
+            if not href.startswith('/wiki/') or 'Special:' in href:
+                continue
+
+            # Filter pages that start with "France"
+            if title.startswith('France'):
+                # Get the full URL
+                full_url = 'https://wiki.openstreetmap.org' + href
+
+                # Suggest categories for this page
+                suggested_categories = suggest_categories(title, full_url)
+
+                page_links.append({
+                    'title': title,
+                    'url': full_url,
+                    'suggested_categories': suggested_categories
+                })
+
+        logger.info(f"Found {len(page_links)} pages starting with 'France' in the DeadendPages list")
+        return page_links
+
+    except requests.exceptions.RequestException as e:
+        logger.error(f"Error fetching DeadendPages list: {e}")
+        return []
+
 def fetch_top_keys(limit=NUM_WIKI_PAGES):
     """
     Fetch the most used OSM keys from TagInfo API
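The order-preserving de-duplication at the end of suggest_categories() can be exercised on its own; a self-contained sketch with an invented input list, including the equivalent dict.fromkeys() one-liner:

```python
# Invented example input; mirrors the seen-set idiom in suggest_categories().
suggested = ["France", "Occitanie", "Projets", "France",
             "Documentation OSM en français"]

seen = set()
unique_categories = []
for cat in suggested:
    if cat not in seen:
        seen.add(cat)
        unique_categories.append(cat)

# dicts preserve insertion order (Python 3.7+), so this one-liner is equivalent:
assert unique_categories == list(dict.fromkeys(suggested))
print(unique_categories)
# ['France', 'Occitanie', 'Projets', 'Documentation OSM en français']
```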
@@ -1365,10 +1522,11 @@ def main():
     3. Fetches and processes wiki pages for these keys
     4. Processes specific wiki pages listed in SPECIFIC_PAGES
     5. Processes pages from the FR:Traductions_désynchronisées category
-    6. Calculates staleness scores for all pages
-    7. Generates a histogram of staleness scores
-    8. Saves the results to CSV and JSON files
-    9. Prints a list of pages that need updating
+    6. Processes pages starting with "France" from the DeadendPages list
+    7. Calculates staleness scores for all pages
+    8. Generates a histogram of staleness scores
+    9. Saves the results to CSV and JSON files
+    10. Prints a list of pages that need updating
     """
     # Parse command-line arguments
     parser = argparse.ArgumentParser(description='Compare OpenStreetMap wiki pages in English and French.')
@@ -1404,6 +1562,62 @@ def main():
         logger.info(f"Saved {len(keys_without_wiki)} keys without wiki pages to {KEYS_WITHOUT_WIKI_FILE}")
     else:
         logger.warning("No keys without wiki pages were fetched.")
 
+    # Fetch pages starting with "France" from the DeadendPages list
+    deadend_pages = fetch_deadend_pages()
+
+    if deadend_pages:
+        # Load existing deadend pages data to compare with history
+        existing_data = load_json_data(DEADEND_PAGES_FILE)
+
+        # Initialize history if it doesn't exist
+        if 'history' not in existing_data:
+            existing_data['history'] = {}
+
+        # Get the most recent history entry
+        sorted_timestamps = sorted(existing_data.get('history', {}).keys())
+        previous_pages = []
+        if sorted_timestamps:
+            latest_timestamp = sorted_timestamps[-1]
+            previous_pages = existing_data['history'][latest_timestamp].get('pages', [])
+
+        # Find pages that were in the previous list but are no longer in the current list
+        previous_urls = [page['url'] for page in previous_pages]
+        current_urls = [page['url'] for page in deadend_pages]
+
+        categorized_pages = []
+        for url in previous_urls:
+            if url not in current_urls:
+                # Find the page in previous_pages
+                for page in previous_pages:
+                    if page['url'] == url:
+                        # This page is no longer in the DeadendPages list, which means it has been categorized
+                        categorized_pages.append(page)
+                        break
+
+        # Create a timestamp for the current data
+        current_timestamp = datetime.now().isoformat()
+
+        # Create the history entry
+        history_entry = {
+            'pages': deadend_pages,
+            'categorized_pages': categorized_pages
+        }
+
+        # Add the entry to history with timestamp as key
+        existing_data['history'][current_timestamp] = history_entry
+
+        # Update the current data
+        existing_data['pages'] = deadend_pages
+        existing_data['categorized_pages'] = categorized_pages
+        existing_data['last_updated'] = current_timestamp
+
+        # Save the updated data
+        save_to_json(existing_data, DEADEND_PAGES_FILE)
+        logger.info(f"Saved {len(deadend_pages)} deadend pages to {DEADEND_PAGES_FILE}")
+        logger.info(f"Found {len(categorized_pages)} pages that have been categorized since the last run")
+    else:
+        logger.warning("No deadend pages were fetched.")
+
     # Fetch wiki pages for each key
     wiki_pages = []
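For reference, a sketch of the structure this block writes to deadend_pages.json (the keys mirror the code above; the page entry itself is invented), followed by an equivalent flat way to detect newly categorized pages:

```python
import json
from datetime import datetime

now = datetime.now().isoformat()

# Invented example of the structure written to DEADEND_PAGES_FILE.
example = {
    "pages": [
        {
            "title": "France/Exemple",  # hypothetical page
            "url": "https://wiki.openstreetmap.org/wiki/France/Exemple",
            "suggested_categories": ["France", "Documentation OSM en français"],
        },
    ],
    "categorized_pages": [],
    "last_updated": now,
    "history": {
        now: {"pages": [], "categorized_pages": []},  # one entry per run
    },
}
print(json.dumps(example, ensure_ascii=False, indent=2))

# Same "newly categorized" detection as the nested loop above, but with set
# membership on URLs; results match when URLs are unique within a run.
latest = max(example["history"])  # ISO timestamps sort lexicographically
previous_pages = example["history"][latest].get("pages", [])
current_urls = {page["url"] for page in example["pages"]}
categorized_pages = [p for p in previous_pages if p["url"] not in current_urls]
print(categorized_pages)  # [] in this example
```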