#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup
import json
import logging
import argparse
import os
import re
import time
from datetime import datetime, timedelta

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# URLs for OSM Wiki proposals
VOTING_PROPOSALS_URL = "https://wiki.openstreetmap.org/wiki/Category:Proposals_with_%22Voting%22_status"
RECENT_CHANGES_URL = "https://wiki.openstreetmap.org/w/index.php?title=Special:RecentChanges&namespace=102&limit=50"  # Namespace 102 is for Proposal pages

# Output file
OUTPUT_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'proposals.json')

# Cache timeout (in hours)
CACHE_TIMEOUT = 1

# Vote patterns (same as in fetch_archived_proposals.py)
VOTE_PATTERNS = {
    'approve': [
        r'I\s+(?:(?:strongly|fully|completely|wholeheartedly)\s+)?(?:approve|support|agree\s+with)\s+this\s+proposal',
        r'I\s+vote\s+(?:to\s+)?(?:approve|support)',
        r'(?:Symbol\s+support\s+vote\.svg|Symbol_support_vote\.svg)',
    ],
    'oppose': [
        r'I\s+(?:(?:strongly|fully|completely|wholeheartedly)\s+)?(?:oppose|disagree\s+with|reject|do\s+not\s+support)\s+this\s+proposal',
        r'I\s+vote\s+(?:to\s+)?(?:oppose|reject|against)',
        r'(?:Symbol\s+oppose\s+vote\.svg|Symbol_oppose_vote\.svg)',
    ],
    'abstain': [
        r'I\s+(?:have\s+comments\s+but\s+)?abstain\s+from\s+voting',
        r'I\s+(?:have\s+comments\s+but\s+)?(?:neither\s+approve\s+nor\s+oppose|am\s+neutral)',
        r'(?:Symbol\s+abstain\s+vote\.svg|Symbol_abstain_vote\.svg)',
    ]
}
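# Illustrative check (hypothetical vote line) of how determine_vote_type()
# applies these patterns further down:
#
#   >>> any(re.search(p, "I approve this proposal.", re.IGNORECASE)
#   ...     for p in VOTE_PATTERNS['approve'])
#   True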

def should_update_cache():
    """
    Check if the cache file exists and if it's older than the cache timeout
    """
    if not os.path.exists(OUTPUT_FILE):
        logger.info("Cache file doesn't exist, creating it")
        return True
    # Check file modification time
    file_mtime = datetime.fromtimestamp(os.path.getmtime(OUTPUT_FILE))
    now = datetime.now()
    # If the file is older than the cache timeout, update it
    if now - file_mtime > timedelta(hours=CACHE_TIMEOUT):
        logger.info(f"Cache is older than {CACHE_TIMEOUT} hour(s), updating")
        return True
    logger.info(f"Cache is still fresh (less than {CACHE_TIMEOUT} hour(s) old)")
    return False

def fetch_page(url):
    """
    Fetch a page from the OSM wiki
    """
    try:
        # A timeout keeps a stalled connection from hanging the script indefinitely
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        return response.text
    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching {url}: {e}")
        return None

def extract_username(text):
    """
    Extract username from a signature line
    """
    # Common patterns for signatures
    patterns = [
        r'--\s*\[\[User:([^|\]]+)(?:\|[^\]]+)?\]\]',                   # --[[User:Username|Username]]
        r'--\s*\[\[User:([^|\]]+)\]\]',                                # --[[User:Username]]
        r'--\s*\[\[User talk:([^|\]]+)(?:\|[^\]]+)?\]\]',              # --[[User talk:Username|Username]]
        r'--\s*\[\[User talk:([^|\]]+)\]\]',                           # --[[User talk:Username]]
        r'--\s*\[\[Special:Contributions/([^|\]]+)(?:\|[^\]]+)?\]\]',  # --[[Special:Contributions/Username|Username]]
        r'--\s*\[\[Special:Contributions/([^|\]]+)\]\]',               # --[[Special:Contributions/Username]]
    ]
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return match.group(1).strip()
    # If no pattern matched, fall back to any username-like string after the dashes
    match = re.search(r'--\s*([A-Za-z0-9_-]+)', text)
    if match:
        return match.group(1).strip()
    return None
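# Example (hypothetical signature): extract_username("--[[User:Alice|Alice]]")
# returns 'Alice' via the first pattern above.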

def extract_date(text):
    """
    Extract date from a signature line
    """
    # Look for common date formats in signatures
    date_patterns = [
        r'(\d{1,2}:\d{2}, \d{1,2} [A-Za-z]+ \d{4})',   # 15:30, 25 December 2023
        r'(\d{1,2} [A-Za-z]+ \d{4} \d{1,2}:\d{2})',    # 25 December 2023 15:30
        r'(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})',      # 2023-12-25T15:30:00
    ]
    for pattern in date_patterns:
        match = re.search(pattern, text)
        if match:
            return match.group(1)
    return None
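# Example (hypothetical signature): extract_date("--[[User:Alice]] 15:30, 25 December 2023")
# returns '15:30, 25 December 2023' via the first pattern.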

def determine_vote_type(text):
    """
    Determine the type of vote from the text
    """
    text_lower = text.lower()
    for vote_type, patterns in VOTE_PATTERNS.items():
        for pattern in patterns:
            if re.search(pattern, text_lower, re.IGNORECASE):
                return vote_type
    return None

def extract_votes(html):
    """
    Extract voting information from proposal HTML
    """
    soup = BeautifulSoup(html, 'html.parser')
    # Find the voting section
    voting_section = None
    for heading in soup.find_all(['h2', 'h3']):
        heading_text = heading.get_text().lower()
        if 'voting' in heading_text or 'votes' in heading_text or 'poll' in heading_text:
            voting_section = heading
            break
    if not voting_section:
        logger.warning("No voting section found")
        return {
            'approve': {'count': 0, 'users': []},
            'oppose': {'count': 0, 'users': []},
            'abstain': {'count': 0, 'users': []}
        }
    # Get the content after the voting section heading
    votes_content = []
    current = voting_section.next_sibling
    # Collect all elements until the next heading or the end of the document
    while current and current.name not in ['h2', 'h3']:
        if current.name:  # Skip NavigableString objects
            votes_content.append(current)
        current = current.next_sibling
    # Process vote lists
    votes = {
        'approve': {'count': 0, 'users': []},
        'oppose': {'count': 0, 'users': []},
        'abstain': {'count': 0, 'users': []}
    }
    # Look for lists of votes
    for element in votes_content:
        if element.name == 'ul':
            for li in element.find_all('li'):
                vote_text = li.get_text()
                vote_type = determine_vote_type(vote_text)
                if vote_type:
                    username = extract_username(vote_text)
                    date = extract_date(vote_text)
                    if username:
                        votes[vote_type]['count'] += 1
                        votes[vote_type]['users'].append({
                            'username': username,
                            'date': date
                        })
    return votes
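# The returned structure looks like this (illustrative values):
#   {'approve': {'count': 2, 'users': [{'username': 'Alice', 'date': '...'}]},
#    'oppose':  {'count': 1, 'users': [...]},
#    'abstain': {'count': 0, 'users': []}}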

def fetch_voting_proposals():
    """
    Fetch proposals with "Voting" status from the OSM Wiki
    """
    logger.info(f"Fetching voting proposals from {VOTING_PROPOSALS_URL}")
    try:
        # A timeout keeps a stalled connection from hanging the script indefinitely
        response = requests.get(VOTING_PROPOSALS_URL, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        proposals = []
        # Find all links in the mw-pages section
        links = soup.select('#mw-pages a')
        for link in links:
            # Skip category links and other non-proposal links
            if 'Category:' in link.get('href', '') or 'Special:' in link.get('href', ''):
                continue
            proposal_title = link.text.strip()
            proposal_url = 'https://wiki.openstreetmap.org' + link.get('href', '')
            # Create a basic proposal object
            proposal = {
                'title': proposal_title,
                'url': proposal_url,
                'status': 'Voting',
                'type': 'voting'
            }
            # Fetch the proposal page to extract voting information
            logger.info(f"Fetching proposal page: {proposal_title}")
            html = fetch_page(proposal_url)
            if html:
                # Extract voting information
                votes = extract_votes(html)
                # Add voting information to the proposal
                proposal['votes'] = votes
                # Calculate total votes and percentages
                total_votes = votes['approve']['count'] + votes['oppose']['count'] + votes['abstain']['count']
                if total_votes > 0:
                    proposal['total_votes'] = total_votes
                    proposal['approve_percentage'] = round((votes['approve']['count'] / total_votes) * 100, 1)
                    proposal['oppose_percentage'] = round((votes['oppose']['count'] / total_votes) * 100, 1)
                    proposal['abstain_percentage'] = round((votes['abstain']['count'] / total_votes) * 100, 1)
                else:
                    proposal['total_votes'] = 0
                    proposal['approve_percentage'] = 0
                    proposal['oppose_percentage'] = 0
                    proposal['abstain_percentage'] = 0
                # Extract the proposer from the page
                soup = BeautifulSoup(html, 'html.parser')
                content = soup.select_one('#mw-content-text')
                if content:
                    # Look for table rows with "Proposed by:" in the header cell
                    for row in content.select('tr'):
                        cells = row.select('th, td')
                        if len(cells) >= 2:
                            header_text = cells[0].get_text().strip().lower()
                            if "proposed by" in header_text:
                                user_link = cells[1].select_one('a[href*="/wiki/User:"]')
                                if user_link:
                                    href = user_link.get('href', '')
                                    title = user_link.get('title', '')
                                    # Try to get the username from the title attribute first
                                    if title and title.startswith('User:'):
                                        proposal['proposer'] = title[5:]  # Remove the 'User:' prefix
                                    # Otherwise try to extract it from the href
                                    elif href:
                                        href_match = re.search(r'/wiki/User:([^/]+)', href)
                                        if href_match:
                                            proposal['proposer'] = href_match.group(1)
                                    # If there is still no proposer, use the link text
                                    if 'proposer' not in proposal and user_link.get_text():
                                        proposal['proposer'] = user_link.get_text().strip()
            # Add a delay to avoid overloading the server
            time.sleep(1)
            proposals.append(proposal)
        logger.info(f"Found {len(proposals)} voting proposals")
        return proposals
    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching voting proposals: {e}")
        return []
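# Each voting proposal dict carries (illustrative): 'title', 'url', 'status',
# 'type', 'votes', 'total_votes', the three *_percentage fields and, when it
# can be extracted, 'proposer'.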

def fetch_recent_proposals():
    """
    Fetch recently modified proposals from the OSM Wiki
    """
    logger.info(f"Fetching recent changes from {RECENT_CHANGES_URL}")
    try:
        # A timeout keeps a stalled connection from hanging the script indefinitely
        response = requests.get(RECENT_CHANGES_URL, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        proposals = []
        # Find all change list lines
        change_lines = soup.select('.mw-changeslist .mw-changeslist-line')
        for line in change_lines:
            # Get the page title
            title_element = line.select_one('.mw-changeslist-title')
            if not title_element:
                continue
            page_title = title_element.text.strip()
            page_url = title_element.get('href', '')
            if not page_url.startswith('http'):
                page_url = f"https://wiki.openstreetmap.org{page_url}"
            # Get the timestamp
            timestamp_element = line.select_one('.mw-changeslist-date')
            timestamp = timestamp_element.text.strip() if timestamp_element else ""
            # Get the user who made the change
            user_element = line.select_one('.mw-userlink')
            user = user_element.text.strip() if user_element else "Unknown"
            # Skip pages that are not proposal pages
            if not page_title.startswith('Proposal:'):
                continue
            proposals.append({
                'title': page_title,
                'url': page_url,
                'last_modified': timestamp,
                'modified_by': user,
                'type': 'recent'
            })
        # Limit to the 10 most recent proposals
        proposals = proposals[:10]
        logger.info(f"Found {len(proposals)} recently modified proposals")
        return proposals
    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching recent proposals: {e}")
        return []

def save_proposals(voting_proposals, recent_proposals):
    """
    Save the proposals to a JSON file
    """
    data = {
        'last_updated': datetime.now().isoformat(),
        'voting_proposals': voting_proposals,
        'recent_proposals': recent_proposals
    }
    with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
    logger.info(f"Saved {len(voting_proposals)} voting proposals and {len(recent_proposals)} recent proposals to {OUTPUT_FILE}")
    return OUTPUT_FILE
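# Resulting proposals.json shape (illustrative):
#   {"last_updated": "2025-01-01T00:00:00",
#    "voting_proposals": [...], "recent_proposals": [...]}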

def main():
    parser = argparse.ArgumentParser(description='Fetch OSM Wiki proposals')
    parser.add_argument('--force', action='store_true', help='Force update even if cache is fresh')
    parser.add_argument('--dry-run', action='store_true', help='Print results without saving to file')
    args = parser.parse_args()
    # Check if we should update the cache
    if args.force or should_update_cache() or args.dry_run:
        voting_proposals = fetch_voting_proposals()
        recent_proposals = fetch_recent_proposals()
        if args.dry_run:
            logger.info(f"Found {len(voting_proposals)} voting proposals:")
            for proposal in voting_proposals:
                logger.info(f" - {proposal['title']}")
            logger.info(f"Found {len(recent_proposals)} recent proposals:")
            for proposal in recent_proposals:
                logger.info(f" - {proposal['title']} (modified by {proposal['modified_by']} on {proposal['last_modified']})")
        else:
            output_file = save_proposals(voting_proposals, recent_proposals)
            logger.info(f"Results saved to {output_file}")
    else:
        logger.info("Using cached proposals data")

if __name__ == "__main__":
    main()
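# Usage (script filename assumed to be fetch_proposals.py):
#   python3 fetch_proposals.py            # refresh proposals.json if the cache is stale
#   python3 fetch_proposals.py --force    # refresh regardless of cache age
#   python3 fetch_proposals.py --dry-run  # log results without writing the file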