#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Fetch OSM Wiki proposals that are currently in voting, plus recently
modified proposal pages, and cache the results as JSON."""

import requests
from bs4 import BeautifulSoup
import json
import logging
import argparse
import os
import re
import time
from datetime import datetime, timedelta

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# URLs for OSM Wiki proposals
VOTING_PROPOSALS_URL = "https://wiki.openstreetmap.org/wiki/Category:Proposals_with_%22Voting%22_status"
RECENT_CHANGES_URL = "https://wiki.openstreetmap.org/w/index.php?title=Special:RecentChanges&namespace=102&limit=50"  # Namespace 102 is for Proposal pages

# Output file
OUTPUT_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'proposals.json')

# Cache timeout (in hours)
CACHE_TIMEOUT = 1

# Request timeout (in seconds) so a stalled connection can't hang the script
REQUEST_TIMEOUT = 30

# Vote patterns (same as in fetch_archived_proposals.py)
VOTE_PATTERNS = {
    'approve': [
        r'I\s+(?:(?:strongly|fully|completely|wholeheartedly)\s+)?(?:approve|support|agree\s+with)\s+this\s+proposal',
        r'I\s+vote\s+(?:to\s+)?(?:approve|support)',
        r'(?:Symbol\s+support\s+vote\.svg|Symbol_support_vote\.svg)',
    ],
    'oppose': [
        r'I\s+(?:(?:strongly|fully|completely|wholeheartedly)\s+)?(?:oppose|disagree\s+with|reject|do\s+not\s+support)\s+this\s+proposal',
        r'I\s+vote\s+(?:to\s+)?(?:oppose|reject|against)',
        r'(?:Symbol\s+oppose\s+vote\.svg|Symbol_oppose_vote\.svg)',
    ],
    'abstain': [
        r'I\s+(?:have\s+comments\s+but\s+)?abstain\s+from\s+voting',
        r'I\s+(?:have\s+comments\s+but\s+)?(?:neither\s+approve\s+nor\s+oppose|am\s+neutral)',
        r'(?:Symbol\s+abstain\s+vote\.svg|Symbol_abstain_vote\.svg)',
    ]
}


def should_update_cache():
    """Check whether the cache file is missing or older than the cache timeout."""
    if not os.path.exists(OUTPUT_FILE):
        logger.info("Cache file doesn't exist, creating it")
        return True

    # Check the file modification time
    file_mtime = datetime.fromtimestamp(os.path.getmtime(OUTPUT_FILE))
    now = datetime.now()

    # If the file is older than the cache timeout, update it
    if now - file_mtime > timedelta(hours=CACHE_TIMEOUT):
        logger.info(f"Cache is older than {CACHE_TIMEOUT} hour(s), updating")
        return True

    logger.info(f"Cache is still fresh (less than {CACHE_TIMEOUT} hour(s) old)")
    return False


def fetch_page(url):
    """Fetch a page from the OSM wiki, returning its HTML or None on error."""
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.text
    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching {url}: {e}")
        return None


def extract_username(text):
    """Extract a username from a signature line."""
    # Common patterns for signatures
    patterns = [
        r'--\s*\[\[User:([^|\]]+)(?:\|[^\]]+)?\]\]',                   # --[[User:Username|Username]]
        r'--\s*\[\[User:([^|\]]+)\]\]',                                # --[[User:Username]]
        r'--\s*\[\[User talk:([^|\]]+)(?:\|[^\]]+)?\]\]',              # --[[User talk:Username|Username]]
        r'--\s*\[\[User talk:([^|\]]+)\]\]',                           # --[[User talk:Username]]
        r'--\s*\[\[Special:Contributions/([^|\]]+)(?:\|[^\]]+)?\]\]',  # --[[Special:Contributions/Username|Username]]
        r'--\s*\[\[Special:Contributions/([^|\]]+)\]\]',               # --[[Special:Contributions/Username]]
    ]

    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return match.group(1).strip()

    # If no pattern matched, fall back to any username-like string after the dashes
    match = re.search(r'--\s*([A-Za-z0-9_-]+)', text)
    if match:
        return match.group(1).strip()

    return None


def extract_date(text):
    """Extract a date from a signature line."""
    # Look for common date formats in signatures
    date_patterns = [
        r'(\d{1,2}:\d{2}, \d{1,2} [A-Za-z]+ \d{4})',  # 15:30, 25 December 2023
        r'(\d{1,2} [A-Za-z]+ \d{4} \d{1,2}:\d{2})',   # 25 December 2023 15:30
        r'(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})',     # 2023-12-25T15:30:00
    ]

    for pattern in date_patterns:
        match = re.search(pattern, text)
        if match:
            return match.group(1)

    return None
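
# A minimal sketch of how the signature helpers behave on a typical vote line
# (the sample text is illustrative, not taken from a real proposal page):
#
#   line = "I approve this proposal. --[[User:JaneMapper|Jane]] 15:30, 25 December 2023 (UTC)"
#   extract_username(line)  # -> 'JaneMapper'
#   extract_date(line)      # -> '15:30, 25 December 2023'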


def determine_vote_type(text):
    """Determine the type of vote from the text."""
    # Patterns are matched case-insensitively against the raw vote text
    for vote_type, patterns in VOTE_PATTERNS.items():
        for pattern in patterns:
            if re.search(pattern, text, re.IGNORECASE):
                return vote_type

    return None


def extract_votes(html):
    """Extract voting information from proposal HTML."""
    soup = BeautifulSoup(html, 'html.parser')

    # Find the voting section
    voting_section = None
    for heading in soup.find_all(['h2', 'h3']):
        heading_text = heading.get_text().lower()
        if 'voting' in heading_text or 'votes' in heading_text or 'poll' in heading_text:
            voting_section = heading
            break

    if not voting_section:
        logger.warning("No voting section found")
        return {
            'approve': {'count': 0, 'users': []},
            'oppose': {'count': 0, 'users': []},
            'abstain': {'count': 0, 'users': []}
        }

    # Get the content after the voting section heading
    votes_content = []
    current = voting_section.next_sibling

    # Collect all elements until the next heading or the end of the document
    while current and current.name not in ('h2', 'h3'):
        if current.name:  # Skip NavigableString objects
            votes_content.append(current)
        current = current.next_sibling

    # Process vote lists
    votes = {
        'approve': {'count': 0, 'users': []},
        'oppose': {'count': 0, 'users': []},
        'abstain': {'count': 0, 'users': []}
    }

    # Look for lists of votes
    for element in votes_content:
        if element.name == 'ul':
            for li in element.find_all('li'):
                vote_text = li.get_text()
                vote_type = determine_vote_type(vote_text)

                if vote_type:
                    username = extract_username(vote_text)
                    date = extract_date(vote_text)

                    if username:
                        votes[vote_type]['count'] += 1
                        votes[vote_type]['users'].append({
                            'username': username,
                            'date': date
                        })

    return votes
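
# Sketch of extract_votes() on a minimal voting section. The markup below is
# hypothetical and uses wikitext-style signatures so the extract_username()
# patterns match; really rendered pages may format signatures differently:
#
#   html = """
#   <h3>Voting</h3>
#   <ul>
#     <li>I approve this proposal. --[[User:Alice]] 15:30, 25 December 2023</li>
#     <li>I oppose this proposal. --[[User:Bob]] 16:05, 25 December 2023</li>
#   </ul>
#   """
#   extract_votes(html)
#   # -> {'approve': {'count': 1, 'users': [{'username': 'Alice', 'date': '15:30, 25 December 2023'}]},
#   #     'oppose':  {'count': 1, 'users': [{'username': 'Bob', 'date': '16:05, 25 December 2023'}]},
#   #     'abstain': {'count': 0, 'users': []}}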


def fetch_voting_proposals():
    """Fetch proposals with "Voting" status from the OSM Wiki."""
    logger.info(f"Fetching voting proposals from {VOTING_PROPOSALS_URL}")

    try:
        response = requests.get(VOTING_PROPOSALS_URL, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')
        proposals = []

        # Find all links in the mw-pages section
        links = soup.select('#mw-pages a')

        for link in links:
            # Skip category links and other non-proposal links
            if 'Category:' in link.get('href', '') or 'Special:' in link.get('href', ''):
                continue

            proposal_title = link.text.strip()
            proposal_url = 'https://wiki.openstreetmap.org' + link.get('href', '')

            # Create a basic proposal object
            proposal = {
                'title': proposal_title,
                'url': proposal_url,
                'status': 'Voting',
                'type': 'voting'
            }

            # Fetch the proposal page to extract voting information
            logger.info(f"Fetching proposal page: {proposal_title}")
            html = fetch_page(proposal_url)

            if html:
                # Extract voting information
                votes = extract_votes(html)

                # Add voting information to the proposal
                proposal['votes'] = votes

                # Calculate total votes and percentages
                total_votes = votes['approve']['count'] + votes['oppose']['count'] + votes['abstain']['count']
                if total_votes > 0:
                    proposal['total_votes'] = total_votes
                    proposal['approve_percentage'] = round((votes['approve']['count'] / total_votes) * 100, 1)
                    proposal['oppose_percentage'] = round((votes['oppose']['count'] / total_votes) * 100, 1)
                    proposal['abstain_percentage'] = round((votes['abstain']['count'] / total_votes) * 100, 1)
                else:
                    proposal['total_votes'] = 0
                    proposal['approve_percentage'] = 0
                    proposal['oppose_percentage'] = 0
                    proposal['abstain_percentage'] = 0

                # Extract the proposer from the page
                soup = BeautifulSoup(html, 'html.parser')
                content = soup.select_one('#mw-content-text')
                if content:
                    # Look for table rows with "Proposed by:" in the header cell
                    for row in content.select('tr'):
                        cells = row.select('th, td')
                        if len(cells) >= 2:
                            header_text = cells[0].get_text().strip().lower()
                            if "proposed by" in header_text:
                                user_link = cells[1].select_one('a[href*="/wiki/User:"]')
                                if user_link:
                                    href = user_link.get('href', '')
                                    title = user_link.get('title', '')

                                    # Try to get the username from the title attribute first
                                    if title and title.startswith('User:'):
                                        proposal['proposer'] = title[5:]  # Remove the 'User:' prefix
                                    # Otherwise try to extract it from the href
                                    elif href:
                                        href_match = re.search(r'/wiki/User:([^/]+)', href)
                                        if href_match:
                                            proposal['proposer'] = href_match.group(1)

                                    # If still no proposer, use the link text
                                    if 'proposer' not in proposal and user_link.get_text():
                                        proposal['proposer'] = user_link.get_text().strip()

            # Add a delay to avoid overloading the server
            time.sleep(1)

            proposals.append(proposal)

        logger.info(f"Found {len(proposals)} voting proposals")
        return proposals

    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching voting proposals: {e}")
        return []


def fetch_recent_proposals():
    """Fetch recently modified proposals from the OSM Wiki."""
    logger.info(f"Fetching recent changes from {RECENT_CHANGES_URL}")

    try:
        response = requests.get(RECENT_CHANGES_URL, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')
        proposals = []

        # Find all change list lines
        change_lines = soup.select('.mw-changeslist .mw-changeslist-line')

        for line in change_lines:
            # Get the page title
            title_element = line.select_one('.mw-changeslist-title')
            if not title_element:
                continue

            page_title = title_element.text.strip()
            page_url = title_element.get('href', '')
            if not page_url.startswith('http'):
                page_url = f"https://wiki.openstreetmap.org{page_url}"

            # Get the timestamp
            timestamp_element = line.select_one('.mw-changeslist-date')
            timestamp = timestamp_element.text.strip() if timestamp_element else ""

            # Get the user who made the change
            user_element = line.select_one('.mw-userlink')
            user = user_element.text.strip() if user_element else "Unknown"

            # Skip if it's not a proposal page
            if not page_title.startswith('Proposal:'):
                continue

            proposals.append({
                'title': page_title,
                'url': page_url,
                'last_modified': timestamp,
                'modified_by': user,
                'type': 'recent'
            })

        # Limit to the 10 most recent proposals
        proposals = proposals[:10]

        logger.info(f"Found {len(proposals)} recently modified proposals")
        return proposals

    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching recent proposals: {e}")
        return []


def save_proposals(voting_proposals, recent_proposals):
    """Save the proposals to a JSON file."""
    data = {
        'last_updated': datetime.now().isoformat(),
        'voting_proposals': voting_proposals,
        'recent_proposals': recent_proposals
    }

    with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

    logger.info(f"Saved {len(voting_proposals)} voting proposals and {len(recent_proposals)} recent proposals to {OUTPUT_FILE}")
    return OUTPUT_FILE
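
# The JSON written by save_proposals() looks roughly like this (values are
# illustrative; "..." marks fields elided here):
#
# {
#   "last_updated": "2023-12-25T15:30:00",
#   "voting_proposals": [
#     {"title": "...", "url": "...", "status": "Voting", "type": "voting",
#      "votes": {"approve": {"count": 9, "users": [...]}, ...},
#      "total_votes": 12, "approve_percentage": 75.0,
#      "oppose_percentage": 16.7, "abstain_percentage": 8.3,
#      "proposer": "..."}
#   ],
#   "recent_proposals": [
#     {"title": "Proposal:...", "url": "...", "last_modified": "...",
#      "modified_by": "...", "type": "recent"}
#   ]
# }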


def main():
    parser = argparse.ArgumentParser(description='Fetch OSM Wiki proposals')
    parser.add_argument('--force', action='store_true', help='Force update even if cache is fresh')
    parser.add_argument('--dry-run', action='store_true', help='Print results without saving to file')
    args = parser.parse_args()

    # Check if we should update the cache
    if args.force or should_update_cache() or args.dry_run:
        voting_proposals = fetch_voting_proposals()
        recent_proposals = fetch_recent_proposals()

        if args.dry_run:
            logger.info(f"Found {len(voting_proposals)} voting proposals:")
            for proposal in voting_proposals:
                logger.info(f"- {proposal['title']}")

            logger.info(f"Found {len(recent_proposals)} recent proposals:")
            for proposal in recent_proposals:
                logger.info(f"- {proposal['title']} (modified by {proposal['modified_by']} on {proposal['last_modified']})")
        else:
            output_file = save_proposals(voting_proposals, recent_proposals)
            logger.info(f"Results saved to {output_file}")
    else:
        logger.info("Using cached proposals data")


if __name__ == "__main__":
    main()
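
# Typical invocations (the filename fetch_proposals.py is an assumption; use
# whatever name this script is saved under):
#
#   python3 fetch_proposals.py            # refresh only if the cache is over CACHE_TIMEOUT hour(s) old
#   python3 fetch_proposals.py --force    # refetch even if the cache is still fresh
#   python3 fetch_proposals.py --dry-run  # log results without writing proposals.json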