#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import argparse
import json
import logging
import os
import re
import time
from datetime import datetime, timedelta

import requests
from bs4 import BeautifulSoup

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# URLs for OSM Wiki proposals
VOTING_PROPOSALS_URL = "https://wiki.openstreetmap.org/wiki/Category:Proposals_with_%22Voting%22_status"
RECENT_CHANGES_URL = "https://wiki.openstreetmap.org/w/index.php?title=Special:RecentChanges&namespace=102&limit=50"  # Namespace 102 is for Proposal pages

# Output file
OUTPUT_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'proposals.json')

# Cache timeout (in hours)
CACHE_TIMEOUT = 1

# Vote patterns (same as in fetch_archived_proposals.py)
VOTE_PATTERNS = {
    'approve': [
        r'I\s+(?:(?:strongly|fully|completely|wholeheartedly)\s+)?(?:approve|support|agree\s+with)\s+this\s+proposal',
        r'I\s+vote\s+(?:to\s+)?(?:approve|support)',
        r'(?:Symbol\s+support\s+vote\.svg|Symbol_support_vote\.svg)',
    ],
    'oppose': [
        r'I\s+(?:(?:strongly|fully|completely|wholeheartedly)\s+)?(?:oppose|disagree\s+with|reject|do\s+not\s+support)\s+this\s+proposal',
        r'I\s+vote\s+(?:to\s+)?(?:oppose|reject|against)',
        r'(?:Symbol\s+oppose\s+vote\.svg|Symbol_oppose_vote\.svg)',
    ],
    'abstain': [
        r'I\s+(?:have\s+comments\s+but\s+)?abstain\s+from\s+voting',
        r'I\s+(?:have\s+comments\s+but\s+)?(?:neither\s+approve\s+nor\s+oppose|am\s+neutral)',
        r'(?:Symbol\s+abstain\s+vote\.svg|Symbol_abstain_vote\.svg)',
    ]
}

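# Illustrative vote lines (assumed typical wiki formatting) and how they classify:
#   "I approve this proposal. --[[User:Alice|Alice]]"          -> 'approve'
#   "I strongly oppose this proposal. --[[User:Bob]]"          -> 'oppose'
#   "I have comments but abstain from voting. --[[User:Eve]]"  -> 'abstain'
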
def should_update_cache():
    """
    Return True if the cache file is missing or older than CACHE_TIMEOUT hours.
    """
    if not os.path.exists(OUTPUT_FILE):
        logger.info("Cache file doesn't exist, creating it")
        return True

    # Compare the file's modification time against the timeout window
    file_mtime = datetime.fromtimestamp(os.path.getmtime(OUTPUT_FILE))
    now = datetime.now()

    if now - file_mtime > timedelta(hours=CACHE_TIMEOUT):
        logger.info(f"Cache is older than {CACHE_TIMEOUT} hour(s), updating")
        return True

    logger.info(f"Cache is still fresh (less than {CACHE_TIMEOUT} hour(s) old)")
    return False

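# With CACHE_TIMEOUT = 1, the wiki is re-fetched at most once per hour no matter
# how often this script is invoked (e.g. from cron).
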
def fetch_page(url):
    """
    Fetch a page from the OSM wiki, returning its HTML or None on failure.
    """
    try:
        # A timeout keeps the script from hanging on a stalled connection
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        return response.text
    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching {url}: {e}")
        return None

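# Example (illustrative URL): fetch_page("https://wiki.openstreetmap.org/wiki/Proposal:Example")
# returns the page HTML as a string, or None if the request fails.
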
def extract_username(text):
    """
    Extract a username from a wiki signature line, or return None.
    """
    # Common signature patterns; the optional (?:\|[^\]]+)? suffix also covers
    # piped display names, so [[User:Username]] and [[User:Username|Name]]
    # are both matched by a single pattern
    patterns = [
        r'--\s*\[\[User:([^|\]]+)(?:\|[^\]]+)?\]\]',                   # --[[User:Username]]
        r'--\s*\[\[User talk:([^|\]]+)(?:\|[^\]]+)?\]\]',              # --[[User talk:Username]]
        r'--\s*\[\[Special:Contributions/([^|\]]+)(?:\|[^\]]+)?\]\]',  # --[[Special:Contributions/Username]]
    ]

    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return match.group(1).strip()

    # If no pattern matched, fall back to any username-like string
    # following the signature dashes
    match = re.search(r'--\s*([A-Za-z0-9_-]+)', text)
    if match:
        return match.group(1).strip()

    return None

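# Example (illustrative): extract_username("I approve. --[[User:Alice|Alice]] 15:30, 25 December 2023")
# returns 'Alice'.
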
def extract_date(text):
    """
    Extract a date from a signature line, or return None.
    """
    # Look for common date formats in signatures
    date_patterns = [
        r'(\d{1,2}:\d{2}, \d{1,2} [A-Za-z]+ \d{4})',  # 15:30, 25 December 2023
        r'(\d{1,2} [A-Za-z]+ \d{4} \d{1,2}:\d{2})',   # 25 December 2023 15:30
        r'(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})',     # 2023-12-25T15:30:00
    ]

    for pattern in date_patterns:
        match = re.search(pattern, text)
        if match:
            return match.group(1)

    return None

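# Example (illustrative): extract_date("-- Alice 15:30, 25 December 2023 (UTC)")
# returns '15:30, 25 December 2023'.
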
def determine_vote_type(text):
    """
    Determine the type of vote ('approve', 'oppose' or 'abstain') from the text.
    """
    for vote_type, patterns in VOTE_PATTERNS.items():
        for pattern in patterns:
            # re.IGNORECASE already handles case, so no lowercased copy is needed
            if re.search(pattern, text, re.IGNORECASE):
                return vote_type

    return None

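# Example (illustrative): determine_vote_type("I approve this proposal. --Alice")
# returns 'approve'; text matching no pattern returns None.
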
def extract_votes(html):
    """
    Extract voting information from proposal HTML.
    """
    soup = BeautifulSoup(html, 'html.parser')

    # Empty tally for each vote type: {'approve': {'count': 0, 'users': []}, ...}
    votes = {vote_type: {'count': 0, 'users': []} for vote_type in VOTE_PATTERNS}

    # Find the voting section
    voting_section = None
    for heading in soup.find_all(['h2', 'h3']):
        heading_text = heading.get_text().lower()
        if 'voting' in heading_text or 'votes' in heading_text or 'poll' in heading_text:
            voting_section = heading
            break

    if not voting_section:
        logger.warning("No voting section found")
        return votes

    # Collect all elements after the voting heading until the next heading
    # or the end of the document
    votes_content = []
    current = voting_section.next_sibling
    while current and current.name not in ['h2', 'h3']:
        if current.name:  # Skip NavigableString objects
            votes_content.append(current)
        current = current.next_sibling

    # Look for lists of votes
    for element in votes_content:
        if element.name == 'ul':
            for li in element.find_all('li'):
                vote_text = li.get_text()
                vote_type = determine_vote_type(vote_text)

                if vote_type:
                    username = extract_username(vote_text)
                    date = extract_date(vote_text)

                    if username:
                        votes[vote_type]['count'] += 1
                        votes[vote_type]['users'].append({
                            'username': username,
                            'date': date
                        })

    return votes

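# Returned structure (illustrative):
#   {'approve': {'count': 2, 'users': [{'username': 'Alice', 'date': '...'}, ...]},
#    'oppose':  {'count': 1, 'users': [...]},
#    'abstain': {'count': 0, 'users': []}}
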
def fetch_voting_proposals():
    """
    Fetch proposals with "Voting" status from the OSM Wiki.
    """
    logger.info(f"Fetching voting proposals from {VOTING_PROPOSALS_URL}")
    try:
        response = requests.get(VOTING_PROPOSALS_URL, timeout=30)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')
        proposals = []

        # Find all links in the mw-pages section
        links = soup.select('#mw-pages a')

        for link in links:
            # Skip category links and other non-proposal links
            if 'Category:' in link.get('href', '') or 'Special:' in link.get('href', ''):
                continue

            proposal_title = link.text.strip()
            proposal_url = 'https://wiki.openstreetmap.org' + link.get('href', '')

            # Create a basic proposal object
            proposal = {
                'title': proposal_title,
                'url': proposal_url,
                'status': 'Voting',
                'type': 'voting'
            }

            # Fetch the proposal page to extract voting information
            logger.info(f"Fetching proposal page: {proposal_title}")
            html = fetch_page(proposal_url)

            if html:
                # Extract voting information and attach it to the proposal
                votes = extract_votes(html)
                proposal['votes'] = votes

                # Calculate total votes and percentages
                total_votes = votes['approve']['count'] + votes['oppose']['count'] + votes['abstain']['count']

                if total_votes > 0:
                    proposal['total_votes'] = total_votes
                    proposal['approve_percentage'] = round((votes['approve']['count'] / total_votes) * 100, 1)
                    proposal['oppose_percentage'] = round((votes['oppose']['count'] / total_votes) * 100, 1)
                    proposal['abstain_percentage'] = round((votes['abstain']['count'] / total_votes) * 100, 1)
                else:
                    proposal['total_votes'] = 0
                    proposal['approve_percentage'] = 0
                    proposal['oppose_percentage'] = 0
                    proposal['abstain_percentage'] = 0

                # Extract the proposer from the page (a separate soup, so the
                # category-page soup above is not shadowed)
                proposal_soup = BeautifulSoup(html, 'html.parser')
                content = proposal_soup.select_one('#mw-content-text')

                if content:
                    # Look for table rows with "Proposed by:" in the header cell
                    for row in content.select('tr'):
                        cells = row.select('th, td')
                        if len(cells) >= 2:
                            header_text = cells[0].get_text().strip().lower()
                            if "proposed by" in header_text:
                                user_link = cells[1].select_one('a[href*="/wiki/User:"]')
                                if user_link:
                                    href = user_link.get('href', '')
                                    title = user_link.get('title', '')

                                    # Try to get the username from the title attribute first
                                    if title and title.startswith('User:'):
                                        proposal['proposer'] = title[5:]  # Remove 'User:' prefix
                                    # Otherwise try to extract it from the href
                                    elif href:
                                        href_match = re.search(r'/wiki/User:([^/]+)', href)
                                        if href_match:
                                            proposal['proposer'] = href_match.group(1)

                                    # If still no proposer, use the link text
                                    if 'proposer' not in proposal and user_link.get_text():
                                        proposal['proposer'] = user_link.get_text().strip()

                # Add a delay to avoid overloading the server
                time.sleep(1)

            proposals.append(proposal)

        logger.info(f"Found {len(proposals)} voting proposals")
        return proposals

    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching voting proposals: {e}")
        return []

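# A fully populated proposal record (illustrative):
#   {'title': '...', 'url': '...', 'status': 'Voting', 'type': 'voting',
#    'votes': {...}, 'total_votes': 12, 'approve_percentage': 75.0,
#    'oppose_percentage': 16.7, 'abstain_percentage': 8.3, 'proposer': 'Alice'}
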
def fetch_recent_proposals():
    """
    Fetch recently modified proposals from the OSM Wiki.
    """
    logger.info(f"Fetching recent changes from {RECENT_CHANGES_URL}")
    try:
        response = requests.get(RECENT_CHANGES_URL, timeout=30)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')
        proposals = []

        # Find all change list lines
        change_lines = soup.select('.mw-changeslist .mw-changeslist-line')

        for line in change_lines:
            # Get the page title
            title_element = line.select_one('.mw-changeslist-title')
            if not title_element:
                continue

            page_title = title_element.text.strip()

            # Skip if it's not a proposal page
            if not page_title.startswith('Proposal:'):
                continue

            page_url = title_element.get('href', '')
            if not page_url.startswith('http'):
                page_url = f"https://wiki.openstreetmap.org{page_url}"

            # Get the timestamp
            timestamp_element = line.select_one('.mw-changeslist-date')
            timestamp = timestamp_element.text.strip() if timestamp_element else ""

            # Get the user who made the change
            user_element = line.select_one('.mw-userlink')
            user = user_element.text.strip() if user_element else "Unknown"

            proposals.append({
                'title': page_title,
                'url': page_url,
                'last_modified': timestamp,
                'modified_by': user,
                'type': 'recent'
            })

        # Limit to the 10 most recent proposals
        proposals = proposals[:10]
        logger.info(f"Found {len(proposals)} recently modified proposals")
        return proposals

    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching recent proposals: {e}")
        return []

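# A recent-changes record (illustrative):
#   {'title': 'Proposal:Example', 'url': 'https://wiki.openstreetmap.org/wiki/Proposal:Example',
#    'last_modified': '15:30, 25 December 2023', 'modified_by': 'Alice', 'type': 'recent'}
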
def save_proposals(voting_proposals, recent_proposals):
    """
    Save the proposals to a JSON file.
    """
    data = {
        'last_updated': datetime.now().isoformat(),
        'voting_proposals': voting_proposals,
        'recent_proposals': recent_proposals
    }

    with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

    logger.info(f"Saved {len(voting_proposals)} voting proposals and {len(recent_proposals)} recent proposals to {OUTPUT_FILE}")
    return OUTPUT_FILE

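# Resulting proposals.json layout (illustrative):
#   {"last_updated": "2023-12-25T15:30:00", "voting_proposals": [...], "recent_proposals": [...]}
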
def main():
    parser = argparse.ArgumentParser(description='Fetch OSM Wiki proposals')
    parser.add_argument('--force', action='store_true', help='Force update even if cache is fresh')
    parser.add_argument('--dry-run', action='store_true', help='Print results without saving to file')
    args = parser.parse_args()

    # --force and --dry-run always fetch, so check them before touching the cache
    if args.force or args.dry_run or should_update_cache():
        voting_proposals = fetch_voting_proposals()
        recent_proposals = fetch_recent_proposals()

        if args.dry_run:
            logger.info(f"Found {len(voting_proposals)} voting proposals:")
            for proposal in voting_proposals:
                logger.info(f"- {proposal['title']}")

            logger.info(f"Found {len(recent_proposals)} recent proposals:")
            for proposal in recent_proposals:
                logger.info(f"- {proposal['title']} (modified by {proposal['modified_by']} on {proposal['last_modified']})")
        else:
            output_file = save_proposals(voting_proposals, recent_proposals)
            logger.info(f"Results saved to {output_file}")
    else:
        logger.info("Using cached proposals data")


if __name__ == "__main__":
    main()
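
# Usage (assuming this file is saved as fetch_proposals.py):
#   ./fetch_proposals.py            # refresh proposals.json when the cache is stale
#   ./fetch_proposals.py --force    # refresh regardless of cache age
#   ./fetch_proposals.py --dry-run  # fetch and log results without writing the file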