ajout infos des archives de proposition wiki

This commit is contained in:
Tykayn 2025-08-31 12:22:07 +02:00 committed by tykayn
parent 7665f1d99c
commit 9bd1fddd8a
9 changed files with 2517 additions and 27 deletions

View file

@ -7,6 +7,8 @@ import json
import logging
import argparse
import os
import re
import time
from datetime import datetime, timedelta
# Configure logging
@ -26,6 +28,25 @@ OUTPUT_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'proposal
# Cache timeout (in hours)
CACHE_TIMEOUT = 1
# Vote patterns (same as in fetch_archived_proposals.py)
# Maps a vote category ('approve' / 'oppose' / 'abstain') to the list of
# regexes that identify it in a vote comment.  determine_vote_type() tries
# them case-insensitively and returns the first category that matches.
VOTE_PATTERNS = {
    'approve': [
        # "I (strongly/fully/...) approve / support / agree with this proposal"
        r'I\s+(?:(?:strongly|fully|completely|wholeheartedly)\s+)?(?:approve|support|agree\s+with)\s+this\s+proposal',
        # "I vote (to) approve / support"
        r'I\s+vote\s+(?:to\s+)?(?:approve|support)',
        # Wiki vote-template image filename (space or underscore variant).
        # NOTE(review): relies on the filename appearing in the extracted
        # text — confirm BeautifulSoup's get_text() surfaces it.
        r'(?:Symbol\s+support\s+vote\.svg|Symbol_support_vote\.svg)',
    ],
    'oppose': [
        # "I (strongly/...) oppose / disagree with / reject / do not support ..."
        r'I\s+(?:(?:strongly|fully|completely|wholeheartedly)\s+)?(?:oppose|disagree\s+with|reject|do\s+not\s+support)\s+this\s+proposal',
        # "I vote (to) oppose / reject / against"
        r'I\s+vote\s+(?:to\s+)?(?:oppose|reject|against)',
        # Wiki oppose-vote template image filename.
        r'(?:Symbol\s+oppose\s+vote\.svg|Symbol_oppose_vote\.svg)',
    ],
    'abstain': [
        # "I (have comments but) abstain from voting"
        r'I\s+(?:have\s+comments\s+but\s+)?abstain\s+from\s+voting',
        # "I neither approve nor oppose" / "I am neutral"
        r'I\s+(?:have\s+comments\s+but\s+)?(?:neither\s+approve\s+nor\s+oppose|am\s+neutral)',
        # Wiki abstain-vote template image filename.
        r'(?:Symbol\s+abstain\s+vote\.svg|Symbol_abstain_vote\.svg)',
    ]
}
def should_update_cache():
"""
Check if the cache file exists and if it's older than the cache timeout
@ -46,6 +67,134 @@ def should_update_cache():
logger.info(f"Cache is still fresh (less than {CACHE_TIMEOUT} hour(s) old)")
return False
def fetch_page(url, timeout=30):
    """
    Fetch a page from the OSM wiki.

    Args:
        url: Absolute URL of the wiki page to retrieve.
        timeout: Seconds to wait for the server before aborting.  Without
            a timeout, requests.get() can block indefinitely on a stalled
            connection; the default keeps existing callers working.

    Returns:
        The response body as text, or None when the request fails for any
        reason (network error, HTTP error status, timeout).
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Turn HTTP 4xx/5xx statuses into exceptions so they are logged below.
        response.raise_for_status()
        return response.text
    except requests.exceptions.RequestException as e:
        # Timeout is a subclass of RequestException, so it is covered too.
        logger.error(f"Error fetching {url}: {e}")
        return None
def extract_username(text):
    """
    Pull the wiki username out of a signature line.

    Tries the known wiki-link signature styles first, then falls back to a
    bare "-- Username" form.  Returns the stripped username, or None when
    nothing username-like follows a "--" marker.
    """
    # Wiki-link signature styles, e.g. --[[User:Name|Name]],
    # --[[User talk:Name]], --[[Special:Contributions/Name]].
    signature_res = (
        r'--\s*\[\[User:([^|\]]+)(?:\|[^\]]+)?\]\]',
        r'--\s*\[\[User:([^|\]]+)\]\]',
        r'--\s*\[\[User talk:([^|\]]+)(?:\|[^\]]+)?\]\]',
        r'--\s*\[\[User talk:([^|\]]+)\]\]',
        r'--\s*\[\[Special:Contributions/([^|\]]+)(?:\|[^\]]+)?\]\]',
        r'--\s*\[\[Special:Contributions/([^|\]]+)\]\]',
    )
    hits = (re.search(expr, text) for expr in signature_res)
    found = next((m for m in hits if m), None)
    if found is None:
        # Last resort: any plain username-like token after "--".
        found = re.search(r'--\s*([A-Za-z0-9_-]+)', text)
    return found.group(1).strip() if found else None
def extract_date(text):
    """
    Return the first timestamp-like substring of a signature line.

    Recognizes the common MediaWiki signature timestamp layouts; returns
    the matched substring unchanged, or None when no format matches.
    """
    known_formats = (
        r'(\d{1,2}:\d{2}, \d{1,2} [A-Za-z]+ \d{4})',  # e.g. 15:30, 25 December 2023
        r'(\d{1,2} [A-Za-z]+ \d{4} \d{1,2}:\d{2})',   # e.g. 25 December 2023 15:30
        r'(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})',     # ISO 8601, e.g. 2023-12-25T15:30:00
    )
    matches = (re.search(fmt, text) for fmt in known_formats)
    return next((m.group(1) for m in matches if m), None)
def determine_vote_type(text):
    """
    Classify a vote comment as 'approve', 'oppose' or 'abstain'.

    Checks the text against each category's patterns in VOTE_PATTERNS
    (case-insensitively) and returns the first category that matches,
    or None when the line does not look like a vote.
    """
    lowered = text.lower()
    for category, expressions in VOTE_PATTERNS.items():
        if any(re.search(expr, lowered, re.IGNORECASE) for expr in expressions):
            return category
    return None
def extract_votes(html):
    """
    Extract voting information from proposal HTML.

    Finds the first h2/h3 heading containing "voting", "votes" or "poll",
    then walks its sibling elements up to the next h2/h3 and classifies
    each <li> as an approve/oppose/abstain vote via determine_vote_type().

    Args:
        html: Raw HTML of a proposal wiki page.

    Returns:
        Dict keyed by 'approve' / 'oppose' / 'abstain', each mapping to
        {'count': int, 'users': [{'username': str, 'date': str | None}]}.
        All counts are zero when no voting section is found.
    """

    def _empty_tally():
        # Fresh tally so callers never share mutable state.
        return {
            'approve': {'count': 0, 'users': []},
            'oppose': {'count': 0, 'users': []},
            'abstain': {'count': 0, 'users': []}
        }

    soup = BeautifulSoup(html, 'html.parser')

    # Locate the heading that opens the voting section.
    voting_section = None
    for heading in soup.find_all(['h2', 'h3']):
        heading_text = heading.get_text().lower()
        if 'voting' in heading_text or 'votes' in heading_text or 'poll' in heading_text:
            voting_section = heading
            break
    if not voting_section:
        logger.warning("No voting section found")
        return _empty_tally()

    # Collect every element between this heading and the next h2/h3.
    # NOTE(review): assumes the heading and its content are siblings in the
    # parsed tree — confirm against the wiki's actual page markup.
    votes_content = []
    current = voting_section.next_sibling
    while current and current.name not in ('h2', 'h3'):
        if current.name:  # Skip NavigableString objects
            votes_content.append(current)
        current = current.next_sibling

    votes = _empty_tally()

    # Each <li> inside a <ul> in the section is a candidate vote line.
    for element in votes_content:
        if element.name != 'ul':
            continue
        for li in element.find_all('li'):
            vote_text = li.get_text()
            vote_type = determine_vote_type(vote_text)
            if not vote_type:
                continue
            username = extract_username(vote_text)
            date = extract_date(vote_text)
            # Only count votes whose author can be identified.
            if username:
                votes[vote_type]['count'] += 1
                votes[vote_type]['users'].append({
                    'username': username,
                    'date': date
                })
    return votes
def fetch_voting_proposals():
"""
Fetch proposals with "Voting" status from the OSM Wiki
@ -69,12 +218,72 @@ def fetch_voting_proposals():
proposal_title = link.text.strip()
proposal_url = 'https://wiki.openstreetmap.org' + link.get('href', '')
proposals.append({
# Create a basic proposal object
proposal = {
'title': proposal_title,
'url': proposal_url,
'status': 'Voting',
'type': 'voting'
})
}
# Fetch the proposal page to extract voting information
logger.info(f"Fetching proposal page: {proposal_title}")
html = fetch_page(proposal_url)
if html:
# Extract voting information
votes = extract_votes(html)
# Add voting information to the proposal
proposal['votes'] = votes
# Calculate total votes and percentages
total_votes = votes['approve']['count'] + votes['oppose']['count'] + votes['abstain']['count']
if total_votes > 0:
proposal['total_votes'] = total_votes
proposal['approve_percentage'] = round((votes['approve']['count'] / total_votes) * 100, 1)
proposal['oppose_percentage'] = round((votes['oppose']['count'] / total_votes) * 100, 1)
proposal['abstain_percentage'] = round((votes['abstain']['count'] / total_votes) * 100, 1)
else:
proposal['total_votes'] = 0
proposal['approve_percentage'] = 0
proposal['oppose_percentage'] = 0
proposal['abstain_percentage'] = 0
# Extract proposer from the page
soup = BeautifulSoup(html, 'html.parser')
content = soup.select_one('#mw-content-text')
if content:
# Look for table rows with "Proposed by:" in the header cell
for row in content.select('tr'):
cells = row.select('th, td')
if len(cells) >= 2:
header_text = cells[0].get_text().strip().lower()
if "proposed by" in header_text:
user_link = cells[1].select_one('a[href*="/wiki/User:"]')
if user_link:
href = user_link.get('href', '')
title = user_link.get('title', '')
# Try to get username from title attribute first
if title and title.startswith('User:'):
proposal['proposer'] = title[5:] # Remove 'User:' prefix
# Otherwise try to extract from href
elif href:
href_match = re.search(r'/wiki/User:([^/]+)', href)
if href_match:
proposal['proposer'] = href_match.group(1)
# If still no proposer, use the link text
if 'proposer' not in proposal and user_link.get_text():
proposal['proposer'] = user_link.get_text().strip()
# Add a delay to avoid overloading the server
time.sleep(1)
proposals.append(proposal)
logger.info(f"Found {len(proposals)} voting proposals")
return proposals