ajout infos des archives de proposition wiki

This commit is contained in:
Tykayn 2025-08-31 12:22:07 +02:00 committed by tykayn
parent 7665f1d99c
commit 9bd1fddd8a
9 changed files with 2517 additions and 27 deletions

View file

@ -7,6 +7,8 @@ import json
import logging
import argparse
import os
import re
import time
from datetime import datetime, timedelta
# Configure logging
@ -26,6 +28,25 @@ OUTPUT_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'proposal
# Cache timeout (in hours)
CACHE_TIMEOUT = 1
# Vote patterns (same as in fetch_archived_proposals.py)
# Maps a vote category ('approve' / 'oppose' / 'abstain') to the list of
# regexes that identify it in a vote comment.  determine_vote_type() tries
# them case-insensitively and returns the first category that matches.
VOTE_PATTERNS = {
    'approve': [
        # "I (strongly/fully/...) approve / support / agree with this proposal"
        r'I\s+(?:(?:strongly|fully|completely|wholeheartedly)\s+)?(?:approve|support|agree\s+with)\s+this\s+proposal',
        # "I vote (to) approve / support"
        r'I\s+vote\s+(?:to\s+)?(?:approve|support)',
        # Wiki vote-template image filename (space or underscore variant).
        # NOTE(review): relies on the filename appearing in the extracted
        # text — confirm BeautifulSoup's get_text() surfaces it.
        r'(?:Symbol\s+support\s+vote\.svg|Symbol_support_vote\.svg)',
    ],
    'oppose': [
        # "I (strongly/...) oppose / disagree with / reject / do not support ..."
        r'I\s+(?:(?:strongly|fully|completely|wholeheartedly)\s+)?(?:oppose|disagree\s+with|reject|do\s+not\s+support)\s+this\s+proposal',
        # "I vote (to) oppose / reject / against"
        r'I\s+vote\s+(?:to\s+)?(?:oppose|reject|against)',
        # Wiki oppose-vote template image filename.
        r'(?:Symbol\s+oppose\s+vote\.svg|Symbol_oppose_vote\.svg)',
    ],
    'abstain': [
        # "I (have comments but) abstain from voting"
        r'I\s+(?:have\s+comments\s+but\s+)?abstain\s+from\s+voting',
        # "I neither approve nor oppose" / "I am neutral"
        r'I\s+(?:have\s+comments\s+but\s+)?(?:neither\s+approve\s+nor\s+oppose|am\s+neutral)',
        # Wiki abstain-vote template image filename.
        r'(?:Symbol\s+abstain\s+vote\.svg|Symbol_abstain_vote\.svg)',
    ]
}
def should_update_cache():
"""
Check if the cache file exists and if it's older than the cache timeout
@ -46,6 +67,134 @@ def should_update_cache():
logger.info(f"Cache is still fresh (less than {CACHE_TIMEOUT} hour(s) old)")
return False
def fetch_page(url, timeout=30):
    """
    Fetch a page from the OSM wiki.

    Args:
        url: Absolute URL of the wiki page to retrieve.
        timeout: Seconds to wait for the server before aborting.  Without
            a timeout, requests.get() can block indefinitely on a stalled
            connection; the default keeps existing callers working.

    Returns:
        The response body as text, or None when the request fails for any
        reason (network error, HTTP error status, timeout).
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Turn HTTP 4xx/5xx statuses into exceptions so they are logged below.
        response.raise_for_status()
        return response.text
    except requests.exceptions.RequestException as e:
        # Timeout is a subclass of RequestException, so it is covered too.
        logger.error(f"Error fetching {url}: {e}")
        return None
def extract_username(text):
    """
    Pull the wiki username out of a signature line.

    Tries the known wiki-link signature styles first, then falls back to a
    bare "-- Username" form.  Returns the stripped username, or None when
    nothing username-like follows a "--" marker.
    """
    # Wiki-link signature styles, e.g. --[[User:Name|Name]],
    # --[[User talk:Name]], --[[Special:Contributions/Name]].
    signature_res = (
        r'--\s*\[\[User:([^|\]]+)(?:\|[^\]]+)?\]\]',
        r'--\s*\[\[User:([^|\]]+)\]\]',
        r'--\s*\[\[User talk:([^|\]]+)(?:\|[^\]]+)?\]\]',
        r'--\s*\[\[User talk:([^|\]]+)\]\]',
        r'--\s*\[\[Special:Contributions/([^|\]]+)(?:\|[^\]]+)?\]\]',
        r'--\s*\[\[Special:Contributions/([^|\]]+)\]\]',
    )
    hits = (re.search(expr, text) for expr in signature_res)
    found = next((m for m in hits if m), None)
    if found is None:
        # Last resort: any plain username-like token after "--".
        found = re.search(r'--\s*([A-Za-z0-9_-]+)', text)
    return found.group(1).strip() if found else None
def extract_date(text):
    """
    Return the first timestamp-like substring of a signature line.

    Recognizes the common MediaWiki signature timestamp layouts; returns
    the matched substring unchanged, or None when no format matches.
    """
    known_formats = (
        r'(\d{1,2}:\d{2}, \d{1,2} [A-Za-z]+ \d{4})',  # e.g. 15:30, 25 December 2023
        r'(\d{1,2} [A-Za-z]+ \d{4} \d{1,2}:\d{2})',   # e.g. 25 December 2023 15:30
        r'(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})',     # ISO 8601, e.g. 2023-12-25T15:30:00
    )
    matches = (re.search(fmt, text) for fmt in known_formats)
    return next((m.group(1) for m in matches if m), None)
def determine_vote_type(text):
    """
    Classify a vote comment as 'approve', 'oppose' or 'abstain'.

    Checks the text against each category's patterns in VOTE_PATTERNS
    (case-insensitively) and returns the first category that matches,
    or None when the line does not look like a vote.
    """
    lowered = text.lower()
    for category, expressions in VOTE_PATTERNS.items():
        if any(re.search(expr, lowered, re.IGNORECASE) for expr in expressions):
            return category
    return None
def extract_votes(html):
    """
    Extract voting information from proposal HTML.

    Finds the first h2/h3 heading containing "voting", "votes" or "poll",
    then walks its sibling elements up to the next h2/h3 and classifies
    each <li> as an approve/oppose/abstain vote via determine_vote_type().

    Args:
        html: Raw HTML of a proposal wiki page.

    Returns:
        Dict keyed by 'approve' / 'oppose' / 'abstain', each mapping to
        {'count': int, 'users': [{'username': str, 'date': str | None}]}.
        All counts are zero when no voting section is found.
    """

    def _empty_tally():
        # Fresh tally so callers never share mutable state.
        return {
            'approve': {'count': 0, 'users': []},
            'oppose': {'count': 0, 'users': []},
            'abstain': {'count': 0, 'users': []}
        }

    soup = BeautifulSoup(html, 'html.parser')

    # Locate the heading that opens the voting section.
    voting_section = None
    for heading in soup.find_all(['h2', 'h3']):
        heading_text = heading.get_text().lower()
        if 'voting' in heading_text or 'votes' in heading_text or 'poll' in heading_text:
            voting_section = heading
            break
    if not voting_section:
        logger.warning("No voting section found")
        return _empty_tally()

    # Collect every element between this heading and the next h2/h3.
    # NOTE(review): assumes the heading and its content are siblings in the
    # parsed tree — confirm against the wiki's actual page markup.
    votes_content = []
    current = voting_section.next_sibling
    while current and current.name not in ('h2', 'h3'):
        if current.name:  # Skip NavigableString objects
            votes_content.append(current)
        current = current.next_sibling

    votes = _empty_tally()

    # Each <li> inside a <ul> in the section is a candidate vote line.
    for element in votes_content:
        if element.name != 'ul':
            continue
        for li in element.find_all('li'):
            vote_text = li.get_text()
            vote_type = determine_vote_type(vote_text)
            if not vote_type:
                continue
            username = extract_username(vote_text)
            date = extract_date(vote_text)
            # Only count votes whose author can be identified.
            if username:
                votes[vote_type]['count'] += 1
                votes[vote_type]['users'].append({
                    'username': username,
                    'date': date
                })
    return votes
def fetch_voting_proposals():
"""
Fetch proposals with "Voting" status from the OSM Wiki
@ -69,12 +218,72 @@ def fetch_voting_proposals():
proposal_title = link.text.strip()
proposal_url = 'https://wiki.openstreetmap.org' + link.get('href', '')
proposals.append({
# Create a basic proposal object
proposal = {
'title': proposal_title,
'url': proposal_url,
'status': 'Voting',
'type': 'voting'
})
}
# Fetch the proposal page to extract voting information
logger.info(f"Fetching proposal page: {proposal_title}")
html = fetch_page(proposal_url)
if html:
# Extract voting information
votes = extract_votes(html)
# Add voting information to the proposal
proposal['votes'] = votes
# Calculate total votes and percentages
total_votes = votes['approve']['count'] + votes['oppose']['count'] + votes['abstain']['count']
if total_votes > 0:
proposal['total_votes'] = total_votes
proposal['approve_percentage'] = round((votes['approve']['count'] / total_votes) * 100, 1)
proposal['oppose_percentage'] = round((votes['oppose']['count'] / total_votes) * 100, 1)
proposal['abstain_percentage'] = round((votes['abstain']['count'] / total_votes) * 100, 1)
else:
proposal['total_votes'] = 0
proposal['approve_percentage'] = 0
proposal['oppose_percentage'] = 0
proposal['abstain_percentage'] = 0
# Extract proposer from the page
soup = BeautifulSoup(html, 'html.parser')
content = soup.select_one('#mw-content-text')
if content:
# Look for table rows with "Proposed by:" in the header cell
for row in content.select('tr'):
cells = row.select('th, td')
if len(cells) >= 2:
header_text = cells[0].get_text().strip().lower()
if "proposed by" in header_text:
user_link = cells[1].select_one('a[href*="/wiki/User:"]')
if user_link:
href = user_link.get('href', '')
title = user_link.get('title', '')
# Try to get username from title attribute first
if title and title.startswith('User:'):
proposal['proposer'] = title[5:] # Remove 'User:' prefix
# Otherwise try to extract from href
elif href:
href_match = re.search(r'/wiki/User:([^/]+)', href)
if href_match:
proposal['proposer'] = href_match.group(1)
# If still no proposer, use the link text
if 'proposer' not in proposal and user_link.get_text():
proposal['proposer'] = user_link.get_text().strip()
# Add a delay to avoid overloading the server
time.sleep(1)
proposals.append(proposal)
logger.info(f"Found {len(proposals)} voting proposals")
return proposals