#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
fix_grammar_suggestions.py

This script adds grammar suggestions to the "type" page in the
outdated_pages.json file. It fetches the French content for the page,
runs the grammar checker, and updates the file.
"""

import json
import logging
import os
import subprocess
import tempfile

import requests
from bs4 import BeautifulSoup

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)

# Constants
OUTDATED_PAGES_FILE = "outdated_pages.json"
TARGET_KEY = "type"
# Timeout (seconds) for HTTP requests: without one, a hung server would
# stall the whole script indefinitely.
REQUEST_TIMEOUT = 30


def load_outdated_pages():
    """
    Load the outdated pages from the JSON file.

    Returns:
        dict: Dictionary containing outdated page information, or None if
        the file could not be read or parsed.
    """
    try:
        with open(OUTDATED_PAGES_FILE, 'r', encoding='utf-8') as f:
            data = json.load(f)
        logger.info(f"Successfully loaded outdated pages from {OUTDATED_PAGES_FILE}")
        return data
    except (IOError, json.JSONDecodeError) as e:
        logger.error(f"Error loading pages from {OUTDATED_PAGES_FILE}: {e}")
        return None


def save_outdated_pages(data):
    """
    Save the outdated pages to the JSON file.

    Args:
        data (dict): Dictionary containing outdated page information.
    """
    try:
        with open(OUTDATED_PAGES_FILE, 'w', encoding='utf-8') as f:
            # ensure_ascii=False keeps accented French text readable on disk.
            json.dump(data, f, indent=2, ensure_ascii=False)
        logger.info(f"Successfully saved outdated pages to {OUTDATED_PAGES_FILE}")
    except IOError as e:
        logger.error(f"Error saving pages to {OUTDATED_PAGES_FILE}: {e}")


def fetch_wiki_page_content(url):
    """
    Fetch the plain-text content of a wiki page.

    Args:
        url (str): URL of the wiki page.

    Returns:
        str: Extracted text of the page's main content area, or "" on
        failure or when the content element is not found.
    """
    try:
        logger.info(f"Fetching content from {url}")
        # Timeout prevents an unresponsive server from hanging the script.
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # Get the main content (MediaWiki places it under #mw-content-text).
        content = soup.select_one('#mw-content-text')
        if content:
            # Remove script and style elements
            for script in content.select('script, style'):
                script.extract()

            # Remove .languages elements (interlanguage link boxes)
            for languages_elem in content.select('.languages'):
                languages_elem.extract()

            # Get text
            text = content.get_text(separator=' ', strip=True)
            logger.info(f"Successfully fetched content ({len(text)} characters)")
            return text

        logger.warning(f"Could not find content in page: {url}")
        return ""
    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching wiki page content: {e}")
        return ""


def _make_suggestion(error, paragraph_index, error_type=None, message=None):
    """
    Convert one raw grammalecte error dict into our suggestion format.

    Args:
        error (dict): Raw error entry from grammalecte's JSON output.
        paragraph_index (int): Index of the paragraph containing the error.
        error_type (str | None): Override for the error type (used for
            spelling errors, which carry no 'sType' of their own).
        message (str | None): Override for the human-readable message.

    Returns:
        dict: Normalized suggestion record.
    """
    return {
        'paragraph': paragraph_index,
        'start': error.get('nStart', 0),
        'end': error.get('nEnd', 0),
        'type': error_type if error_type is not None else error.get('sType', ''),
        'message': message if message is not None else error.get('sMessage', ''),
        'suggestions': error.get('aSuggestions', []),
        'text': error.get('sUnderlined', ''),
        'before': error.get('sBefore', ''),
        'after': error.get('sAfter', '')
    }


def check_grammar_with_grammalecte(text):
    """
    Check grammar in French text using grammalecte-cli.

    Args:
        text (str): French text to check.

    Returns:
        list: List of grammar/spelling suggestion dicts (empty on error).
    """
    if not text or len(text.strip()) == 0:
        logger.warning("Empty text provided for grammar checking")
        return []

    logger.info("Checking grammar with grammalecte-cli...")

    temp_file_path = None
    try:
        # Write the text to a temporary file; grammalecte-cli reads files,
        # not stdin in this invocation mode.
        with tempfile.NamedTemporaryFile(mode='w', encoding='utf-8',
                                         suffix='.txt', delete=False) as temp_file:
            temp_file.write(text)
            temp_file_path = temp_file.name

        # Run grammalecte-cli on the temporary file
        # (-j: JSON output, -ctx: include context, -wss: suggest on spelling).
        cmd = ['grammalecte-cli', '-f', temp_file_path, '-j', '-ctx', '-wss']
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)

        # Parse the JSON output
        grammar_data = json.loads(result.stdout)

        # Extract grammar and spelling errors from all paragraphs.
        grammar_suggestions = []
        for paragraph in grammar_data.get('data', []):
            paragraph_index = paragraph.get('iParagraph', 0)

            for error in paragraph.get('lGrammarErrors', []):
                grammar_suggestions.append(
                    _make_suggestion(error, paragraph_index))

            # Spelling errors have no sType/sMessage, so supply fixed ones.
            for error in paragraph.get('lSpellingErrors', []):
                grammar_suggestions.append(
                    _make_suggestion(error, paragraph_index,
                                     error_type='spelling',
                                     message='Erreur d\'orthographe'))

        logger.info(f"Found {len(grammar_suggestions)} grammar/spelling suggestions")
        return grammar_suggestions
    except subprocess.CalledProcessError as e:
        logger.error(f"Error running grammalecte-cli: {e}")
        logger.error(f"stdout: {e.stdout}")
        logger.error(f"stderr: {e.stderr}")
        return []
    except json.JSONDecodeError as e:
        logger.error(f"Error parsing grammalecte-cli output: {e}")
        return []
    except Exception as e:
        logger.error(f"Unexpected error during grammar checking: {e}")
        return []
    finally:
        # Always remove the temp file — the original code only deleted it on
        # the success path, leaking a file whenever grammalecte failed.
        if temp_file_path and os.path.exists(temp_file_path):
            os.unlink(temp_file_path)


def main():
    """Main function to execute the script."""
    logger.info("Starting fix_grammar_suggestions.py")

    # Load outdated pages
    data = load_outdated_pages()
    if not data:
        logger.error("Failed to load outdated pages")
        return

    # Find the "type" page in the regular_pages array
    type_page = None
    type_page_index = -1
    for i, page in enumerate(data.get('regular_pages', [])):
        if page.get('key') == TARGET_KEY:
            type_page = page
            type_page_index = i
            break

    # 'is None' (not truthiness): an empty page record must still count
    # as found, and this also guarantees type_page_index is valid below.
    if type_page is None:
        logger.error(f"Could not find page with key '{TARGET_KEY}'")
        return

    # Get the French page URL
    fr_page = type_page.get('fr_page')
    if not fr_page:
        logger.error(f"No French page found for key '{TARGET_KEY}'")
        return

    fr_url = fr_page.get('url')
    if not fr_url:
        logger.error(f"No URL found for French page of key '{TARGET_KEY}'")
        return

    # Fetch the content of the French page
    content = fetch_wiki_page_content(fr_url)
    if not content:
        logger.error(f"Could not fetch content from {fr_url}")
        return

    # Check grammar
    logger.info(f"Checking grammar for key '{TARGET_KEY}'")
    suggestions = check_grammar_with_grammalecte(content)
    if not suggestions:
        logger.warning("No grammar suggestions found or grammar checker not available")

    # Add the grammar suggestions to the page
    type_page['grammar_suggestions'] = suggestions

    # Update the page in the data
    data['regular_pages'][type_page_index] = type_page

    # Save the updated data
    save_outdated_pages(data)

    logger.info("Script completed successfully")


if __name__ == "__main__":
    main()