#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
propose_translation.py

This script reads the outdated_pages.json file, selects a wiki page (by default
the first one), and uses Ollama with the "mistral:7b" model to propose a
translation of the page. The translation is saved in the "proposed_translation"
property of the JSON file.

Usage:
    python propose_translation.py [--page KEY]

Options:
    --page KEY    Specify the key of the page to translate (default: first page in the file)

Output:
    - Updated outdated_pages.json file with proposed translations
"""

import json
import argparse
import logging
import requests
import os
import sys
from bs4 import BeautifulSoup

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)

# Constants
OUTDATED_PAGES_FILE = "outdated_pages.json"
OLLAMA_API_URL = "http://localhost:11434/api/generate"
OLLAMA_MODEL = "mistral:7b"


def load_outdated_pages():
    """
    Load the outdated pages from the JSON file

    Returns:
        list: List of dictionaries containing outdated page information
    """
    try:
        with open(OUTDATED_PAGES_FILE, 'r', encoding='utf-8') as f:
            pages = json.load(f)
        logger.info(f"Successfully loaded {len(pages)} pages from {OUTDATED_PAGES_FILE}")
        return pages
    except (IOError, json.JSONDecodeError) as e:
        logger.error(f"Error loading pages from {OUTDATED_PAGES_FILE}: {e}")
        return []


def save_to_json(data, filename):
    """
    Save data to a JSON file

    Args:
        data: Data to save
        filename (str): Name of the file
    """
    try:
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        logger.info(f"Data saved to {filename}")
    except IOError as e:
        logger.error(f"Error saving data to {filename}: {e}")


def fetch_wiki_page_content(url):
    """
    Fetch the content of a wiki page

    Args:
        url (str): URL of the wiki page

    Returns:
        str: Content of the wiki page
    """
    try:
        response = requests.get(url)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # Get the main content
        content = soup.select_one('#mw-content-text')

        if content:
            # Remove script and style elements
            for script in content.select('script, style'):
                script.extract()

            # Remove .languages elements
            for languages_elem in content.select('.languages'):
                languages_elem.extract()

            # Get text
            text = content.get_text(separator=' ', strip=True)
            return text
        else:
            logger.warning(f"Could not find content in page: {url}")
            return ""
    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching wiki page content: {e}")
        return ""


def translate_with_ollama(text, model=OLLAMA_MODEL):
    """
    Translate text using Ollama

    Args:
        text (str): Text to translate
        model (str): Ollama model to use

    Returns:
        str: Translated text
    """
    prompt = f"""
Tu es un traducteur professionnel spécialisé dans la traduction de documentation technique de l'anglais vers le français.
Traduis le texte suivant de l'anglais vers le français.
Conserve le formatage et la structure du texte original.
Ne traduis pas les noms propres, les URLs, et les termes techniques spécifiques à OpenStreetMap.

Texte à traduire:
{text}
"""

    try:
        logger.info(f"Sending request to Ollama with model {model}")
        payload = {
            "model": model,
            "prompt": prompt,
            "stream": False
        }
        response = requests.post(OLLAMA_API_URL, json=payload)
        response.raise_for_status()

        result = response.json()
        translation = result.get('response', '')

        logger.info("Successfully received translation from Ollama")
        return translation
    except requests.exceptions.RequestException as e:
        logger.error(f"Error translating with Ollama: {e}")
        return ""


def select_page_for_translation(pages, key=None):
    """
    Select a page for translation

    Args:
        pages (list): List of dictionaries containing page information
        key (str): Key of the page to select (if None, select the first page)

    Returns:
        dict: Selected page or None if no suitable page found
    """
    if not pages:
        logger.warning("No pages found that need translation")
        return None

    if key:
        # Find the page with the specified key
        for page in pages:
            if page.get('key') == key:
                logger.info(f"Selected page for key '{key}' for translation")
                return page
        logger.warning(f"No page found with key '{key}'")
        return None
    else:
        # Select the first page
        selected_page = pages[0]
        logger.info(f"Selected first page (key '{selected_page['key']}') for translation")
        return selected_page


def main():
    """Main function to execute the script"""
    parser = argparse.ArgumentParser(description="Propose a translation for an OSM wiki page using Ollama")
    parser.add_argument("--page", help="Key of the page to translate (default: first page in the file)")
    args = parser.parse_args()

    logger.info("Starting propose_translation.py")

    # Load pages
    pages = load_outdated_pages()

    if not pages:
        logger.error("No pages found. Run wiki_compare.py first.")
        sys.exit(1)

    # Select a page for translation
    selected_page = select_page_for_translation(pages, args.page)

    if not selected_page:
        logger.error("Could not select a page for translation.")
        sys.exit(1)

    # Get the English page URL
    en_url = selected_page.get('en_page', {}).get('url')

    if not en_url:
        logger.error(f"No English page URL found for key '{selected_page['key']}'")
        sys.exit(1)

    # Fetch the content of the English page
    logger.info(f"Fetching content from {en_url}")
    content = fetch_wiki_page_content(en_url)

    if not content:
        logger.error(f"Could not fetch content from {en_url}")
        sys.exit(1)

    # Translate the content
    logger.info(f"Translating content for key '{selected_page['key']}'")
    translation = translate_with_ollama(content)

    if not translation:
        logger.error("Could not translate content")
        sys.exit(1)

    # Save the translation in the JSON file
    logger.info(f"Saving translation for key '{selected_page['key']}'")
    selected_page['proposed_translation'] = translation

    # Save the updated data back to the file
    save_to_json(pages, OUTDATED_PAGES_FILE)

    logger.info("Script completed successfully")


if __name__ == "__main__":
    main()