mirror of
https://forge.chapril.org/tykayn/osm-commerces
synced 2025-10-04 17:04:53 +02:00
233 lines · No EOL · 7 KiB · Python · Executable file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
propose_translation.py

This script reads the outdated_pages.json file, selects a wiki page (by default the first one),
and uses Ollama with the "mistral:7b" model to propose a translation of the page.
The translation is saved in the "proposed_translation" property of the JSON file.

Usage:
    python propose_translation.py [--page KEY]

Options:
    --page KEY  Specify the key of the page to translate (default: first page in the file)

Output:
    - Updated outdated_pages.json file with proposed translations
"""
|
|
|
|
import json
import argparse
import logging
import requests
import os
import sys
from bs4 import BeautifulSoup

# Configure logging: timestamped INFO-level messages to stderr.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)

# Constants
OUTDATED_PAGES_FILE = "outdated_pages.json"  # input/output file, produced by wiki_compare.py
OLLAMA_API_URL = "http://localhost:11434/api/generate"  # local Ollama REST endpoint
OLLAMA_MODEL = "mistral:7b"  # model used for the translation prompt
|
|
|
|
def load_outdated_pages():
    """
    Read the list of outdated wiki pages from OUTDATED_PAGES_FILE.

    Returns:
        list: Parsed page dictionaries, or an empty list when the file
        is missing, unreadable, or contains invalid JSON.
    """
    try:
        with open(OUTDATED_PAGES_FILE, 'r', encoding='utf-8') as handle:
            loaded = json.load(handle)
    except (IOError, json.JSONDecodeError) as exc:
        # Best-effort: callers treat an empty list as "nothing to do".
        logger.error(f"Error loading pages from {OUTDATED_PAGES_FILE}: {exc}")
        return []
    logger.info(f"Successfully loaded {len(loaded)} pages from {OUTDATED_PAGES_FILE}")
    return loaded
|
|
|
|
def save_to_json(data, filename):
    """
    Serialize *data* to *filename* as pretty-printed UTF-8 JSON.

    Args:
        data: Any JSON-serializable object.
        filename (str): Path of the destination file.
    """
    try:
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        # BUG FIX: both log messages contained the literal text
        # "(unknown)" instead of interpolating the target path, so the
        # logs never said which file was written; use {filename}.
        logger.info(f"Data saved to {filename}")
    except IOError as e:
        logger.error(f"Error saving data to {filename}: {e}")
|
|
|
|
def fetch_wiki_page_content(url):
    """
    Fetch the readable text of a wiki page.

    Args:
        url (str): URL of the wiki page

    Returns:
        str: Plain-text content of the page's main body, or "" when the
        request fails or the expected content element is absent.
    """
    try:
        # BUG FIX: requests has no default timeout, so an unresponsive
        # wiki server would hang this script forever; bound the wait.
        response = requests.get(url, timeout=30)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # MediaWiki renders the article body inside #mw-content-text.
        content = soup.select_one('#mw-content-text')
        if content is None:
            logger.warning(f"Could not find content in page: {url}")
            return ""

        # Remove script and style elements — they are not page text.
        for script in content.select('script, style'):
            script.extract()

        # Remove .languages elements (the inter-language link bar would
        # pollute the text handed to the translator).
        for languages_elem in content.select('.languages'):
            languages_elem.extract()

        # Collapse the remaining markup to whitespace-separated text.
        return content.get_text(separator=' ', strip=True)

    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching wiki page content: {e}")
        return ""
|
|
|
|
def translate_with_ollama(text, model=OLLAMA_MODEL):
    """
    Translate English text to French through a local Ollama server.

    Args:
        text (str): Text to translate
        model (str): Ollama model to use (defaults to OLLAMA_MODEL)

    Returns:
        str: Translated text, or "" if the request fails.
    """
    # The prompt (in French) instructs the model to translate from
    # English to French, preserve formatting, and leave proper nouns,
    # URLs and OSM-specific technical terms untranslated.
    prompt = f"""
    Tu es un traducteur professionnel spécialisé dans la traduction de documentation technique de l'anglais vers le français.
    Traduis le texte suivant de l'anglais vers le français. Conserve le formatage et la structure du texte original.
    Ne traduis pas les noms propres, les URLs, et les termes techniques spécifiques à OpenStreetMap.

    Texte à traduire:
    {text}
    """

    try:
        logger.info(f"Sending request to Ollama with model {model}")

        payload = {
            "model": model,
            "prompt": prompt,
            "stream": False  # get the full translation in one response
        }

        # BUG FIX: no timeout meant a stalled Ollama server would block
        # forever; LLM generation is slow, so allow a generous window.
        response = requests.post(OLLAMA_API_URL, json=payload, timeout=600)
        response.raise_for_status()

        result = response.json()
        # Ollama returns the generated text under the 'response' key.
        translation = result.get('response', '')

        logger.info("Successfully received translation from Ollama")
        return translation

    except requests.exceptions.RequestException as e:
        logger.error(f"Error translating with Ollama: {e}")
        return ""
|
|
|
|
def select_page_for_translation(pages, key=None):
    """
    Pick the page to translate.

    Args:
        pages (list): Page dictionaries loaded from the JSON file.
        key (str): Optional key of the desired page; when omitted (or
            falsy) the first page in the list is used.

    Returns:
        dict: The chosen page, or None when the list is empty or no
        page matches the requested key.
    """
    if not pages:
        logger.warning("No pages found that need translation")
        return None

    if not key:
        # Default behaviour: take the head of the list.
        chosen = pages[0]
        logger.info(f"Selected first page (key '{chosen['key']}') for translation")
        return chosen

    # Linear scan for the first page whose 'key' matches.
    match = next((page for page in pages if page.get('key') == key), None)
    if match is None:
        logger.warning(f"No page found with key '{key}'")
        return None
    logger.info(f"Selected page for key '{key}' for translation")
    return match
|
|
|
|
def main():
    """
    Main function to execute the script.

    Workflow: parse CLI args, load the outdated-pages JSON, select one
    page, fetch its English wiki content, translate it with Ollama, and
    write the translation back into the JSON file. Exits with status 1
    on any unrecoverable step.
    """
    parser = argparse.ArgumentParser(description="Propose a translation for an OSM wiki page using Ollama")
    parser.add_argument("--page", help="Key of the page to translate (default: first page in the file)")
    args = parser.parse_args()

    logger.info("Starting propose_translation.py")

    # Load pages (produced by wiki_compare.py)
    pages = load_outdated_pages()
    if not pages:
        logger.error("No pages found. Run wiki_compare.py first.")
        sys.exit(1)

    # Select a page for translation (--page key, else the first entry)
    selected_page = select_page_for_translation(pages, args.page)
    if not selected_page:
        logger.error("Could not select a page for translation.")
        sys.exit(1)

    # Get the English page URL (nested under 'en_page' in the JSON)
    en_url = selected_page.get('en_page', {}).get('url')
    if not en_url:
        logger.error(f"No English page URL found for key '{selected_page['key']}'")
        sys.exit(1)

    # Fetch the content of the English page
    logger.info(f"Fetching content from {en_url}")
    content = fetch_wiki_page_content(en_url)
    if not content:
        logger.error(f"Could not fetch content from {en_url}")
        sys.exit(1)

    # Translate the content
    logger.info(f"Translating content for key '{selected_page['key']}'")
    translation = translate_with_ollama(content)
    if not translation:
        logger.error("Could not translate content")
        sys.exit(1)

    # Save the translation in the JSON file — mutating selected_page
    # mutates the entry inside `pages`, so saving `pages` persists it
    logger.info(f"Saving translation for key '{selected_page['key']}'")
    selected_page['proposed_translation'] = translation

    # Save the updated data back to the file
    save_to_json(pages, OUTDATED_PAGES_FILE)

    logger.info("Script completed successfully")


if __name__ == "__main__":
    main()