up wording comparaison

This commit is contained in:
Tykayn 2025-09-03 16:04:16 +02:00 committed by tykayn
parent 1140c87932
commit 09e16d9075
6 changed files with 443 additions and 239 deletions

View file

@ -719,11 +719,68 @@ class WikiController extends AbstractController
// Construct the URLs for the English page and the French page creation form
$englishUrl = "https://wiki.openstreetmap.org/wiki/Key:{$key}";
$frenchEditUrl = "https://wiki.openstreetmap.org/w/index.php?title=FR:{$key}&action=edit";
// Fetch the HTML content of the English page using wiki_compare.py
$scriptPath = $this->getParameter('kernel.project_dir') . '/wiki_compare/wiki_compare.py';
$englishHtml = null;
$frenchHtml = null;
if (file_exists($scriptPath)) {
// Create a temporary Python script to fetch the page content
$tempScriptPath = $this->getParameter('kernel.project_dir') . '/wiki_compare/temp_fetch_page.py';
$pythonCode = <<<EOT
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import sys
import json
from wiki_compare import fetch_wiki_page
# Get the key from command line arguments
key = sys.argv[1]
language = sys.argv[2]
# Fetch the page
page = fetch_wiki_page(key, language)
# Output the HTML content
if page and 'html_content' in page:
print(page['html_content'])
else:
print("")
EOT;
file_put_contents($tempScriptPath, $pythonCode);
chmod($tempScriptPath, 0755);
// Fetch English page
$command = "cd " . $this->getParameter('kernel.project_dir') . "/wiki_compare && python3 {$tempScriptPath} {$key} en";
$englishHtml = shell_exec($command);
// Extract only the content part from the HTML (remove headers, footers, etc.)
if ($englishHtml) {
$englishHtml = $this->extractMainContent($englishHtml);
}
// Fetch French page (might not exist, but we'll try)
$command = "cd " . $this->getParameter('kernel.project_dir') . "/wiki_compare && python3 {$tempScriptPath} {$key} fr";
$frenchHtml = shell_exec($command);
// Extract only the content part from the HTML (remove headers, footers, etc.)
if ($frenchHtml) {
$frenchHtml = $this->extractMainContent($frenchHtml);
}
// Clean up the temporary script
unlink($tempScriptPath);
}
return $this->render('admin/wiki_create_french.html.twig', [
'key' => $key,
'english_url' => $englishUrl,
'french_edit_url' => $frenchEditUrl
'french_edit_url' => $frenchEditUrl,
'english_html' => $englishHtml,
'french_html' => $frenchHtml
]);
}
@ -1436,4 +1493,57 @@ class WikiController extends AbstractController
'fr_links' => $frLinks
]);
}
/**
 * Extracts the main article content from a rendered wiki page.
 *
 * The content element is looked up first by the id "mw-content-text" and,
 * failing that, as the first element whose class attribute contains
 * "mw-content-ltr". Inside that element every <script>, <style> and
 * "mw-editsection" span is removed (together with its children) before the
 * element is serialized back to HTML.
 *
 * The input is treated as UTF-8 so that accented characters are preserved
 * even when the markup carries no charset declaration.
 *
 * @param string $html The full HTML content
 * @return string The extracted main content, or the original HTML unchanged
 *                when it is blank, cannot be parsed, or contains no
 *                recognizable content element
 */
private function extractMainContent(string $html): string
{
    // DOMDocument::loadHTML() rejects an empty string (ValueError on PHP 8),
    // and there is nothing to extract from blank input anyway.
    if (trim($html) === '') {
        return $html;
    }

    // Collect parser warnings about malformed HTML internally instead of
    // emitting them. This is a process-wide libxml switch, so the previous
    // value is remembered and restored in the finally block.
    $previousErrorMode = libxml_use_internal_errors(true);

    try {
        $dom = new \DOMDocument();

        // Without a charset declaration libxml assumes ISO-8859-1; the XML
        // encoding hint makes it read the markup as UTF-8.
        if (!$dom->loadHTML('<?xml encoding="UTF-8">' . $html)) {
            return $html;
        }

        $xpath = new \DOMXPath($dom);

        // First, try the element with id "mw-content-text"
        $contentElement = $dom->getElementById('mw-content-text');

        // If not found, fall back to the first element with class "mw-content-ltr"
        if ($contentElement === null) {
            $candidates = $xpath->query("//*[contains(@class, 'mw-content-ltr')]");
            $contentElement = $candidates !== false ? $candidates->item(0) : null;
        }

        // If still not found, return the original HTML
        if ($contentElement === null) {
            return $html;
        }

        // Remove unwanted nodes through the DOM rather than with regular
        // expressions: edit-section spans contain nested <span> elements, so a
        // non-greedy regex would stop at the first inner </span> and leave
        // broken markup behind.
        $unwanted = $xpath->query(
            ".//script | .//style"
            . " | .//span[contains(concat(' ', normalize-space(@class), ' '), ' mw-editsection ')]",
            $contentElement
        );

        if ($unwanted !== false) {
            // Copy the node list first so that removals do not disturb iteration.
            foreach (iterator_to_array($unwanted) as $node) {
                if ($node->parentNode !== null) {
                    $node->parentNode->removeChild($node);
                }
            }
        }

        $contentHtml = $dom->saveHTML($contentElement);

        // saveHTML() returns false on failure; keep the declared string return type.
        return $contentHtml === false ? $html : $contentHtml;
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);
    }
}
}