up wording comparaison
This commit is contained in:
parent
1140c87932
commit
09e16d9075
6 changed files with 443 additions and 239 deletions
|
@ -719,11 +719,68 @@ class WikiController extends AbstractController
|
|||
// Construct the URLs for the English page and the French page creation form
|
||||
$englishUrl = "https://wiki.openstreetmap.org/wiki/Key:{$key}";
|
||||
$frenchEditUrl = "https://wiki.openstreetmap.org/w/index.php?title=FR:{$key}&action=edit";
|
||||
|
||||
// Fetch the HTML content of the English page using wiki_compare.py
|
||||
$scriptPath = $this->getParameter('kernel.project_dir') . '/wiki_compare/wiki_compare.py';
|
||||
$englishHtml = null;
|
||||
$frenchHtml = null;
|
||||
|
||||
if (file_exists($scriptPath)) {
|
||||
// Create a temporary Python script to fetch the page content
|
||||
$tempScriptPath = $this->getParameter('kernel.project_dir') . '/wiki_compare/temp_fetch_page.py';
|
||||
$pythonCode = <<<EOT
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import sys
|
||||
import json
|
||||
from wiki_compare import fetch_wiki_page
|
||||
|
||||
# Get the key from command line arguments
|
||||
key = sys.argv[1]
|
||||
language = sys.argv[2]
|
||||
|
||||
# Fetch the page
|
||||
page = fetch_wiki_page(key, language)
|
||||
|
||||
# Output the HTML content
|
||||
if page and 'html_content' in page:
|
||||
print(page['html_content'])
|
||||
else:
|
||||
print("")
|
||||
EOT;
|
||||
|
||||
file_put_contents($tempScriptPath, $pythonCode);
|
||||
chmod($tempScriptPath, 0755);
|
||||
|
||||
// Fetch English page
|
||||
$command = "cd " . $this->getParameter('kernel.project_dir') . "/wiki_compare && python3 {$tempScriptPath} {$key} en";
|
||||
$englishHtml = shell_exec($command);
|
||||
|
||||
// Extract only the content part from the HTML (remove headers, footers, etc.)
|
||||
if ($englishHtml) {
|
||||
$englishHtml = $this->extractMainContent($englishHtml);
|
||||
}
|
||||
|
||||
// Fetch French page (might not exist, but we'll try)
|
||||
$command = "cd " . $this->getParameter('kernel.project_dir') . "/wiki_compare && python3 {$tempScriptPath} {$key} fr";
|
||||
$frenchHtml = shell_exec($command);
|
||||
|
||||
// Extract only the content part from the HTML (remove headers, footers, etc.)
|
||||
if ($frenchHtml) {
|
||||
$frenchHtml = $this->extractMainContent($frenchHtml);
|
||||
}
|
||||
|
||||
// Clean up the temporary script
|
||||
unlink($tempScriptPath);
|
||||
}
|
||||
|
||||
return $this->render('admin/wiki_create_french.html.twig', [
|
||||
'key' => $key,
|
||||
'english_url' => $englishUrl,
|
||||
'french_edit_url' => $frenchEditUrl
|
||||
'french_edit_url' => $frenchEditUrl,
|
||||
'english_html' => $englishHtml,
|
||||
'french_html' => $frenchHtml
|
||||
]);
|
||||
}
|
||||
|
||||
|
@ -1436,4 +1493,57 @@ class WikiController extends AbstractController
|
|||
'fr_links' => $frLinks
|
||||
]);
|
||||
}
|
||||
|
||||
/**
 * Extracts the main article content from a rendered wiki page.
 *
 * The content element is looked up by id "mw-content-text" first, then by
 * the first element whose class attribute contains "mw-content-ltr".
 * Inside that element, <script> and <style> elements and the
 * "mw-editsection" wrappers are removed before serialising it.
 *
 * The input is returned unchanged when it is blank, cannot be parsed,
 * contains no recognisable content element, or cannot be serialised.
 *
 * @param string $html The full HTML content
 *
 * @return string The extracted main content, or the original HTML as a fallback
 */
private function extractMainContent(string $html): string
{
    // Nothing to extract; DOMDocument::loadHTML() also rejects an empty
    // source on PHP 8, so bail out before touching the parser.
    if (trim($html) === '') {
        return $html;
    }

    $dom = new \DOMDocument();

    // Collect parser warnings about malformed HTML instead of emitting them,
    // then restore the caller's libxml error mode so the setting does not
    // leak into the rest of the request.
    $previousErrorMode = libxml_use_internal_errors(true);
    try {
        // The XML encoding hint makes libxml decode the input as UTF-8 even
        // when the markup carries no <meta charset>.
        // NOTE(review): assumes the fetched HTML is UTF-8 - confirm against
        // what wiki_compare.py actually outputs.
        $loaded = $dom->loadHTML('<?xml encoding="UTF-8">' . $html);
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);
    }

    if ($loaded === false) {
        return $html;
    }

    $xpath = new \DOMXPath($dom);

    // First, try to find the element with id "mw-content-text"
    $contentElement = $dom->getElementById('mw-content-text');

    // If not found, fall back to the first element with class "mw-content-ltr"
    if ($contentElement === null) {
        $candidates = $xpath->query("//*[contains(@class, 'mw-content-ltr')]");
        if ($candidates !== false && $candidates->length > 0) {
            $contentElement = $candidates->item(0);
        }
    }

    // If still not found, return the original HTML
    if ($contentElement === null) {
        return $html;
    }

    // Remove unwanted elements on the DOM rather than with regular
    // expressions: the edit-section wrapper contains nested <span> elements,
    // so a non-greedy "<span ...>(.*?)</span>" match stops at the first inner
    // closing tag and leaves the edit link and a stray </span> behind.
    $unwanted = $xpath->query(
        ".//script | .//style"
        . " | .//*[contains(concat(' ', normalize-space(@class), ' '), ' mw-editsection ')]",
        $contentElement
    );

    if ($unwanted !== false) {
        // Work on a snapshot so removals cannot disturb the iteration.
        foreach (iterator_to_array($unwanted) as $node) {
            if ($node->parentNode !== null) {
                $node->parentNode->removeChild($node);
            }
        }
    }

    $contentHtml = $dom->saveHTML($contentElement);

    // saveHTML() reports failure with false; keep the declared string return type.
    return $contentHtml === false ? $html : $contentHtml;
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue