diff --git a/.gitignore b/.gitignore
index e014211..ee4d0a5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -30,6 +30,7 @@ venv
wiki_compare/.env
wiki_compare/*.png
wiki_compare/*.json
+wiki_compare/html_cache/
public/*.json
.idea
\ No newline at end of file
diff --git a/.idea/php.xml b/.idea/php.xml
index e72d568..520e8bc 100644
--- a/.idea/php.xml
+++ b/.idea/php.xml
@@ -141,7 +141,7 @@
-
+
diff --git a/src/Controller/WikiController.php b/src/Controller/WikiController.php
index fb610e4..f9166f2 100644
--- a/src/Controller/WikiController.php
+++ b/src/Controller/WikiController.php
@@ -719,11 +719,68 @@ class WikiController extends AbstractController
// Construct the URLs for the English page and the French page creation form
$englishUrl = "https://wiki.openstreetmap.org/wiki/Key:{$key}";
$frenchEditUrl = "https://wiki.openstreetmap.org/w/index.php?title=FR:{$key}&action=edit";
+
+ // Fetch the HTML content of the English page using wiki_compare.py
+ $scriptPath = $this->getParameter('kernel.project_dir') . '/wiki_compare/wiki_compare.py';
+ $englishHtml = null;
+ $frenchHtml = null;
+
+ if (file_exists($scriptPath)) {
+ // Create a temporary Python script to fetch the page content
+ $tempScriptPath = $this->getParameter('kernel.project_dir') . '/wiki_compare/temp_fetch_page.py';
+ $pythonCode = <<getParameter('kernel.project_dir') . "/wiki_compare && python3 {$tempScriptPath} {$key} en";
+ $englishHtml = shell_exec($command);
+
+ // Extract only the content part from the HTML (remove headers, footers, etc.)
+ if ($englishHtml) {
+ $englishHtml = $this->extractMainContent($englishHtml);
+ }
+
+ // Fetch French page (might not exist, but we'll try)
+ $command = "cd " . $this->getParameter('kernel.project_dir') . "/wiki_compare && python3 {$tempScriptPath} {$key} fr";
+ $frenchHtml = shell_exec($command);
+
+ // Extract only the content part from the HTML (remove headers, footers, etc.)
+ if ($frenchHtml) {
+ $frenchHtml = $this->extractMainContent($frenchHtml);
+ }
+
+ // Clean up the temporary script
+ unlink($tempScriptPath);
+ }
return $this->render('admin/wiki_create_french.html.twig', [
'key' => $key,
'english_url' => $englishUrl,
- 'french_edit_url' => $frenchEditUrl
+ 'french_edit_url' => $frenchEditUrl,
+ 'english_html' => $englishHtml,
+ 'french_html' => $frenchHtml
]);
}
@@ -1436,4 +1493,57 @@ class WikiController extends AbstractController
'fr_links' => $frLinks
]);
}
+
+ /**
+ * Extracts the main content from the HTML, removing headers, footers, and other unnecessary elements.
+ *
+ * @param string $html The full HTML content
+ * @return string The extracted main content, or $html unchanged if no content element is found
+ */
+ private function extractMainContent(string $html): string
+ {
+ // Use a simple approach to extract the content
+ // This could be improved with a more sophisticated HTML parser if needed
+
+ // Create a DOMDocument to parse the HTML
+ $dom = new \DOMDocument();
+
+ // Suppress warnings about malformed HTML
+ libxml_use_internal_errors(true);
+ $dom->loadHTML($html);
+ libxml_clear_errors();
+
+ // Try to find the main content element
+ $contentElement = null;
+
+ // First, try to find the element with id "mw-content-text"
+ $contentElement = $dom->getElementById('mw-content-text');
+
+ // If not found, try to find the element with class "mw-content-ltr"
+ if (!$contentElement) {
+ $xpath = new \DOMXPath($dom);
+ $elements = $xpath->query("//*[contains(@class, 'mw-content-ltr')]");
+ if ($elements->length > 0) {
+ $contentElement = $elements->item(0);
+ }
+ }
+
+ // If still not found, return the original HTML
+ if (!$contentElement) {
+ return $html;
+ }
+
+ // Get the HTML of the content element
+ $contentHtml = $dom->saveHTML($contentElement);
+
+ // Clean up the content HTML
+ // Remove script and style elements
+ $contentHtml = preg_replace('/