qualiwiki/src/Controller/WikiController.php
2025-09-05 10:12:15 +02:00

1794 lines
No EOL
78 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
namespace App\Controller;
use Symfony\Bundle\FrameworkBundle\Controller\AbstractController;
use Symfony\Component\HttpFoundation\Response;
use Symfony\Component\Routing\Annotation\Route;
class WikiController extends AbstractController
{
/**
 * Renders the decrepitude (staleness) overview page.
 *
 * Reads wiki_compare/outdated_pages.json from the project directory and hands
 * its "regular_pages", "specific_pages" and "last_updated" entries to the
 * template, together with two flags telling whether the JSON file and the
 * staleness histogram image are present on disk.
 */
#[Route('/wiki/decrepitude', name: 'app_admin_wiki_decrepitude')]
public function decrepitudeScores(): Response
{
    $compareDir = $this->getParameter('kernel.project_dir') . '/wiki_compare';
    $jsonPath = $compareDir . '/outdated_pages.json';
    $histogramExists = file_exists($compareDir . '/staleness_histogram.png');

    // Defaults used when the JSON file is absent or lacks the expected entries.
    $pageLists = ['regular_pages' => [], 'specific_pages' => []];
    $lastUpdated = null;

    if (file_exists($jsonPath)) {
        $decoded = json_decode(file_get_contents($jsonPath), true);

        // Only array-valued entries replace the empty defaults.
        foreach (array_keys($pageLists) as $listName) {
            $candidate = $decoded[$listName] ?? null;
            if (is_array($candidate)) {
                $pageLists[$listName] = $candidate;
            }
        }

        $lastUpdated = $decoded['last_updated'] ?? null;
    }

    return $this->render('admin/wiki_decrepitude.html.twig', [
        'regular_pages' => $pageLists['regular_pages'],
        'specific_pages' => $pageLists['specific_pages'],
        'last_updated' => $lastUpdated,
        'histogram_exists' => $histogramExists,
        'json_exists' => file_exists($jsonPath)
    ]);
}
/**
 * Finds sections whose heading level skips one or more levels.
 *
 * A section is reported when its level is more than one step deeper than the
 * previous section that carried a level (for example an h4 directly after an
 * h2). Sections without a level, or with level 0, are ignored and do not
 * influence the comparison. The first levelled section is never reported.
 *
 * @param array $sections List of sections, each optionally holding a 'level' key
 * @return array Keys of $sections whose level jumps by more than one
 */
private function detectHeadingHierarchyErrors(array $sections): array
{
    $flagged = [];
    $previous = 0;

    foreach ($sections as $position => $entry) {
        $level = (int) ($entry['level'] ?? 0);
        if ($level === 0) {
            continue;
        }

        // $previous stays 0 until the first levelled section has been seen,
        // so that first section only seeds the comparison.
        if ($previous !== 0 && $level > $previous + 1) {
            $flagged[] = $position;
        }

        $previous = $level;
    }

    return $flagged;
}
/**
 * Builds two-column rows ('en' / 'fr') for sections that exist in only one language.
 *
 * English-only sections come first, each paired with an empty French
 * placeholder; French-only sections follow, each paired with an empty English
 * placeholder. A placeholder has an empty title, the same level as its
 * counterpart and 'is_placeholder' => true.
 *
 * Note: entries under the 'common' key are currently not emitted by this method.
 *
 * @param array $sectionComparison Comparison data; the 'en_only' and 'fr_only' lists are read
 * @return array List of rows, each with an 'en' and a 'fr' entry (in that order)
 */
private function buildAlignedSectionList(array $sectionComparison): array
{
    // The section as it really exists in its own language.
    $actual = static function ($section): array {
        return [
            'title' => $section['title'],
            'level' => $section['level']
        ];
    };

    // The empty counterpart shown in the other language's column.
    $placeholder = static function ($section): array {
        return [
            'title' => '',
            'level' => $section['level'],
            'is_placeholder' => true
        ];
    };

    $rows = [];

    $englishOnly = $sectionComparison['en_only'] ?? null;
    if (is_array($englishOnly)) {
        foreach ($englishOnly as $section) {
            $rows[] = ['en' => $actual($section), 'fr' => $placeholder($section)];
        }
    }

    // French-only sections are appended after all English-only ones.
    $frenchOnly = $sectionComparison['fr_only'] ?? null;
    if (is_array($frenchOnly)) {
        foreach ($frenchOnly as $section) {
            $rows[] = ['en' => $placeholder($section), 'fr' => $actual($section)];
        }
    }

    return $rows;
}
/**
 * Handles the site root ('/') by redirecting to the route named 'app_admin_wiki'.
 */
#[Route('/', name: 'app_public_index')]
public function accueilAction(): Response
{
return $this->redirectToRoute('app_admin_wiki');
}
/**
 * Renders the recent wiki changes page together with per-user statistics.
 *
 * Data comes from wiki_compare/recent_changes.json. When that file is missing,
 * the fetch script is run once and the freshly generated file is loaded within
 * the same request, so the first visit already shows data. Only a missing file
 * triggers the script; existing data is displayed as-is regardless of its age.
 */
#[Route('/wiki/recent-changes', name: 'app_admin_wiki_recent_changes')]
public function recentChanges(): Response
{
    $recentChangesFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/recent_changes.json';

    // Try to generate the file when it does not exist yet.
    if (!file_exists($recentChangesFile)) {
        $this->refreshRecentChangesData();
        if (!file_exists($recentChangesFile)) {
            $this->addFlash('error', 'Impossible de générer le fichier des changements récents.');
        }
    }

    $recentChanges = [];
    $lastUpdated = null;
    $teamMembers = [];

    if (file_exists($recentChangesFile)) {
        $rawJson = file_get_contents($recentChangesFile);
        $recentChangesData = $rawJson === false ? null : json_decode($rawJson, true);

        // Anything other than a decoded object with a "recent_changes" list leaves the defaults in place.
        if (is_array($recentChangesData) && is_array($recentChangesData['recent_changes'] ?? null)) {
            $recentChanges = $recentChangesData['recent_changes'];
            $lastUpdated = $recentChangesData['last_updated'] ?? null;
            $teamMembers = $this->processTeamMembersStats($recentChanges);
        }
    }

    return $this->render('admin/wiki_recent_changes.html.twig', [
        'recent_changes' => $recentChanges,
        'last_updated' => $lastUpdated,
        'team_members' => $teamMembers
    ]);
}
/**
 * Aggregates recent changes into per-user contribution statistics.
 *
 * Every change that names a user counts as one contribution. Its 'change_size'
 * is read as a signed number: positive sizes are added to 'chars_added',
 * negative sizes (written with either the ASCII hyphen or the typographic
 * minus sign U+2212) are added to 'chars_deleted'. Sizes that are missing or
 * not numeric only count as a contribution. 'chars_changed' is always 0 and is
 * kept so that every row has the same set of keys.
 *
 * @param array $recentChanges Recent changes; each entry is expected to hold 'user' and 'change_size'
 * @return array List of rows (username, contributions, chars_added, chars_changed,
 *               chars_deleted, user_url) sorted by contributions, highest first
 */
private function processTeamMembersStats(array $recentChanges): array
{
    $teamMembers = [];

    foreach ($recentChanges as $change) {
        // A change without a usable user name cannot be attributed to anyone.
        if (!is_array($change) || !isset($change['user']) || !is_scalar($change['user'])) {
            continue;
        }
        $user = (string) $change['user'];

        if (!isset($teamMembers[$user])) {
            $teamMembers[$user] = [
                'username' => $user,
                'contributions' => 0,
                'chars_added' => 0,
                'chars_changed' => 0,
                'chars_deleted' => 0,
                // Wiki page URLs use underscores for spaces; urlencode() would emit "+",
                // which is a literal plus sign inside a URL path.
                'user_url' => 'https://wiki.openstreetmap.org/wiki/User:' . rawurlencode(str_replace(' ', '_', $user))
            ];
        }

        $teamMembers[$user]['contributions']++;

        // Turn the reported size into a signed integer (0 when it cannot be read).
        $rawSize = $change['change_size'] ?? null;
        $delta = 0;
        if (is_int($rawSize) || is_float($rawSize)) {
            $delta = (int) $rawSize;
        } elseif (is_string($rawSize)) {
            // Map the typographic minus sign (U+2212) to "-" so is_numeric() accepts it.
            $normalized = str_replace("\u{2212}", '-', trim($rawSize));
            if (is_numeric($normalized)) {
                $delta = (int) $normalized;
            }
        }

        if ($delta > 0) {
            $teamMembers[$user]['chars_added'] += $delta;
        } elseif ($delta < 0) {
            $teamMembers[$user]['chars_deleted'] += abs($delta);
        }
    }

    // Convert to a plain list and sort by contribution count (descending).
    $teamMembers = array_values($teamMembers);
    usort($teamMembers, static function (array $a, array $b): int {
        return $b['contributions'] <=> $a['contributions'];
    });

    return $teamMembers;
}
/**
 * Regenerates the recent changes data by running wiki_compare/fetch_recent_changes.py with --force.
 *
 * Nothing is returned: a missing script, a non-zero exit code (with the
 * script's combined stdout/stderr) or an exception is reported to the user as
 * a flash message.
 */
private function refreshRecentChangesData(): void
{
    try {
        $scriptPath = $this->getParameter('kernel.project_dir') . '/wiki_compare/fetch_recent_changes.py';

        if (!file_exists($scriptPath)) {
            $this->addFlash('error', 'Le script fetch_recent_changes.py n\'existe pas.');
            return;
        }

        $output = [];
        $returnCode = 0;
        // Quote the path so that spaces or shell metacharacters in the project
        // directory cannot split or alter the command.
        exec('python3 ' . escapeshellarg($scriptPath) . ' --force 2>&1', $output, $returnCode);

        if ($returnCode !== 0) {
            $this->addFlash('warning', 'Impossible de mettre à jour les changements récents. Erreur: ' . implode("\n", $output));
        }
    } catch (\Exception $e) {
        $this->addFlash('error', 'Erreur lors de l\'exécution du script: ' . $e->getMessage());
    }
}
/**
 * Lists wiki pages that have no French translation.
 *
 * Data comes from wiki_compare/untranslated_french_pages.json. The generator
 * script is run when the file is missing, or when its "last_updated" timestamp
 * is more than one hour old or cannot be parsed; the file is then re-read
 * within the same request. No redirect follows a refresh, so a script that
 * fails to produce fresher data cannot send the browser into a redirect loop.
 * Pages are de-duplicated by title (first occurrence wins) and sorted by title,
 * ignoring case; entries without a title are dropped.
 */
#[Route('/wiki/missing-translations', name: 'app_admin_wiki_missing_translations')]
public function missingTranslations(): Response
{
    $untranslatedFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/untranslated_french_pages.json';

    // Returns [pages, lastUpdated]; both are empty when the file is unreadable or malformed.
    $loadData = static function () use ($untranslatedFile): array {
        $rawJson = file_get_contents($untranslatedFile);
        $decoded = $rawJson === false ? null : json_decode($rawJson, true);
        if (!is_array($decoded) || !is_array($decoded['untranslated_pages'] ?? null)) {
            return [[], null];
        }

        return [$decoded['untranslated_pages'], $decoded['last_updated'] ?? null];
    };

    $untranslatedPages = [];
    $lastUpdated = null;
    $needsRefresh = true; // a missing file always triggers the generator

    if (file_exists($untranslatedFile)) {
        [$untranslatedPages, $lastUpdated] = $loadData();
        $needsRefresh = false;

        if (is_string($lastUpdated) && $lastUpdated !== '') {
            try {
                // Stale means older than one hour; a timestamp in the future is not stale.
                $needsRefresh = new \DateTimeImmutable($lastUpdated) < new \DateTimeImmutable('-1 hour');
            } catch (\Exception $e) {
                // An unparseable timestamp says nothing about freshness: regenerate once.
                $needsRefresh = true;
            }
        }
    }

    if ($needsRefresh) {
        $this->refreshUntranslatedPagesData();

        if (file_exists($untranslatedFile)) {
            [$untranslatedPages, $lastUpdated] = $loadData();
        } else {
            $this->addFlash('error', 'Impossible de générer le fichier des pages sans traduction.');
        }
    }

    // Remove duplicates based on page title, keeping the first occurrence.
    $pagesByTitle = [];
    foreach ($untranslatedPages as $page) {
        $title = is_array($page) ? ($page['title'] ?? null) : null;
        if (!is_string($title) || isset($pagesByTitle[$title])) {
            continue;
        }
        $pagesByTitle[$title] = $page;
    }
    $uniquePages = array_values($pagesByTitle);

    // Sort pages by title, case-insensitively.
    usort($uniquePages, static function (array $a, array $b): int {
        return strcasecmp($a['title'], $b['title']);
    });

    return $this->render('admin/wiki_missing_translations.html.twig', [
        'untranslated_pages' => $uniquePages,
        'last_updated' => $lastUpdated
    ]);
}
/**
 * Regenerates the untranslated pages data by running
 * wiki_compare/find_untranslated_french_pages.py with --force.
 *
 * Nothing is returned: a missing script, a non-zero exit code (with the
 * script's combined stdout/stderr) or an exception is reported to the user as
 * a flash message.
 */
private function refreshUntranslatedPagesData(): void
{
    try {
        $scriptPath = $this->getParameter('kernel.project_dir') . '/wiki_compare/find_untranslated_french_pages.py';

        if (!file_exists($scriptPath)) {
            $this->addFlash('error', 'Le script find_untranslated_french_pages.py n\'existe pas.');
            return;
        }

        $output = [];
        $returnCode = 0;
        // Quote the path so that spaces or shell metacharacters in the project
        // directory cannot split or alter the command.
        exec('python3 ' . escapeshellarg($scriptPath) . ' --force 2>&1', $output, $returnCode);

        if ($returnCode !== 0) {
            $this->addFlash('warning', 'Impossible de mettre à jour les pages sans traduction. Erreur: ' . implode("\n", $output));
        }
    } catch (\Exception $e) {
        $this->addFlash('error', 'Erreur lors de l\'exécution du script: ' . $e->getMessage());
    }
}
/**
 * Lists wiki pages that exist in other languages but not in French, grouped by language.
 *
 * Data comes from wiki_compare/pages_unavailable_in_french.json. The generator
 * script is run when the file is missing, or when its "last_updated" timestamp
 * is more than one hour old or cannot be parsed; the file is then re-read
 * within the same request. No redirect follows a refresh, so a script that
 * fails to produce fresher data cannot send the browser into a redirect loop.
 * Language groups are sorted by key, with the 'En' group moved to the front.
 */
#[Route('/wiki/pages-unavailable-in-french', name: 'app_admin_wiki_pages_unavailable_in_french')]
public function pagesUnavailableInFrench(): Response
{
    $unavailablePagesFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/pages_unavailable_in_french.json';

    // Returns [groupedPages, allPages, lastUpdated]; empty values when the file is unreadable or malformed.
    $loadData = static function () use ($unavailablePagesFile): array {
        $rawJson = file_get_contents($unavailablePagesFile);
        $decoded = $rawJson === false ? null : json_decode($rawJson, true);
        if (!is_array($decoded)) {
            return [[], [], null];
        }

        return [
            is_array($decoded['grouped_pages'] ?? null) ? $decoded['grouped_pages'] : [],
            is_array($decoded['all_pages'] ?? null) ? $decoded['all_pages'] : [],
            $decoded['last_updated'] ?? null,
        ];
    };

    $groupedPages = [];
    $allPages = [];
    $lastUpdated = null;
    $needsRefresh = true; // a missing file always triggers the generator

    if (file_exists($unavailablePagesFile)) {
        [$groupedPages, $allPages, $lastUpdated] = $loadData();
        $needsRefresh = false;

        if (is_string($lastUpdated) && $lastUpdated !== '') {
            try {
                // Stale means older than one hour; a timestamp in the future is not stale.
                $needsRefresh = new \DateTimeImmutable($lastUpdated) < new \DateTimeImmutable('-1 hour');
            } catch (\Exception $e) {
                // An unparseable timestamp says nothing about freshness: regenerate once.
                $needsRefresh = true;
            }
        }
    }

    if ($needsRefresh) {
        $this->refreshPagesUnavailableInFrenchData();

        if (file_exists($unavailablePagesFile)) {
            [$groupedPages, $allPages, $lastUpdated] = $loadData();
        } else {
            $this->addFlash('error', 'Impossible de générer le fichier des pages non disponibles en français.');
        }
    }

    // Pull the English group out, sort the remaining groups by key,
    // then put English back in front when it has any pages.
    $englishPages = $groupedPages['En'] ?? [];
    unset($groupedPages['En']);
    ksort($groupedPages);
    if (!empty($englishPages)) {
        $groupedPages = ['En' => $englishPages] + $groupedPages;
    }

    return $this->render('admin/wiki_pages_unavailable_in_french.html.twig', [
        'grouped_pages' => $groupedPages,
        'all_pages' => $allPages,
        'last_updated' => $lastUpdated
    ]);
}
/**
 * Regenerates the "pages unavailable in French" data by running
 * wiki_compare/find_pages_unavailable_in_french.py with --force.
 *
 * Nothing is returned: a missing script, a non-zero exit code (with the
 * script's combined stdout/stderr) or an exception is reported to the user as
 * a flash message.
 */
private function refreshPagesUnavailableInFrenchData(): void
{
    try {
        $scriptPath = $this->getParameter('kernel.project_dir') . '/wiki_compare/find_pages_unavailable_in_french.py';

        if (!file_exists($scriptPath)) {
            $this->addFlash('error', 'Le script find_pages_unavailable_in_french.py n\'existe pas.');
            return;
        }

        $output = [];
        $returnCode = 0;
        // Quote the path so that spaces or shell metacharacters in the project
        // directory cannot split or alter the command.
        exec('python3 ' . escapeshellarg($scriptPath) . ' --force 2>&1', $output, $returnCode);

        if ($returnCode !== 0) {
            $this->addFlash('warning', 'Impossible de mettre à jour les pages non disponibles en français. Erreur: ' . implode("\n", $output));
        }
    } catch (\Exception $e) {
        $this->addFlash('error', 'Erreur lors de l\'exécution du script: ' . $e->getMessage());
    }
}
/**
 * Renders the OSM-FR working groups and local groups page.
 *
 * Data comes from wiki_compare/osm_fr_groups.json. When that file is missing,
 * the fetch script is run once and the freshly generated file is loaded within
 * the same request. Only a missing file triggers the script; existing data is
 * displayed as-is regardless of its age. Working groups are grouped by their
 * 'category' ('Autres' when absent) and the categories are sorted by name.
 */
#[Route('/wiki/osm-fr-groups', name: 'app_admin_wiki_osm_fr_groups')]
public function osmFrGroups(): Response
{
    $groupsFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/osm_fr_groups.json';

    // Try to generate the file when it does not exist yet.
    if (!file_exists($groupsFile)) {
        $this->refreshOsmFrGroupsData();
        if (!file_exists($groupsFile)) {
            $this->addFlash('error', 'Impossible de générer le fichier des groupes OSM-FR.');
        }
    }

    $workingGroups = [];
    $localGroups = [];
    // Used when the data file is absent or does not provide its own "umap_url".
    $umapUrl = 'https://umap.openstreetmap.fr/fr/map/groupes-locaux-openstreetmap_152488';
    $lastUpdated = null;

    if (file_exists($groupsFile)) {
        $rawJson = file_get_contents($groupsFile);
        $groupsData = $rawJson === false ? null : json_decode($rawJson, true);

        if (is_array($groupsData)) {
            if (is_array($groupsData['working_groups'] ?? null)) {
                $workingGroups = $groupsData['working_groups'];
            }
            if (is_array($groupsData['local_groups'] ?? null)) {
                $localGroups = $groupsData['local_groups'];
            }
            $umapUrl = $groupsData['umap_url'] ?? $umapUrl;
            $lastUpdated = $groupsData['last_updated'] ?? null;
        }
    }

    // Group working groups by category.
    $groupedWorkingGroups = [];
    foreach ($workingGroups as $group) {
        $category = $group['category'] ?? 'Autres';
        if (!is_string($category) && !is_int($category)) {
            // Only strings and integers can serve as array keys here.
            $category = 'Autres';
        }
        $groupedWorkingGroups[$category][] = $group;
    }

    // Sort categories alphabetically.
    ksort($groupedWorkingGroups);

    return $this->render('admin/wiki_osm_fr_groups.html.twig', [
        'working_groups' => $groupedWorkingGroups,
        'local_groups' => $localGroups,
        'umap_url' => $umapUrl,
        'last_updated' => $lastUpdated
    ]);
}
/**
 * Regenerates the OSM-FR groups data by running wiki_compare/fetch_osm_fr_groups.py with --force.
 *
 * Nothing is returned: a missing script, a non-zero exit code (with the
 * script's combined stdout/stderr) or an exception is reported to the user as
 * a flash message.
 */
private function refreshOsmFrGroupsData(): void
{
    try {
        $scriptPath = $this->getParameter('kernel.project_dir') . '/wiki_compare/fetch_osm_fr_groups.py';

        if (!file_exists($scriptPath)) {
            $this->addFlash('error', 'Le script fetch_osm_fr_groups.py n\'existe pas.');
            return;
        }

        $output = [];
        $returnCode = 0;
        // Quote the path so that spaces or shell metacharacters in the project
        // directory cannot split or alter the command.
        exec('python3 ' . escapeshellarg($scriptPath) . ' --force 2>&1', $output, $returnCode);

        if ($returnCode !== 0) {
            $this->addFlash('warning', 'Impossible de mettre à jour les groupes OSM-FR. Erreur: ' . implode("\n", $output));
        }
    } catch (\Exception $e) {
        $this->addFlash('error', 'Erreur lors de l\'exécution du script: ' . $e->getMessage());
    }
}
/**
 * Renders the suspicious deletions page.
 *
 * Two data sets are passed to the template:
 *  - 'recent_deletions': the "deletions" list of wiki_compare/suspicious_deletions.json.
 *    When that file is missing, detect_suspicious_deletions.py is run once to create it.
 *  - 'suspicious_pages': pages from wiki_compare/outdated_pages.json whose French
 *    version has more than 30% fewer words than the English one, sorted by that
 *    percentage (highest first).
 *
 * outdated_pages.json is read in the shape the other actions of this controller
 * use ("regular_pages" and "specific_pages" lists); a flat list of pages is
 * accepted as well.
 */
#[Route('/wiki/suspicious-deletions', name: 'app_admin_wiki_suspicious_deletions')]
public function suspiciousDeletions(): Response
{
    $projectDir = $this->getParameter('kernel.project_dir');
    $suspiciousDeletesFile = $projectDir . '/wiki_compare/suspicious_deletions.json';
    $wordDiffFile = $projectDir . '/wiki_compare/outdated_pages.json';

    // Decodes a JSON file into an array, or null when it is unreadable or not an array/object.
    $decodeFile = static function (string $path): ?array {
        $rawJson = file_get_contents($path);
        $decoded = $rawJson === false ? null : json_decode($rawJson, true);

        return is_array($decoded) ? $decoded : null;
    };

    $suspiciousPages = [];
    $wordDiffPages = [];
    $lastUpdated = null;

    // If the deletions file doesn't exist, try to create it by running the script.
    $canLoadDeletions = file_exists($suspiciousDeletesFile);
    if (!$canLoadDeletions) {
        try {
            $scriptPath = $projectDir . '/wiki_compare/detect_suspicious_deletions.py';
            if (file_exists($scriptPath)) {
                $output = [];
                $returnCode = 0;
                // Quote the path so that spaces or shell metacharacters in the
                // project directory cannot split or alter the command.
                exec('python3 ' . escapeshellarg($scriptPath) . ' 2>&1', $output, $returnCode);

                $canLoadDeletions = $returnCode === 0 && file_exists($suspiciousDeletesFile);
                if (!$canLoadDeletions) {
                    $this->addFlash('warning', 'Impossible de générer le fichier de suppressions suspectes. Erreur: ' . implode("\n", $output));
                }
            }
        } catch (\Exception $e) {
            $this->addFlash('error', 'Erreur lors de l\'exécution du script: ' . $e->getMessage());
        }
    }

    if ($canLoadDeletions) {
        $suspiciousData = $decodeFile($suspiciousDeletesFile);
        if (is_array($suspiciousData['deletions'] ?? null)) {
            $suspiciousPages = $suspiciousData['deletions'];
            $lastUpdated = $suspiciousData['last_updated'] ?? null;
        }
    }

    // Also load the word-diff based suspicious pages for comparison.
    if (file_exists($wordDiffFile)) {
        $jsonData = $decodeFile($wordDiffFile) ?? [];

        if (isset($jsonData['regular_pages']) || isset($jsonData['specific_pages'])) {
            $candidates = array_merge(
                is_array($jsonData['regular_pages'] ?? null) ? array_values($jsonData['regular_pages']) : [],
                is_array($jsonData['specific_pages'] ?? null) ? array_values($jsonData['specific_pages']) : []
            );
        } else {
            // Flat list of pages.
            $candidates = $jsonData;
        }

        foreach ($candidates as $page) {
            if (!is_array($page) || !isset($page['fr_page'], $page['en_page'])) {
                continue;
            }

            $enWordCount = (int) ($page['en_page']['word_count'] ?? 0);
            $frWordCount = (int) ($page['fr_page']['word_count'] ?? 0);
            $wordDiff = $enWordCount - $frWordCount;

            // Keep the page when English has more words and the gap exceeds 30%.
            // $wordDiff > 0 together with $frWordCount > 0 guarantees $enWordCount > 0.
            if ($wordDiff > 0 && $frWordCount > 0 && ($wordDiff / $enWordCount) > 0.3) {
                $page['deletion_percentage'] = round(($wordDiff / $enWordCount) * 100, 2);
                $wordDiffPages[] = $page;
            }
        }

        // Sort by deletion percentage (highest first).
        usort($wordDiffPages, static function (array $a, array $b): int {
            return $b['deletion_percentage'] <=> $a['deletion_percentage'];
        });
    }

    return $this->render('admin/wiki_suspicious_deletions.html.twig', [
        'suspicious_pages' => $wordDiffPages,
        'recent_deletions' => $suspiciousPages,
        'last_updated' => $lastUpdated
    ]);
}
/**
 * Renders the tag proposals page (proposals being voted on, then recently modified ones).
 *
 * Data comes from wiki_compare/proposals.json. When that file is missing, the
 * fetch script is run once and the freshly generated file is loaded within the
 * same request. Only a missing file triggers the script; existing data is
 * displayed as-is regardless of its age. Each proposal is flattened into the
 * row format expected by the template; absent fields become empty strings.
 */
#[Route('/wiki/tag-proposals', name: 'app_admin_wiki_tag_proposals')]
public function tagProposals(): Response
{
    $proposalsFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/proposals.json';

    // Try to generate the file when it does not exist yet.
    if (!file_exists($proposalsFile)) {
        $this->refreshProposalsData();
        if (!file_exists($proposalsFile)) {
            $this->addFlash('error', 'Impossible de générer le fichier de propositions.');
        }
    }

    $votingProposals = [];
    $recentProposals = [];
    $lastUpdated = null;

    if (file_exists($proposalsFile)) {
        $rawJson = file_get_contents($proposalsFile);
        $proposalsData = $rawJson === false ? null : json_decode($rawJson, true);

        if (is_array($proposalsData)) {
            if (is_array($proposalsData['voting_proposals'] ?? null)) {
                $votingProposals = $proposalsData['voting_proposals'];
            }
            if (is_array($proposalsData['recent_proposals'] ?? null)) {
                $recentProposals = $proposalsData['recent_proposals'];
            }
            $lastUpdated = $proposalsData['last_updated'] ?? null;
        }
    }

    // Format the proposals for the template.
    $formattedProposals = [];

    foreach ($votingProposals as $proposal) {
        $formattedProposal = [
            'feature' => $proposal['title'] ?? '',
            'url' => $proposal['url'] ?? '',
            'description' => 'Proposition en cours de vote',
            'proposer' => $proposal['proposer'] ?? '',
            'status' => $proposal['status'] ?? 'Voting',
            'type' => 'voting'
        ];

        // Add voting information if available.
        if (isset($proposal['votes'])) {
            $formattedProposal['votes'] = $proposal['votes'];
            $formattedProposal['total_votes'] = $proposal['total_votes'] ?? 0;
            $formattedProposal['approve_percentage'] = $proposal['approve_percentage'] ?? 0;
            $formattedProposal['oppose_percentage'] = $proposal['oppose_percentage'] ?? 0;
            $formattedProposal['abstain_percentage'] = $proposal['abstain_percentage'] ?? 0;
        }

        $formattedProposals[] = $formattedProposal;
    }

    foreach ($recentProposals as $proposal) {
        $formattedProposals[] = [
            'feature' => $proposal['title'] ?? '',
            'url' => $proposal['url'] ?? '',
            'description' => 'Dernière modification: ' . ($proposal['last_modified'] ?? ''),
            'proposer' => $proposal['modified_by'] ?? '',
            'status' => 'Draft',
            'type' => 'recent'
        ];
    }

    return $this->render('admin/wiki_tag_proposals.html.twig', [
        'proposals' => $formattedProposals,
        'last_updated' => $lastUpdated
    ]);
}
/**
 * Regenerates the proposals data by running wiki_compare/fetch_proposals.py with --force.
 *
 * Nothing is returned: a missing script, a non-zero exit code (with the
 * script's combined stdout/stderr) or an exception is reported to the user as
 * a flash message.
 */
private function refreshProposalsData(): void
{
    try {
        $scriptPath = $this->getParameter('kernel.project_dir') . '/wiki_compare/fetch_proposals.py';

        if (!file_exists($scriptPath)) {
            $this->addFlash('error', 'Le script fetch_proposals.py n\'existe pas.');
            return;
        }

        $output = [];
        $returnCode = 0;
        // Quote the path so that spaces or shell metacharacters in the project
        // directory cannot split or alter the command.
        exec('python3 ' . escapeshellarg($scriptPath) . ' --force 2>&1', $output, $returnCode);

        if ($returnCode !== 0) {
            $this->addFlash('warning', 'Impossible de mettre à jour les propositions. Erreur: ' . implode("\n", $output));
        }
    } catch (\Exception $e) {
        $this->addFlash('error', 'Erreur lors de l\'exécution du script: ' . $e->getMessage());
    }
}
/**
 * Shows one randomly chosen page from outdated_pages.json as an improvement suggestion.
 *
 * Redirects to the wiki dashboard with an error flash when the file is missing,
 * when it decodes to nothing, or when neither "regular_pages" nor
 * "specific_pages" contributes any entry.
 */
#[Route('/wiki/random-suggestion', name: 'app_admin_wiki_random_suggestion')]
public function randomSuggestion(): Response
{
    $jsonFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/outdated_pages.json';

    if (!file_exists($jsonFile)) {
        $this->addFlash('error', 'Le fichier outdated_pages.json n\'existe pas.');

        return $this->redirectToRoute('app_admin_wiki');
    }

    $decoded = json_decode(file_get_contents($jsonFile), true);

    // Pool the pages of both lists; an empty decode leaves the pool empty.
    $candidates = [];
    if (!empty($decoded)) {
        foreach (['regular_pages', 'specific_pages'] as $listName) {
            $list = $decoded[$listName] ?? null;
            if (is_array($list)) {
                $candidates = array_merge($candidates, $list);
            }
        }
    }

    // "Nothing decoded" and "no pages listed" are reported with the same message.
    if (empty($candidates)) {
        $this->addFlash('error', 'Aucune page à améliorer n\'a été trouvée.');

        return $this->redirectToRoute('app_admin_wiki');
    }

    $pickedKey = array_rand($candidates);

    return $this->render('admin/wiki_random_suggestion.html.twig', [
        'page' => $candidates[$pickedKey]
    ]);
}
/**
 * Shows the English wiki page for $key next to the French one, as a starting
 * point for creating the French translation.
 *
 * The HTML of both pages is obtained by calling fetch_wiki_page() from
 * wiki_compare/wiki_compare.py through a short inline Python program. The
 * program is started without a shell and $key is handed over as a plain
 * argument, so characters in the URL-supplied key can never be interpreted as
 * shell syntax. No helper script is written to disk, so concurrent requests do
 * not interfere with each other. Non-empty output is passed through
 * $this->extractMainContent() before being given to the template.
 *
 * @param string $key Wiki page title taken from the URL (for example "Key:highway")
 */
#[Route('/wiki/create-french/{key}', name: 'app_admin_wiki_create_french')]
public function createFrench(string $key): Response
{
    // Percent-encode the title so characters such as "&", "#" or "?" cannot
    // change the structure of the links; ":" is kept readable.
    $encodedTitle = str_replace('%3A', ':', rawurlencode($key));
    $englishUrl = "https://wiki.openstreetmap.org/wiki/{$encodedTitle}";
    $frenchEditUrl = "https://wiki.openstreetmap.org/w/index.php?title=FR:{$encodedTitle}&action=edit";

    $workingDir = $this->getParameter('kernel.project_dir') . '/wiki_compare';
    $scriptPath = $workingDir . '/wiki_compare.py';
    $englishHtml = null;
    $frenchHtml = null;

    if (file_exists($scriptPath)) {
        // Run with "python3 -c" from inside wiki_compare/: the current directory
        // is then on Python's import path, which makes wiki_compare importable.
        $pythonCode = <<<'PY'
        import sys
        from wiki_compare import fetch_wiki_page

        # Fetch the page named by the first argument in the language given by the second
        page = fetch_wiki_page(sys.argv[1], sys.argv[2])

        # Output the HTML content (or an empty line when the page is unavailable)
        if page and 'html_content' in page:
            print(page['html_content'])
        else:
            print("")
        PY;

        // Returns the program's stdout, or null when it could not be started or printed nothing.
        $fetchHtml = static function (string $language) use ($pythonCode, $key, $workingDir): ?string {
            $pipes = [];
            // Array form of proc_open(): arguments are passed as-is, no shell is involved.
            $process = proc_open(
                ['python3', '-c', $pythonCode, $key, $language],
                [1 => ['pipe', 'w']],
                $pipes,
                $workingDir
            );
            if (!is_resource($process)) {
                return null;
            }

            $stdout = stream_get_contents($pipes[1]);
            fclose($pipes[1]);
            proc_close($process);

            return ($stdout === false || $stdout === '') ? null : $stdout;
        };

        // Fetch the English page and keep only its main content.
        $englishHtml = $fetchHtml('en');
        if ($englishHtml) {
            $englishHtml = $this->extractMainContent($englishHtml);
        }

        // Fetch the French page (might not exist, but we try).
        $frenchHtml = $fetchHtml('fr');
        if ($frenchHtml) {
            $frenchHtml = $this->extractMainContent($frenchHtml);
        }
    }

    return $this->render('admin/wiki_create_french.html.twig', [
        'key' => $key,
        'english_url' => $englishUrl,
        'french_edit_url' => $frenchEditUrl,
        'english_html' => $englishHtml,
        'french_html' => $frenchHtml
    ]);
}
/**
 * Renders the archived proposals page.
 *
 * Query parameters:
 *  - refresh: when present, the data is regenerated and the browser is
 *    redirected to the same page without that parameter.
 *  - limit:   optional positive number of proposals the fetch script should
 *    process; other values are ignored.
 *
 * Data comes from wiki_compare/archived_proposals.json. The fetch script is
 * also run when the file is missing, or when its "last_updated" timestamp is
 * more than one day old or cannot be parsed; the file is then re-read within
 * the same request. No redirect follows such an automatic refresh, so a script
 * that fails to produce fresher data cannot cause a redirect loop.
 */
#[Route('/wiki/archived-proposals', name: 'app_admin_wiki_archived_proposals')]
public function archivedProposals(\Symfony\Component\HttpFoundation\Request $request): Response
{
    $jsonFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/archived_proposals.json';
    $forceRefresh = $request->query->has('refresh');

    // Only a positive limit is meaningful for the fetch script.
    $requestedLimit = (int) $request->query->get('limit');
    $limit = $requestedLimit > 0 ? $requestedLimit : null;

    if ($forceRefresh) {
        $this->refreshArchivedProposalsData($limit);
        $this->addFlash('success', 'Les données des propositions archivées ont été rafraîchies.');

        // Redirect without "refresh" (keeping "limit") so reloading the page does not re-run the script.
        return $this->redirectToRoute('app_admin_wiki_archived_proposals', $limit !== null ? ['limit' => $limit] : []);
    }

    // Returns [proposals, statistics, lastUpdated]; empty values when the file is unreadable or malformed.
    $loadData = static function () use ($jsonFile): array {
        $rawJson = file_get_contents($jsonFile);
        $decoded = $rawJson === false ? null : json_decode($rawJson, true);
        if (!is_array($decoded) || !is_array($decoded['proposals'] ?? null)) {
            return [[], [], null];
        }

        return [$decoded['proposals'], $decoded['statistics'] ?? [], $decoded['last_updated'] ?? null];
    };

    $proposals = [];
    $statistics = [];
    $lastUpdated = null;

    if (file_exists($jsonFile)) {
        [$proposals, $statistics, $lastUpdated] = $loadData();

        $isStale = false;
        if (is_string($lastUpdated) && $lastUpdated !== '') {
            try {
                // Stale means older than one day; a timestamp in the future is not stale.
                $isStale = new \DateTimeImmutable($lastUpdated) < new \DateTimeImmutable('-1 day');
            } catch (\Exception $e) {
                // An unparseable timestamp says nothing about freshness: regenerate once.
                $isStale = true;
            }
        }

        if ($isStale) {
            $previousTimestamp = $lastUpdated;
            $this->refreshArchivedProposalsData($limit);

            if (file_exists($jsonFile)) {
                [$proposals, $statistics, $lastUpdated] = $loadData();
            }

            // Announce the automatic update only when the data really changed.
            if ($lastUpdated !== null && $lastUpdated !== $previousTimestamp) {
                $this->addFlash('info', 'Les données des propositions archivées ont été automatiquement mises à jour car elles dataient de plus d\'un jour.');
            }
        }
    } else {
        // If the file doesn't exist, try to create it by running the script.
        $this->refreshArchivedProposalsData($limit);

        if (file_exists($jsonFile)) {
            [$proposals, $statistics, $lastUpdated] = $loadData();
            $this->addFlash('success', 'Le fichier des propositions archivées a été généré avec succès.');
        } else {
            $this->addFlash('error', 'Impossible de générer le fichier des propositions archivées.');
        }
    }

    return $this->render('admin/wiki_archived_proposals.html.twig', [
        'proposals' => $proposals,
        'statistics' => $statistics,
        'last_updated' => $lastUpdated,
        'limit' => $limit
    ]);
}
/**
 * Regenerates the archived proposals data by running wiki_compare/fetch_archived_proposals.py.
 *
 * Nothing is returned: a missing script, a non-zero exit code (with the
 * script's combined stdout/stderr) or an exception is reported to the user as
 * a flash message.
 *
 * @param int|null $limit Optional limit for the number of proposals to process,
 *                        forwarded to the script as "--limit <n>"
 */
private function refreshArchivedProposalsData(?int $limit = null): void
{
    try {
        $scriptPath = $this->getParameter('kernel.project_dir') . '/wiki_compare/fetch_archived_proposals.py';

        if (!file_exists($scriptPath)) {
            $this->addFlash('error', 'Le script fetch_archived_proposals.py n\'existe pas.');
            return;
        }

        // Quote the path so that spaces or shell metacharacters in the project
        // directory cannot split or alter the command.
        $command = 'python3 ' . escapeshellarg($scriptPath);

        // $limit is an integer by type, so it can be appended without quoting.
        if ($limit !== null) {
            $command .= ' --limit ' . $limit;
        }

        $output = [];
        $returnCode = 0;
        exec($command . ' 2>&1', $output, $returnCode);

        if ($returnCode !== 0) {
            $this->addFlash('warning', 'Impossible de mettre à jour les propositions archivées. Erreur: ' . implode("\n", $output));
        }
    } catch (\Exception $e) {
        $this->addFlash('error', 'Erreur lors de l\'exécution du script: ' . $e->getMessage());
    }
}
/**
 * Wiki dashboard: lists every page found in wiki_compare/wiki_pages.csv grouped by
 * key and language, with English/French differences and summary statistics.
 *
 * Data read from the wiki_compare directory of the project:
 *  - wiki_pages.csv (required; redirects to app_admin_index with a flash error if absent)
 *  - pages_unavailable_in_english.json ("pages")
 *  - outdated_pages.json ("specific_pages")
 *  - newly_created_french_pages.json ("created_pages")
 *  - translations.json ("translations")
 * Missing or unreadable JSON files simply yield empty lists.
 *
 * @return Response The rendered admin/wiki.html.twig template, or a redirect.
 */
#[Route('/wiki', name: 'app_admin_wiki')]
public function index(): Response
{
    $compareDir = $this->getParameter('kernel.project_dir') . '/wiki_compare';
    $csvFile = $compareDir . '/wiki_pages.csv';
    if (!file_exists($csvFile)) {
        $this->addFlash('error', 'Le fichier wiki_pages.csv n\'existe pas.');
        return $this->redirectToRoute('app_admin_index');
    }

    // Explicit delimiter/enclosure/escape keep the historical parsing behaviour
    // while avoiding the PHP 8.4 deprecation about the implicit $escape default.
    $rows = array_map(
        static fn (string $line): array => str_getcsv($line, ',', '"', '\\'),
        file($csvFile) ?: []
    );
    $headers = array_shift($rows) ?? [];
    $columnCount = count($headers);

    // Turn each row into an associative record. Rows whose column count differs from
    // the header (blank lines, truncated lines) are skipped: array_combine() throws a
    // ValueError for them on PHP 8, which would otherwise break the whole page.
    $records = [];
    foreach ($rows as $row) {
        if (count($row) !== $columnCount) {
            continue;
        }
        $records[] = array_combine($headers, $row);
    }

    // Population statistics (count/min/max/mean/std_dev) for a non-empty list of numbers.
    $summarize = static function (array $values): array {
        $count = count($values);
        $mean = array_sum($values) / $count;
        $squaredDeviations = 0;
        foreach ($values as $value) {
            $squaredDeviations += ($value - $mean) ** 2;
        }

        return [
            'count' => $count,
            'min' => min($values),
            'max' => max($values),
            'mean' => round($mean, 2),
            'std_dev' => round(sqrt($squaredDeviations / $count), 2),
        ];
    };

    // Returns $field of the decoded JSON file when it is an array, otherwise [].
    $loadJsonList = static function (string $path, string $field): array {
        if (!is_file($path)) {
            return [];
        }
        $raw = file_get_contents($path);
        $decoded = is_string($raw) ? json_decode($raw, true) : null;

        return is_array($decoded) && isset($decoded[$field]) && is_array($decoded[$field])
            ? $decoded[$field]
            : [];
    };

    // Staleness statistics are computed on the raw (signed) scores of every row.
    $stalenessScores = [];
    foreach ($records as $record) {
        if (isset($record['staleness_score']) && is_numeric($record['staleness_score'])) {
            $stalenessScores[] = (float) $record['staleness_score'];
        }
    }
    $stalenessStats = $stalenessScores !== []
        ? $summarize($stalenessScores)
        : ['count' => 0, 'min' => 0, 'max' => 0, 'mean' => 0.0, 'std_dev' => 0.0];

    // Group pages as [key][language]; displayed scores are absolute values rounded to 2 decimals.
    $wikiPages = [];
    foreach ($records as $record) {
        if (!isset($record['key'], $record['language'])) {
            continue;
        }
        if (isset($record['staleness_score']) && is_numeric($record['staleness_score'])) {
            $record['staleness_score'] = round(abs((float) $record['staleness_score']), 2);
        }
        $wikiPages[$record['key']][$record['language']] = $record;
    }

    $signed = static fn (int $number): string => ($number >= 0 ? '+' : '') . $number;

    $missingTranslations = [];
    $pageDifferences = [];
    $stats = [
        'en_sections' => [],
        'fr_sections' => [],
        'en_words' => [],
        'fr_words' => [],
        'en_links' => [],
        'fr_links' => [],
        'en_media' => [],
        'fr_media' => [],
    ];
    foreach ($wikiPages as $pageKey => $languages) {
        if (!isset($languages['en'])) {
            continue;
        }
        if (!isset($languages['fr'])) {
            // English page without a French counterpart.
            $missingTranslations[$pageKey] = $languages['en'];
            continue;
        }

        $en = $languages['en'];
        $fr = $languages['fr'];
        $enSections = (int) ($en['sections'] ?? 0);
        $frSections = (int) ($fr['sections'] ?? 0);
        $enWords = (int) ($en['word_count'] ?? 0);
        $frWords = (int) ($fr['word_count'] ?? 0);
        $enLinks = (int) ($en['link_count'] ?? 0);
        $frLinks = (int) ($fr['link_count'] ?? 0);
        $enMedia = isset($en['media_count']) ? (int) $en['media_count'] : 0;
        $frMedia = isset($fr['media_count']) ? (int) $fr['media_count'] : 0;

        // Differences are French minus English; media is only compared when both sides report it.
        $sectionDiff = $frSections - $enSections;
        $wordDiff = $frWords - $enWords;
        $linkDiff = $frLinks - $enLinks;
        $mediaDiff = isset($fr['media_count'], $en['media_count']) ? $frMedia - $enMedia : 0;

        $pageDifferences[$pageKey] = [
            'section_diff' => $sectionDiff,
            'section_diff_formatted' => $signed($sectionDiff),
            'word_diff' => $wordDiff,
            'word_diff_formatted' => $signed($wordDiff),
            'link_diff' => $linkDiff,
            'link_diff_formatted' => $signed($linkDiff),
            'media_diff' => $mediaDiff,
            'media_diff_formatted' => $signed($mediaDiff),
        ];

        $stats['en_sections'][] = $enSections;
        $stats['fr_sections'][] = $frSections;
        $stats['en_words'][] = $enWords;
        $stats['fr_words'][] = $frWords;
        $stats['en_links'][] = $enLinks;
        $stats['fr_links'][] = $frLinks;
        $stats['en_media'][] = $enMedia;
        $stats['fr_media'][] = $frMedia;
    }

    // Per-metric statistics; metrics without any value are omitted.
    $wikiPagesStats = [];
    foreach ($stats as $metric => $values) {
        if ($values !== []) {
            $wikiPagesStats[$metric] = $summarize($values);
        }
    }

    // Most stale first. Keys lacking either language sort as score 0.
    uasort($wikiPages, static function ($a, $b) {
        $scoreA = isset($a['en'], $a['fr'], $a['en']['staleness_score']) ? (float) $a['en']['staleness_score'] : 0;
        $scoreB = isset($b['en'], $b['fr'], $b['en']['staleness_score']) ? (float) $b['en']['staleness_score'] : 0;

        return $scoreB <=> $scoreA;
    });

    return $this->render('admin/wiki.html.twig', [
        'wiki_pages' => $wikiPages,
        'missing_translations' => $missingTranslations,
        'page_differences' => $pageDifferences,
        'pages_unavailable_in_english' => $loadJsonList($compareDir . '/pages_unavailable_in_english.json', 'pages'),
        'specific_pages' => $loadJsonList($compareDir . '/outdated_pages.json', 'specific_pages'),
        'newly_created_pages' => $loadJsonList($compareDir . '/newly_created_french_pages.json', 'created_pages'),
        'staleness_stats' => $stalenessStats,
        'wiki_pages_stats' => $wikiPagesStats,
        'available_translations' => $loadJsonList($compareDir . '/translations.json', 'translations'),
    ]);
}
/**
 * Runs wiki_compare/wiki_translate.py for the given page key using the project's
 * virtualenv interpreter, reports the outcome as flash messages, then redirects
 * to the wiki dashboard.
 *
 * @param string $key Page key taken from the URL (may contain slashes); untrusted input.
 * @return Response Redirect to app_admin_wiki.
 */
#[Route('/wiki/translate/{key}', name: 'app_admin_wiki_translate', requirements: ['key' => '.+'])]
public function translate(string $key): Response
{
    $this->addFlash('info', 'Traduction en cours pour la page ' . $key);
    try {
        $projectDir = $this->getParameter('kernel.project_dir');
        $scriptPath = $projectDir . '/wiki_compare/wiki_translate.py';
        if (file_exists($scriptPath)) {
            $venvPython = $projectDir . '/venv/bin/python';
            // $key comes straight from the URL: every argument is shell-escaped so that
            // quotes, backticks or $(...) in the key cannot inject commands.
            // stderr is merged into stdout so the failure message below is meaningful.
            $command = sprintf(
                'cd %s && %s %s %s 2>&1',
                escapeshellarg($projectDir),
                escapeshellarg($venvPython),
                escapeshellarg($scriptPath),
                escapeshellarg($key)
            );
            $output = [];
            $returnVar = 0;
            exec($command, $output, $returnVar);
            if ($returnVar === 0) {
                $this->addFlash('success', 'Traduction réussie pour la page ' . $key);
            } else {
                $this->addFlash('warning', 'Problème lors de la traduction: ' . implode("\n", $output));
            }
        } else {
            $this->addFlash('error', 'Le script wiki_translate.py n\'existe pas.');
        }
    } catch (\Exception $e) {
        $this->addFlash('error', 'Erreur lors de l\'exécution du script: ' . $e->getMessage());
    }

    return $this->redirectToRoute('app_admin_wiki');
}
/**
 * Re-runs wiki_compare/wiki_translate.py for the given page key using the project's
 * virtualenv interpreter, reports the outcome as flash messages, then redirects
 * to the wiki dashboard.
 *
 * NOTE(review): the command is the same as the one built by translate() — no
 * "update" flag is passed to the script. Confirm whether the script needs one.
 *
 * @param string $key Page key taken from the URL (may contain slashes); untrusted input.
 * @return Response Redirect to app_admin_wiki.
 */
#[Route('/wiki/update-translation/{key}', name: 'app_admin_wiki_update_translation', requirements: ['key' => '.+'])]
public function updateTranslation(string $key): Response
{
    $this->addFlash('info', 'Mise à jour de la traduction en cours pour la page ' . $key);
    try {
        $projectDir = $this->getParameter('kernel.project_dir');
        $scriptPath = $projectDir . '/wiki_compare/wiki_translate.py';
        if (file_exists($scriptPath)) {
            $venvPython = $projectDir . '/venv/bin/python';
            // $key comes straight from the URL: every argument is shell-escaped so that
            // quotes, backticks or $(...) in the key cannot inject commands.
            // stderr is merged into stdout so the failure message below is meaningful.
            $command = sprintf(
                'cd %s && %s %s %s 2>&1',
                escapeshellarg($projectDir),
                escapeshellarg($venvPython),
                escapeshellarg($scriptPath),
                escapeshellarg($key)
            );
            $output = [];
            $returnVar = 0;
            exec($command, $output, $returnVar);
            if ($returnVar === 0) {
                $this->addFlash('success', 'Mise à jour de la traduction réussie pour la page ' . $key);
            } else {
                $this->addFlash('warning', 'Problème lors de la mise à jour de la traduction: ' . implode("\n", $output));
            }
        } else {
            $this->addFlash('error', 'Le script wiki_translate.py n\'existe pas.');
        }
    } catch (\Exception $e) {
        $this->addFlash('error', 'Erreur lors de l\'exécution du script: ' . $e->getMessage());
    }

    return $this->redirectToRoute('app_admin_wiki');
}
/**
 * Detailed English/French comparison page for a single wiki key.
 *
 * Reads the page rows from wiki_compare/wiki_pages.csv and, when present, the
 * detailed comparison and history from wiki_compare/outdated_pages.json.
 * Redirects with a flash error when the CSV is missing or has no English row
 * for the key.
 *
 * @param string $key Page key taken from the URL (may contain slashes).
 * @return Response The rendered admin/wiki_compare.html.twig template, or a redirect.
 */
#[Route('/wiki/compare/{key}', name: 'app_admin_wiki_compare', requirements: ['key' => '.+'])]
public function compare(string $key): Response
{
    $projectDir = $this->getParameter('kernel.project_dir');
    $csvFile = $projectDir . '/wiki_compare/wiki_pages.csv';
    $jsonFile = $projectDir . '/wiki_compare/outdated_pages.json';
    if (!file_exists($csvFile)) {
        $this->addFlash('error', 'Le fichier wiki_pages.csv n\'existe pas.');
        return $this->redirectToRoute('app_admin_index');
    }

    // Small local helpers (kept as closures so the method stays self-contained).
    $asList = static fn ($value): array => is_array($value) ? $value : [];
    $findPage = static function ($pages) use ($key): ?array {
        if (!is_array($pages)) {
            return null;
        }
        foreach ($pages as $candidate) {
            if (is_array($candidate) && isset($candidate['key']) && $candidate['key'] === $key) {
                return $candidate;
            }
        }

        return null;
    };

    // --- CSV: locate the English and French rows for this key ------------------
    $csvData = array_map(
        static fn (string $line): array => str_getcsv($line, ',', '"', '\\'),
        file($csvFile) ?: []
    );
    $headers = array_shift($csvData) ?? [];
    $columnCount = count($headers);
    $enPage = null;
    $frPage = null;
    foreach ($csvData as $row) {
        // Blank or truncated rows would make array_combine() throw on PHP 8.
        if (count($row) !== $columnCount) {
            continue;
        }
        $csvPage = array_combine($headers, $row);
        if (($csvPage['key'] ?? null) !== $key) {
            continue;
        }
        $language = $csvPage['language'] ?? null;
        if ($language === 'en') {
            $enPage = $csvPage;
        } elseif ($language === 'fr') {
            $frPage = $csvPage;
        }
    }
    if (!$enPage) {
        $this->addFlash('error', 'La page wiki pour la clé "' . $key . '" n\'existe pas.');
        return $this->redirectToRoute('app_admin_wiki');
    }

    // --- JSON: history and detailed comparison ---------------------------------
    $detailedComparison = null;
    $mediaDiff = 0;
    $historyData = null;
    if (file_exists($jsonFile)) {
        $rawJson = file_get_contents($jsonFile);
        $jsonData = is_string($rawJson) ? json_decode($rawJson, true) : null;
        if (!is_array($jsonData)) {
            $jsonData = [];
        }

        // History: one entry per snapshot in which this key appears
        // (regular_pages is searched first, then specific_pages).
        $historyData = [];
        if (isset($jsonData['history']) && is_array($jsonData['history'])) {
            foreach ($jsonData['history'] as $timestamp => $entry) {
                if (!is_array($entry)) {
                    continue;
                }
                $snapshot = $findPage($entry['regular_pages'] ?? null)
                    ?? $findPage($entry['specific_pages'] ?? null);
                if ($snapshot === null) {
                    continue;
                }
                try {
                    $date = (new \DateTimeImmutable((string) $timestamp))->format('Y-m-d');
                } catch (\Exception $e) {
                    // A malformed history key must not take the whole page down.
                    continue;
                }
                $historyData[] = [
                    'timestamp' => $timestamp,
                    'date' => $date,
                    'metrics' => [
                        'staleness_score' => $snapshot['staleness_score'] ?? 0,
                        'date_diff' => $snapshot['date_diff'] ?? 0,
                        'word_diff' => $snapshot['word_diff'] ?? 0,
                        'section_diff' => $snapshot['section_diff'] ?? 0,
                        'link_diff' => $snapshot['link_diff'] ?? 0,
                        'media_diff' => $snapshot['media_diff'] ?? 0,
                    ],
                ];
            }
            usort(
                $historyData,
                static fn (array $a, array $b): int => strtotime((string) $a['timestamp']) <=> strtotime((string) $b['timestamp'])
            );
        }

        // Current comparison report: first match in regular_pages, then specific_pages.
        $report = $findPage(array_merge(
            $asList($jsonData['regular_pages'] ?? null),
            $asList($jsonData['specific_pages'] ?? null)
        ));
        if ($report !== null) {
            // Media: drop duplicate URLs and images that actually exist in the other language.
            $mediaComparison = $report['media_comparison'] ?? null;
            if (is_array($mediaComparison) && $mediaComparison) {
                $enOnlyImages = $asList($mediaComparison['en_only'] ?? null);
                $frOnlyImages = $asList($mediaComparison['fr_only'] ?? null);
                $commonImages = $asList($mediaComparison['common'] ?? null);

                $srcOf = static fn ($media): string => is_array($media) && is_string($media['src'] ?? null) ? $media['src'] : '';
                // Set (src => true) of every image URL known for one language.
                $urlSet = static function (array $onlyImages, string $lang) use ($commonImages, $srcOf): array {
                    $set = [];
                    foreach ($onlyImages as $media) {
                        $set[$srcOf($media)] = true;
                    }
                    foreach ($commonImages as $pair) {
                        if (isset($pair[$lang]['src'])) {
                            $set[$srcOf($pair[$lang])] = true;
                        }
                    }

                    return $set;
                };
                // Keeps the first occurrence of each URL that is absent from the other language.
                $uniqueTo = static function (array $images, array $otherLanguageUrls) use ($srcOf): array {
                    $seen = [];
                    $unique = [];
                    foreach ($images as $media) {
                        $src = $srcOf($media);
                        if (isset($seen[$src]) || isset($otherLanguageUrls[$src])) {
                            continue;
                        }
                        $seen[$src] = true;
                        $unique[] = $media;
                    }

                    return $unique;
                };

                $mediaComparison['en_only'] = $uniqueTo($enOnlyImages, $urlSet($frOnlyImages, 'fr'));
                $mediaComparison['fr_only'] = $uniqueTo($frOnlyImages, $urlSet($enOnlyImages, 'en'));
                // NOTE(review): these counts are taken from the lists *before*
                // de-duplication, as in the previous implementation — confirm the
                // template is not expected to show the size of the filtered lists.
                $mediaComparison['en_only_count'] = count($enOnlyImages);
                $mediaComparison['fr_only_count'] = count($frOnlyImages);
            }

            // Links: sort each list alphabetically by URL (common links by their English URL).
            $linkComparison = $report['link_comparison'] ?? null;
            if (is_array($linkComparison) && $linkComparison) {
                foreach (['en_only', 'fr_only'] as $side) {
                    if (isset($linkComparison[$side]) && is_array($linkComparison[$side])) {
                        usort(
                            $linkComparison[$side],
                            static fn ($a, $b): int => strcmp((string) ($a['href'] ?? ''), (string) ($b['href'] ?? ''))
                        );
                    }
                }
                if (isset($linkComparison['common']) && is_array($linkComparison['common'])) {
                    usort(
                        $linkComparison['common'],
                        static fn ($a, $b): int => strcmp((string) ($a['en']['href'] ?? ''), (string) ($b['en']['href'] ?? ''))
                    );
                }
            }

            // Sections: remove table-of-contents and site-navigation headings.
            $sectionComparison = $report['section_comparison'] ?? null;
            $excludedSections = [
                'Contents', 'Sommaire',
                'Personal tools', 'Namespaces', 'Views', 'Search', 'Site', 'Tools', 'In other projects'
            ];
            $isExcluded = static fn ($title): bool => in_array($title, $excludedSections, true);

            // Adjusted counts start from the CSV values. The French count stays null
            // when there is no French page.
            $enSectionCount = (int) ($enPage['sections'] ?? 0);
            $frSectionCount = $frPage ? (int) ($frPage['sections'] ?? 0) : null;
            if (is_array($sectionComparison) && $sectionComparison) {
                $removedEn = 0;
                $removedFr = 0;
                if (isset($sectionComparison['common']) && is_array($sectionComparison['common'])) {
                    $kept = array_values(array_filter(
                        $sectionComparison['common'],
                        static fn ($section): bool => !($isExcluded($section['en']['title'] ?? null) || $isExcluded($section['fr']['title'] ?? null))
                    ));
                    // A removed common section exists on both pages.
                    $removed = count($sectionComparison['common']) - count($kept);
                    $removedEn += $removed;
                    $removedFr += $removed;
                    $sectionComparison['common'] = $kept;
                }
                if (isset($sectionComparison['en_only']) && is_array($sectionComparison['en_only'])) {
                    $kept = array_values(array_filter(
                        $sectionComparison['en_only'],
                        static fn ($section): bool => !$isExcluded($section['title'] ?? null)
                    ));
                    $removedEn += count($sectionComparison['en_only']) - count($kept);
                    $sectionComparison['en_only'] = $kept;
                }
                if (isset($sectionComparison['fr_only']) && is_array($sectionComparison['fr_only'])) {
                    $kept = array_values(array_filter(
                        $sectionComparison['fr_only'],
                        static fn ($section): bool => !$isExcluded($section['title'] ?? null)
                    ));
                    $removedFr += count($sectionComparison['fr_only']) - count($kept);
                    $sectionComparison['fr_only'] = $kept;
                }
                // Each language is only reduced by the sections removed from *its* page;
                // subtracting one combined total from both sides under-counted them.
                $enSectionCount = max(0, $enSectionCount - $removedEn);
                if ($frSectionCount !== null) {
                    $frSectionCount = max(0, $frSectionCount - $removedFr);
                }
            }

            // Heading hierarchy checks: language-only sections first, then the common ones.
            $enHierarchyErrors = [];
            $frHierarchyErrors = [];
            if (isset($sectionComparison['en_only']) && is_array($sectionComparison['en_only'])) {
                $enHierarchyErrors = $this->detectHeadingHierarchyErrors($sectionComparison['en_only']);
            }
            if (isset($sectionComparison['fr_only']) && is_array($sectionComparison['fr_only'])) {
                $frHierarchyErrors = $this->detectHeadingHierarchyErrors($sectionComparison['fr_only']);
            }
            if (isset($sectionComparison['common']) && is_array($sectionComparison['common'])) {
                $commonEnSections = array_map(static fn ($section) => $section['en'], $sectionComparison['common']);
                $commonFrSections = array_map(static fn ($section) => $section['fr'], $sectionComparison['common']);
                $enHierarchyErrors = array_merge($enHierarchyErrors, $this->detectHeadingHierarchyErrors($commonEnSections));
                $frHierarchyErrors = array_merge($frHierarchyErrors, $this->detectHeadingHierarchyErrors($commonFrSections));
            }

            $detailedComparison = [
                'section_comparison' => $sectionComparison,
                'aligned_sections' => $this->buildAlignedSectionList($sectionComparison),
                'link_comparison' => $linkComparison,
                'media_comparison' => $mediaComparison,
                'category_comparison' => $report['category_comparison'] ?? null,
                'grammar_suggestions' => $report['grammar_suggestions'] ?? null,
                'adjusted_en_section_count' => $enSectionCount,
                'adjusted_fr_section_count' => $frSectionCount,
                'en_hierarchy_errors' => $enHierarchyErrors,
                'fr_hierarchy_errors' => $frHierarchyErrors
            ];
            $mediaDiff = $report['media_diff'] ?? 0;
        }
    }

    // --- Staleness score breakdown (only when a French page exists) ------------
    $scoreComponents = [];
    if ($frPage) {
        // Date difference in days (English minus French). Parsing at midnight UTC
        // keeps the result a whole number of days regardless of DST.
        $dateDiff = 0;
        if (!empty($enPage['last_modified']) && !empty($frPage['last_modified'])) {
            $utc = new \DateTimeZone('UTC');
            $enDate = \DateTimeImmutable::createFromFormat('!Y-m-d', $enPage['last_modified'], $utc);
            $frDate = \DateTimeImmutable::createFromFormat('!Y-m-d', $frPage['last_modified'], $utc);
            if ($enDate && $frDate) {
                $dateDiff = ($enDate->getTimestamp() - $frDate->getTimestamp()) / (60 * 60 * 24);
            }
        }
        // CSV values are strings: cast before doing arithmetic.
        $wordDiff = (int) ($enPage['word_count'] ?? 0) - (int) ($frPage['word_count'] ?? 0);
        $sectionDiff = (int) ($enPage['sections'] ?? 0) - (int) ($frPage['sections'] ?? 0);
        $linkDiff = (int) ($enPage['link_count'] ?? 0) - (int) ($frPage['link_count'] ?? 0);

        // When both pages report a media count, a 0.1 media weight is carved out of
        // the word and link weights so the total stays 1.0. Components are computed
        // with the same weights that are displayed.
        // NOTE(review): confirm these weights match the script that produces staleness_score.
        $hasMedia = isset($enPage['media_count'], $frPage['media_count']);
        $weights = $hasMedia
            ? ['date' => 0.2, 'word' => 0.45, 'section' => 0.15, 'link' => 0.1, 'media' => 0.1]
            : ['date' => 0.2, 'word' => 0.5, 'section' => 0.15, 'link' => 0.15];

        $scoreComponents = [
            'date' => [
                'value' => $dateDiff,
                'weight' => $weights['date'],
                'component' => abs($dateDiff) * $weights['date'],
                'description' => 'Différence de date (en jours)'
            ],
            'word' => [
                'value' => $wordDiff,
                'weight' => $weights['word'],
                'component' => (abs($wordDiff) / 100) * $weights['word'],
                'description' => 'Différence de nombre de mots'
            ],
            'section' => [
                'value' => $sectionDiff,
                'weight' => $weights['section'],
                'component' => abs($sectionDiff) * $weights['section'],
                'description' => 'Différence de nombre de sections'
            ],
            'link' => [
                'value' => $linkDiff,
                'weight' => $weights['link'],
                'component' => (abs($linkDiff) / 10) * $weights['link'],
                'description' => 'Différence de nombre de liens'
            ]
        ];
        if ($hasMedia) {
            $scoreComponents['media'] = [
                'value' => $mediaDiff,
                'weight' => $weights['media'],
                'component' => (abs($mediaDiff) / 5) * $weights['media'],
                'description' => 'Différence de nombre d\'images'
            ];
        }
    }

    // Link to create the French page when it does not exist yet.
    $createFrUrl = $frPage ? null : 'https://wiki.openstreetmap.org/wiki/FR:' . $key;

    // --- Plain-text lists for the "copy" buttons -------------------------------
    // Renders a section as a wiki heading, e.g. "== Title ==" for level 2.
    $wikiHeading = static function ($section): string {
        $bar = str_repeat('=', max(0, (int) ($section['level'] ?? 0)));

        return $bar . ' ' . ($section['title'] ?? '') . ' ' . $bar;
    };
    $linkLine = static fn ($link): string => ($link['text'] ?? '') . ' - ' . ($link['href'] ?? '');

    $enSections = '';
    $frSections = '';
    $sections = $detailedComparison['section_comparison'] ?? null;
    if ($sections) {
        $commonSections = $asList($sections['common'] ?? null);

        $enSectionsList = [];
        foreach ($commonSections as $section) {
            $enSectionsList[] = $wikiHeading($section['en'] ?? []);
        }
        foreach ($asList($sections['en_only'] ?? null) as $section) {
            $enSectionsList[] = $wikiHeading($section) . ' (EN only)';
        }
        $enSections = implode("\n", $enSectionsList);

        if ($frPage) {
            $frSectionsList = [];
            foreach ($commonSections as $section) {
                $frSectionsList[] = $wikiHeading($section['fr'] ?? []);
            }
            foreach ($asList($sections['fr_only'] ?? null) as $section) {
                $frSectionsList[] = $wikiHeading($section) . ' (FR only)';
            }
            $frSections = implode("\n", $frSectionsList);
        }
    }

    $enLinks = '';
    $frLinks = '';
    $links = $detailedComparison['link_comparison'] ?? null;
    if ($links) {
        $commonLinks = $asList($links['common'] ?? null);

        $enLinksList = [];
        foreach ($commonLinks as $link) {
            $enLinksList[] = $linkLine($link['en'] ?? []);
        }
        foreach ($asList($links['en_only'] ?? null) as $link) {
            $enLinksList[] = $linkLine($link) . ' (EN only)';
        }
        $enLinks = implode("\n", $enLinksList);

        if ($frPage) {
            $frLinksList = [];
            foreach ($commonLinks as $link) {
                $frLinksList[] = $linkLine($link['fr'] ?? []);
            }
            foreach ($asList($links['fr_only'] ?? null) as $link) {
                $frLinksList[] = $linkLine($link) . ' (FR only)';
            }
            $frLinks = implode("\n", $frLinksList);
        }
    }

    // Ensure page URLs are strings to prevent array-to-string conversion errors in the template.
    if ($frPage && isset($frPage['url']) && is_array($frPage['url'])) {
        $frPage['url'] = json_encode($frPage['url']);
    }
    if (isset($enPage['url']) && is_array($enPage['url'])) {
        $enPage['url'] = json_encode($enPage['url']);
    }

    return $this->render('admin/wiki_compare.html.twig', [
        'key' => $key,
        'en_page' => $enPage,
        'fr_page' => $frPage,
        'score_components' => $scoreComponents,
        'create_fr_url' => $createFrUrl,
        'detailed_comparison' => $detailedComparison,
        'en_sections' => $enSections,
        'fr_sections' => $frSections,
        'en_links' => $enLinks,
        'fr_links' => $frLinks,
        'history_data' => $historyData
    ]);
}
/**
 * Extracts the main content from a full HTML page, dropping everything outside
 * the content container as well as scripts, styles and section-edit links.
 *
 * The content container is the element with id "mw-content-text" or, failing
 * that, the first element whose class attribute contains "mw-content-ltr".
 * When the input is empty, cannot be parsed, or has no such container, the
 * original string is returned unchanged.
 *
 * @param string $html The full HTML content
 * @return string The extracted main content
 */
private function extractMainContent(string $html): string
{
    // DOMDocument::loadHTML() throws a ValueError on an empty string (PHP 8).
    if (trim($html) === '') {
        return $html;
    }

    $dom = new \DOMDocument();
    // Collect libxml warnings about malformed HTML instead of emitting them,
    // and restore the caller's setting afterwards (it is process-wide).
    $previousErrorMode = libxml_use_internal_errors(true);
    try {
        $loaded = $dom->loadHTML($html);
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);
    }
    if (!$loaded) {
        return $html;
    }

    $xpath = new \DOMXPath($dom);
    $contentElement = $dom->getElementById('mw-content-text');
    if (!$contentElement) {
        $candidates = $xpath->query("//*[contains(@class, 'mw-content-ltr')]");
        if ($candidates !== false && $candidates->length > 0) {
            $contentElement = $candidates->item(0);
        }
    }
    if (!$contentElement) {
        return $html;
    }

    // Remove unwanted nodes on the DOM rather than with regular expressions:
    // edit-section spans contain nested <span> elements, so a non-greedy regex
    // stops at the first inner </span> and leaves the rest of the link behind.
    $unwanted = $xpath->query(
        ".//script | .//style | .//span[contains(concat(' ', normalize-space(@class), ' '), ' mw-editsection ')]",
        $contentElement
    );
    if ($unwanted !== false) {
        foreach (iterator_to_array($unwanted) as $node) {
            $node->parentNode?->removeChild($node);
        }
    }

    $contentHtml = $dom->saveHTML($contentElement);

    return $contentHtml === false ? $html : $contentHtml;
}
}