getParameter('kernel.project_dir') . '/wiki_compare/outdated_pages.json'; $histogramFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/staleness_histogram.png'; $regularPages = []; $specificPages = []; $lastUpdated = null; $histogramExists = file_exists($histogramFile); if (file_exists($outdatedPagesFile)) { // Use memory-efficient approach to extract data from the large JSON file $maxPages = 100; // Limit the number of pages to prevent memory exhaustion // Extract regular_pages array $regularPages = $this->extractJsonArrayByKey($outdatedPagesFile, 'regular_pages', $maxPages); // Extract specific_pages array $specificPages = $this->extractJsonArrayByKey($outdatedPagesFile, 'specific_pages', $maxPages); // Extract last_updated value $lastUpdated = $this->extractJsonScalarByKey($outdatedPagesFile, 'last_updated'); } return $this->render('admin/wiki_decrepitude.html.twig', [ 'regular_pages' => $regularPages, 'specific_pages' => $specificPages, 'last_updated' => $lastUpdated, 'histogram_exists' => $histogramExists, 'json_exists' => file_exists($outdatedPagesFile) ]); } /** * Displays the evolution of page rankings over time */ #[Route('/wiki/rankings', name: 'app_admin_wiki_rankings')] public function pageRankings(): Response { $rankingsFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/page_rankings.json'; $timestamps = []; $pages = []; $globalMetrics = []; $lastUpdated = null; if (file_exists($rankingsFile)) { // Load the rankings data try { $rankingsData = json_decode(file_get_contents($rankingsFile), true); if (json_last_error() === JSON_ERROR_NONE) { $timestamps = $rankingsData['timestamps'] ?? []; $pages = $rankingsData['pages'] ?? []; $globalMetrics = $rankingsData['global_metrics'] ?? []; // Get the last timestamp as last_updated if (!empty($timestamps)) { $lastUpdated = end($timestamps); } } } catch (\Exception $e) { // Log the error error_log("Error loading rankings data: " . 
$e->getMessage()); } } return $this->render('admin/wiki_rankings.html.twig', [ 'timestamps' => $timestamps, 'pages' => $pages, 'global_metrics' => $globalMetrics, 'last_updated' => $lastUpdated, 'json_exists' => file_exists($rankingsFile) ]); } /** * Detects incorrect heading hierarchies in a list of sections * For example, h4 directly under h2 without h3 in between * * @param array $sections List of sections with 'level' and 'title' keys * @return array List of section indices with hierarchy errors */ private function detectHeadingHierarchyErrors(array $sections): array { $errors = []; $lastLevel = 0; foreach ($sections as $index => $section) { $currentLevel = isset($section['level']) ? (int)$section['level'] : 0; // Skip if level is not set or is 0 if ($currentLevel === 0) { continue; } // If this is the first section, just record its level if ($lastLevel === 0) { $lastLevel = $currentLevel; continue; } // Check if the level jump is more than 1 // For example, h2 -> h4 (skipping h3) if ($currentLevel > $lastLevel + 1) { $errors[] = $index; } $lastLevel = $currentLevel; } return $errors; } /** * Builds an aligned list of sections for English and French * Adds empty placeholders in the French column for sections that exist in English but not in French * * @param array $sectionComparison Section comparison data with 'common', 'en_only', and 'fr_only' keys * @return array Aligned section list with 'en' and 'fr' columns */ private function buildAlignedSectionList(array $sectionComparison): array { $alignedSections = []; // First, process common sections (they already have both en and fr) // if (isset($sectionComparison['common']) && is_array($sectionComparison['common'])) { // foreach ($sectionComparison['common'] as $section) { // $alignedSections[] = [ // 'en' => $section['en'], // 'fr' => $section['fr'] // ]; // } // } // Then, process English-only sections and add empty placeholders for French if (isset($sectionComparison['en_only']) && 
is_array($sectionComparison['en_only'])) { foreach ($sectionComparison['en_only'] as $section) { $alignedSections[] = [ 'en' => [ 'title' => $section['title'], 'level' => $section['level'] ], 'fr' => [ 'title' => '', // Empty placeholder 'level' => $section['level'], // Same level as English 'is_placeholder' => true ] ]; } } // // // Finally, process French-only sections (these will be shown at the end) if (isset($sectionComparison['fr_only']) && is_array($sectionComparison['fr_only'])) { foreach ($sectionComparison['fr_only'] as $section) { $alignedSections[] = [ 'en' => [ 'title' => '', // Empty placeholder 'level' => $section['level'], // Same level as French 'is_placeholder' => true ], 'fr' => [ 'title' => $section['title'], 'level' => $section['level'] ] ]; } } return $alignedSections; } #[Route('/', name: 'app_public_index')] public function accueilAction(): Response { return $this->redirectToRoute('app_admin_wiki'); } #[Route('/wiki/recent-changes', name: 'app_admin_wiki_recent_changes')] public function recentChanges(): Response { $recentChangesFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/recent_changes.json'; // Initialize arrays $recentChanges = []; $lastUpdated = null; $teamMembers = []; // Check if the recent changes file exists and load it if (file_exists($recentChangesFile)) { $recentChangesData = json_decode(file_get_contents($recentChangesFile), true); if (isset($recentChangesData['recent_changes']) && is_array($recentChangesData['recent_changes'])) { $recentChanges = $recentChangesData['recent_changes']; $lastUpdated = isset($recentChangesData['last_updated']) ? 
$recentChangesData['last_updated'] : null; // Process team members statistics $teamMembers = $this->processTeamMembersStats($recentChanges); } // Check if the data is older than 1 hour if ($lastUpdated) { $lastUpdatedTime = new \DateTime($lastUpdated); $now = new \DateTime(); $diff = $now->diff($lastUpdatedTime); // If older than 1 hour, refresh the data if ($diff->h >= 1 || $diff->days > 0) { // $this->refreshRecentChangesData(); // return $this->redirectToRoute('app_admin_wiki_recent_changes'); } } } else { // If the file doesn't exist, try to create it by running the script $this->refreshRecentChangesData(); // Check if the file was created if (file_exists($recentChangesFile)) { // return $this->redirectToRoute('app_admin_wiki_recent_changes'); } else { $this->addFlash('error', 'Impossible de générer le fichier des changements récents.'); } } return $this->render('admin/wiki_recent_changes.html.twig', [ 'recent_changes' => $recentChanges, 'last_updated' => $lastUpdated, 'team_members' => $teamMembers ]); } /** * Process team members statistics from recent changes data * * @param array $recentChanges Recent changes data * @return array Team members statistics */ private function processTeamMembersStats(array $recentChanges): array { $teamMembers = []; // Group changes by user and count modifications foreach ($recentChanges as $change) { $user = $change['user']; $changeSize = $change['change_size']; // Initialize user data if not exists if (!isset($teamMembers[$user])) { $teamMembers[$user] = [ 'username' => $user, 'contributions' => 0, 'chars_added' => 0, 'chars_changed' => 0, 'chars_deleted' => 0, 'user_url' => "https://wiki.openstreetmap.org/wiki/User:" . 
urlencode($user) ]; } // Increment contribution count $teamMembers[$user]['contributions']++; // Process change size if (is_numeric($changeSize)) { $changeSize = (int)$changeSize; if ($changeSize > 0) { $teamMembers[$user]['chars_added'] += $changeSize; } elseif ($changeSize < 0) { $teamMembers[$user]['chars_deleted'] += abs($changeSize); } else { // Change size is 0, might be a new page or other change $teamMembers[$user]['chars_changed'] += 0; } } elseif (preg_match('/^\+(\d+)$/', $changeSize, $matches)) { // Format like "+123" $teamMembers[$user]['chars_added'] += (int)$matches[1]; } elseif (preg_match('/^−(\d+)$/', $changeSize, $matches)) { // Format like "−123" (note: this is not a regular minus sign) $teamMembers[$user]['chars_deleted'] += (int)$matches[1]; } } // Convert to indexed array and sort by contributions count (descending) $teamMembers = array_values($teamMembers); usort($teamMembers, function ($a, $b) { return $b['contributions'] - $a['contributions']; }); return $teamMembers; } /** * Refresh the recent changes data by running the fetch_recent_changes.py script */ private function refreshRecentChangesData(): void { try { $scriptPath = $this->getParameter('kernel.project_dir') . '/wiki_compare/fetch_recent_changes.py'; if (file_exists($scriptPath)) { exec('python3 ' . $scriptPath . ' --force 2>&1', $output, $returnCode); if ($returnCode !== 0) { $this->addFlash('warning', 'Impossible de mettre à jour les changements récents. Erreur: ' . implode("\n", $output)); } } else { $this->addFlash('error', 'Le script fetch_recent_changes.py n\'existe pas.'); } } catch (\Exception $e) { $this->addFlash('error', 'Erreur lors de l\'exécution du script: ' . $e->getMessage()); } } #[Route('/wiki/missing-translations', name: 'app_admin_wiki_missing_translations')] public function missingTranslations(): Response { $untranslatedFile = $this->getParameter('kernel.project_dir') . 
'/wiki_compare/untranslated_french_pages.json'; // Initialize arrays $untranslatedPages = []; $lastUpdated = null; // Check if the untranslated pages file exists and load it if (file_exists($untranslatedFile)) { $untranslatedData = json_decode(file_get_contents($untranslatedFile), true); if (isset($untranslatedData['untranslated_pages']) && is_array($untranslatedData['untranslated_pages'])) { $untranslatedPages = $untranslatedData['untranslated_pages']; $lastUpdated = isset($untranslatedData['last_updated']) ? $untranslatedData['last_updated'] : null; } // Check if the data is older than 1 hour if ($lastUpdated) { $lastUpdatedTime = new \DateTime($lastUpdated); $now = new \DateTime(); $diff = $now->diff($lastUpdatedTime); // If older than 1 hour, refresh the data if ($diff->h >= 1 || $diff->days > 0) { $this->refreshUntranslatedPagesData(); return $this->redirectToRoute('app_admin_wiki_missing_translations'); } } } else { // If the file doesn't exist, try to create it by running the script $this->refreshUntranslatedPagesData(); // Check if the file was created if (file_exists($untranslatedFile)) { return $this->redirectToRoute('app_admin_wiki_missing_translations'); } else { $this->addFlash('error', 'Impossible de générer le fichier des pages sans traduction.'); } } // Remove duplicates based on page title $uniquePages = []; $seenTitles = []; foreach ($untranslatedPages as $page) { if (!isset($seenTitles[$page['title']])) { $seenTitles[$page['title']] = true; $uniquePages[] = $page; } } // Sort pages by title usort($uniquePages, function($a, $b) { return strcasecmp($a['title'], $b['title']); }); return $this->render('admin/wiki_missing_translations.html.twig', [ 'untranslated_pages' => $uniquePages, 'last_updated' => $lastUpdated ]); } /** * Refresh the untranslated pages data by running the find_untranslated_french_pages.py script */ private function refreshUntranslatedPagesData(): void { try { $scriptPath = $this->getParameter('kernel.project_dir') . 
'/wiki_compare/find_untranslated_french_pages.py'; if (file_exists($scriptPath)) { exec('python3 ' . $scriptPath . ' --force 2>&1', $output, $returnCode); if ($returnCode !== 0) { $this->addFlash('warning', 'Impossible de mettre à jour les pages sans traduction. Erreur: ' . implode("\n", $output)); } } else { $this->addFlash('error', 'Le script find_untranslated_french_pages.py n\'existe pas.'); } } catch (\Exception $e) { $this->addFlash('error', 'Erreur lors de l\'exécution du script: ' . $e->getMessage()); } } #[Route('/wiki/pages-unavailable-in-french', name: 'app_admin_wiki_pages_unavailable_in_french')] public function pagesUnavailableInFrench(): Response { $unavailablePagesFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/pages_unavailable_in_french.json'; // Initialize arrays $groupedPages = []; $allPages = []; $lastUpdated = null; // Check if the unavailable pages file exists and load it if (file_exists($unavailablePagesFile)) { $unavailableData = json_decode(file_get_contents($unavailablePagesFile), true); if (isset($unavailableData['grouped_pages']) && is_array($unavailableData['grouped_pages'])) { $groupedPages = $unavailableData['grouped_pages']; } if (isset($unavailableData['all_pages']) && is_array($unavailableData['all_pages'])) { $allPages = $unavailableData['all_pages']; } $lastUpdated = isset($unavailableData['last_updated']) ? 
$unavailableData['last_updated'] : null; // Check if the data is older than 1 hour if ($lastUpdated) { $lastUpdatedTime = new \DateTime($lastUpdated); $now = new \DateTime(); $diff = $now->diff($lastUpdatedTime); // If older than 1 hour, refresh the data if ($diff->h >= 1 || $diff->days > 0) { $this->refreshPagesUnavailableInFrenchData(); return $this->redirectToRoute('app_admin_wiki_pages_unavailable_in_french'); } } } else { // If the file doesn't exist, try to create it by running the script $this->refreshPagesUnavailableInFrenchData(); // Check if the file was created if (file_exists($unavailablePagesFile)) { return $this->redirectToRoute('app_admin_wiki_pages_unavailable_in_french'); } else { $this->addFlash('error', 'Impossible de générer le fichier des pages non disponibles en français.'); } } // Move English pages to the top of the list $englishPages = $groupedPages['En'] ?? []; unset($groupedPages['En']); // Sort other language groups alphabetically ksort($groupedPages); // Reinsert English pages at the beginning if (!empty($englishPages)) { $groupedPages = ['En' => $englishPages] + $groupedPages; } return $this->render('admin/wiki_pages_unavailable_in_french.html.twig', [ 'grouped_pages' => $groupedPages, 'all_pages' => $allPages, 'last_updated' => $lastUpdated ]); } /** * Refresh the pages unavailable in French data by running the find_pages_unavailable_in_french.py script */ private function refreshPagesUnavailableInFrenchData(): void { try { $scriptPath = $this->getParameter('kernel.project_dir') . '/wiki_compare/find_pages_unavailable_in_french.py'; if (file_exists($scriptPath)) { exec('python3 ' . $scriptPath . ' --force 2>&1', $output, $returnCode); if ($returnCode !== 0) { $this->addFlash('warning', 'Impossible de mettre à jour les pages non disponibles en français. Erreur: ' . 
implode("\n", $output)); } } else { $this->addFlash('error', 'Le script find_pages_unavailable_in_french.py n\'existe pas.'); } } catch (\Exception $e) { $this->addFlash('error', 'Erreur lors de l\'exécution du script: ' . $e->getMessage()); } } #[Route('/wiki/osm-fr-groups', name: 'app_admin_wiki_osm_fr_groups')] public function osmFrGroups(): Response { $groupsFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/osm_fr_groups.json'; // Initialize arrays $workingGroups = []; $localGroups = []; $umapUrl = 'https://umap.openstreetmap.fr/fr/map/groupes-locaux-openstreetmap_152488'; $lastUpdated = null; // Check if the groups file exists and load it if (file_exists($groupsFile)) { $groupsData = json_decode(file_get_contents($groupsFile), true); if (isset($groupsData['working_groups']) && is_array($groupsData['working_groups'])) { $workingGroups = $groupsData['working_groups']; } if (isset($groupsData['local_groups']) && is_array($groupsData['local_groups'])) { $localGroups = $groupsData['local_groups']; } $umapUrl = isset($groupsData['umap_url']) ? $groupsData['umap_url'] : 'https://umap.openstreetmap.fr/fr/map/groupes-locaux-openstreetmap_152488'; $lastUpdated = isset($groupsData['last_updated']) ? 
$groupsData['last_updated'] : null; // Check if the data is older than 1 hour if ($lastUpdated) { $lastUpdatedTime = new \DateTime($lastUpdated); $now = new \DateTime(); $diff = $now->diff($lastUpdatedTime); // If older than 1 hour, refresh the data // if ($diff->h >= 1 || $diff->days > 0) { // $this->refreshOsmFrGroupsData(); // return $this->redirectToRoute('app_admin_wiki_osm_fr_groups'); // } } } else { // If the file doesn't exist, try to create it by running the script $this->refreshOsmFrGroupsData(); // Check if the file was created if (file_exists($groupsFile)) { // return $this->redirectToRoute('app_admin_wiki_osm_fr_groups'); } else { $this->addFlash('error', 'Impossible de générer le fichier des groupes OSM-FR.'); } } // Group working groups by category $groupedWorkingGroups = []; foreach ($workingGroups as $group) { $category = $group['category'] ?? 'Autres'; if (!isset($groupedWorkingGroups[$category])) { $groupedWorkingGroups[$category] = []; } $groupedWorkingGroups[$category][] = $group; } // Sort categories alphabetically ksort($groupedWorkingGroups); return $this->render('admin/wiki_osm_fr_groups.html.twig', [ 'working_groups' => $groupedWorkingGroups, 'local_groups' => $localGroups, 'umap_url' => $umapUrl, 'last_updated' => $lastUpdated ]); } /** * Refresh the OSM-FR groups data by running the fetch_osm_fr_groups.py script */ private function refreshOsmFrGroupsData(): void { try { $scriptPath = $this->getParameter('kernel.project_dir') . '/wiki_compare/fetch_osm_fr_groups.py'; if (file_exists($scriptPath)) { exec('python3 ' . $scriptPath . ' --force 2>&1', $output, $returnCode); if ($returnCode !== 0) { $this->addFlash('warning', 'Impossible de mettre à jour les groupes OSM-FR. Erreur: ' . implode("\n", $output)); } } else { $this->addFlash('error', 'Le script fetch_osm_fr_groups.py n\'existe pas.'); } } catch (\Exception $e) { $this->addFlash('error', 'Erreur lors de l\'exécution du script: ' . 
$e->getMessage()); } } #[Route('/wiki/suspicious-deletions', name: 'app_admin_wiki_suspicious_deletions')] public function suspiciousDeletions(): Response { $suspiciousDeletesFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/suspicious_deletions.json'; $wordDiffFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/outdated_pages.json'; // Initialize arrays $suspiciousPages = []; $wordDiffPages = []; // Check if the suspicious deletions file exists and load it if (file_exists($suspiciousDeletesFile)) { $suspiciousData = json_decode(file_get_contents($suspiciousDeletesFile), true); if (isset($suspiciousData['deletions']) && is_array($suspiciousData['deletions'])) { $suspiciousPages = $suspiciousData['deletions']; $lastUpdated = isset($suspiciousData['last_updated']) ? $suspiciousData['last_updated'] : null; } } else { // If the file doesn't exist, try to create it by running the script try { $scriptPath = $this->getParameter('kernel.project_dir') . '/wiki_compare/detect_suspicious_deletions.py'; if (file_exists($scriptPath)) { exec('python3 ' . $scriptPath . ' 2>&1', $output, $returnCode); if ($returnCode === 0 && file_exists($suspiciousDeletesFile)) { $suspiciousData = json_decode(file_get_contents($suspiciousDeletesFile), true); if (isset($suspiciousData['deletions']) && is_array($suspiciousData['deletions'])) { $suspiciousPages = $suspiciousData['deletions']; $lastUpdated = isset($suspiciousData['last_updated']) ? $suspiciousData['last_updated'] : null; } } else { $this->addFlash('warning', 'Impossible de générer le fichier de suppressions suspectes. Erreur: ' . implode("\n", $output)); } } } catch (\Exception $e) { $this->addFlash('error', 'Erreur lors de l\'exécution du script: ' . 
$e->getMessage()); } } // Also load the word-diff based suspicious pages for comparison if (file_exists($wordDiffFile)) { $jsonData = json_decode(file_get_contents($wordDiffFile), true); foreach ($jsonData as $page) { if (isset($page['fr_page']) && isset($page['en_page'])) { // Calculate deletion percentage $enWordCount = (int)$page['en_page']['word_count']; $frWordCount = (int)$page['fr_page']['word_count']; $wordDiff = $enWordCount - $frWordCount; // If English has more words and the difference is significant (>30%) if ($wordDiff > 0 && $frWordCount > 0 && ($wordDiff / $enWordCount) > 0.3) { $page['deletion_percentage'] = round(($wordDiff / $enWordCount) * 100, 2); $wordDiffPages[] = $page; } } } // Sort by deletion percentage (highest first) usort($wordDiffPages, function ($a, $b) { return $b['deletion_percentage'] <=> $a['deletion_percentage']; }); } return $this->render('admin/wiki_suspicious_deletions.html.twig', [ 'suspicious_pages' => $wordDiffPages, 'recent_deletions' => $suspiciousPages, 'last_updated' => $lastUpdated ?? null ]); } #[Route('/wiki/tag-proposals', name: 'app_admin_wiki_tag_proposals')] public function tagProposals(): Response { $proposalsFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/proposals.json'; // Initialize arrays $votingProposals = []; $recentProposals = []; $lastUpdated = null; // Check if the proposals file exists and load it if (file_exists($proposalsFile)) { $proposalsData = json_decode(file_get_contents($proposalsFile), true); if (isset($proposalsData['voting_proposals']) && is_array($proposalsData['voting_proposals'])) { $votingProposals = $proposalsData['voting_proposals']; } if (isset($proposalsData['recent_proposals']) && is_array($proposalsData['recent_proposals'])) { $recentProposals = $proposalsData['recent_proposals']; } $lastUpdated = isset($proposalsData['last_updated']) ? 
$proposalsData['last_updated'] : null; // Check if the data is older than 1 hour if ($lastUpdated) { $lastUpdatedTime = new \DateTime($lastUpdated); $now = new \DateTime(); $diff = $now->diff($lastUpdatedTime); // If older than 1 hour, refresh the data // if ($diff->h >= 1 || $diff->days > 0) { // $this->refreshProposalsData(); // return $this->redirectToRoute('app_admin_wiki_tag_proposals'); // } } } else { // If the file doesn't exist, try to create it by running the script $this->refreshProposalsData(); // Check if the file was created if (file_exists($proposalsFile)) { // return $this->redirectToRoute('app_admin_wiki_tag_proposals'); } else { $this->addFlash('error', 'Impossible de générer le fichier de propositions.'); } } // Format the proposals for the template $formattedProposals = []; foreach ($votingProposals as $proposal) { $formattedProposal = [ 'feature' => $proposal['title'], 'url' => $proposal['url'], 'description' => 'Proposition en cours de vote', 'proposer' => $proposal['proposer'] ?? '', 'status' => $proposal['status'] ?? 'Voting', 'type' => 'voting' ]; // Add voting information if available if (isset($proposal['votes'])) { $formattedProposal['votes'] = $proposal['votes']; $formattedProposal['total_votes'] = $proposal['total_votes'] ?? 0; $formattedProposal['approve_percentage'] = $proposal['approve_percentage'] ?? 0; $formattedProposal['oppose_percentage'] = $proposal['oppose_percentage'] ?? 0; $formattedProposal['abstain_percentage'] = $proposal['abstain_percentage'] ?? 0; } $formattedProposals[] = $formattedProposal; } foreach ($recentProposals as $proposal) { $formattedProposals[] = [ 'feature' => $proposal['title'], 'url' => $proposal['url'], 'description' => 'Dernière modification: ' . 
$proposal['last_modified'], 'proposer' => $proposal['modified_by'], 'status' => 'Draft', 'type' => 'recent' ]; } return $this->render('admin/wiki_tag_proposals.html.twig', [ 'proposals' => $formattedProposals, 'last_updated' => $lastUpdated ]); } /** * Refresh the proposals data by running the fetch_proposals.py script */ private function refreshProposalsData(): void { try { $scriptPath = $this->getParameter('kernel.project_dir') . '/wiki_compare/fetch_proposals.py'; if (file_exists($scriptPath)) { exec('python3 ' . $scriptPath . ' --force 2>&1', $output, $returnCode); if ($returnCode !== 0) { $this->addFlash('warning', 'Impossible de mettre à jour les propositions. Erreur: ' . implode("\n", $output)); } } else { $this->addFlash('error', 'Le script fetch_proposals.py n\'existe pas.'); } } catch (\Exception $e) { $this->addFlash('error', 'Erreur lors de l\'exécution du script: ' . $e->getMessage()); } } #[Route('/wiki/random-suggestion', name: 'app_admin_wiki_random_suggestion')] public function randomSuggestion(): Response { $jsonFile = $this->getParameter('kernel.project_dir') . 
'/wiki_compare/outdated_pages.json'; if (!file_exists($jsonFile)) { $this->addFlash('error', 'Le fichier outdated_pages.json n\'existe pas.'); return $this->redirectToRoute('app_admin_wiki'); } // Use memory-efficient approach to extract only the necessary data $maxItems = 100; // Limit the number of items to prevent memory exhaustion // Extract regular_pages and specific_pages arrays $regularPages = $this->extractJsonArrayByKey($jsonFile, 'regular_pages', $maxItems); $specificPages = $this->extractJsonArrayByKey($jsonFile, 'specific_pages', $maxItems); // Combine them into a single array $allPages = array_merge($regularPages, $specificPages); if (empty($allPages)) { $this->addFlash('error', 'Aucune page à améliorer n\'a été trouvée.'); return $this->redirectToRoute('app_admin_wiki'); } // Select a random page from the combined pages $randomPage = $allPages[array_rand($allPages)]; return $this->render('admin/wiki_random_suggestion.html.twig', [ 'page' => $randomPage ]); } #[Route('/wiki/create-french/{key}', name: 'app_admin_wiki_create_french', requirements: ['key' => '.+'])] public function createFrench(string $key): Response { // Construct the URLs for the English page and the French page creation form $englishUrl = "https://wiki.openstreetmap.org/wiki/{$key}"; $frenchEditUrl = "https://wiki.openstreetmap.org/w/index.php?title=FR:{$key}&action=edit"; // Fetch the HTML content of the English page using wiki_compare.py $scriptPath = $this->getParameter('kernel.project_dir') . '/wiki_compare/wiki_compare.py'; $englishHtml = null; $frenchHtml = null; $frenchCacheExists = false; if (file_exists($scriptPath)) { // Create a temporary Python script to fetch the page content $tempScriptPath = $this->getParameter('kernel.project_dir') . 
'/wiki_compare/temp_fetch_page.py'; $pythonCode = << 3 and sys.argv[3] == 'check_cache' if check_cache_only and language == 'fr': # For French pages, construct the URL to check cache if key.startswith('http'): url = key else: url = f"https://wiki.openstreetmap.org/wiki/FR:{key}" # Create cache key cache_key = hashlib.md5(url.encode()).hexdigest() cache_file = Path(HTML_CACHE_DIR) / f"{cache_key}.html" # Check if cache exists if cache_file.exists(): print("CACHE_EXISTS") else: print("CACHE_MISSING") else: # Normal fetch operation page = fetch_wiki_page(key, language) # Output the HTML content if page and 'html_content' in page: print(page['html_content']) else: print("") EOT; file_put_contents($tempScriptPath, $pythonCode); chmod($tempScriptPath, 0755); // First check if French page exists in cache $command = "cd " . $this->getParameter('kernel.project_dir') . "/wiki_compare && python3 {$tempScriptPath} {$key} fr check_cache"; $cacheCheckResult = trim(shell_exec($command)); $frenchCacheExists = ($cacheCheckResult === "CACHE_EXISTS"); // Fetch English page $command = "cd " . $this->getParameter('kernel.project_dir') . "/wiki_compare && python3 {$tempScriptPath} {$key} en"; $englishHtml = shell_exec($command); // Extract only the content part from the HTML (remove headers, footers, etc.) if ($englishHtml) { $englishHtml = $this->extractMainContent($englishHtml); } // Fetch French page (might not exist, but we'll try) $command = "cd " . $this->getParameter('kernel.project_dir') . "/wiki_compare && python3 {$tempScriptPath} {$key} fr"; $frenchHtml = shell_exec($command); // Extract only the content part from the HTML (remove headers, footers, etc.) 
if ($frenchHtml) { $frenchHtml = $this->extractMainContent($frenchHtml); } // Clean up the temporary script unlink($tempScriptPath); } return $this->render('admin/wiki_create_french.html.twig', [ 'key' => $key, 'english_url' => $englishUrl, 'french_edit_url' => $frenchEditUrl, 'english_html' => $englishHtml, 'french_html' => $frenchHtml, 'french_cache_exists' => $frenchCacheExists ]); } #[Route('/wiki/archived-proposals', name: 'app_admin_wiki_archived_proposals')] public function archivedProposals(\Symfony\Component\HttpFoundation\Request $request): Response { $jsonFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/archived_proposals.json'; $forceRefresh = $request->query->has('refresh'); $limit = $request->query->get('limit') ? (int)$request->query->get('limit') : null; // Initialize arrays $proposals = []; $statistics = []; $lastUpdated = null; // Check if we should force a refresh if ($forceRefresh) { $this->refreshArchivedProposalsData($limit); $this->addFlash('success', 'Les données des propositions archivées ont été rafraîchies.'); // Preserve the limit parameter in the redirect if it was provided if ($limit) { return $this->redirectToRoute('app_admin_wiki_archived_proposals', ['limit' => $limit]); } return $this->redirectToRoute('app_admin_wiki_archived_proposals'); } // Check if the archived proposals file exists and load it if (file_exists($jsonFile)) { // Use memory-efficient approach to extract only the necessary data $maxItems = 100; // Limit the number of items to prevent memory exhaustion // Extract proposals array $proposals = $this->extractJsonArrayByKey($jsonFile, 'proposals', $maxItems); // Extract statistics object $statistics = $this->extractJsonArrayByKey($jsonFile, 'statistics', $maxItems); // Extract last_updated value $lastUpdated = $this->extractJsonScalarByKey($jsonFile, 'last_updated'); // Check if the data is older than 1 day if ($lastUpdated) { $lastUpdatedTime = new \DateTime($lastUpdated); $now = new \DateTime(); $diff 
= $now->diff($lastUpdatedTime); // If older than 1 day, refresh the data if ($diff->days > 1) { $this->refreshArchivedProposalsData($limit); $this->addFlash('info', 'Les données des propositions archivées ont été automatiquement mises à jour car elles dataient de plus d\'un jour.'); // Preserve the limit parameter in the redirect if it was provided if ($limit) { return $this->redirectToRoute('app_admin_wiki_archived_proposals', ['limit' => $limit]); } return $this->redirectToRoute('app_admin_wiki_archived_proposals'); } } } else { // If the file doesn't exist, try to create it by running the script $this->refreshArchivedProposalsData($limit); // Check if the file was created if (file_exists($jsonFile)) { $this->addFlash('success', 'Le fichier des propositions archivées a été généré avec succès.'); // Preserve the limit parameter in the redirect if it was provided if ($limit) { return $this->redirectToRoute('app_admin_wiki_archived_proposals', ['limit' => $limit]); } return $this->redirectToRoute('app_admin_wiki_archived_proposals'); } else { $this->addFlash('error', 'Impossible de générer le fichier des propositions archivées.'); } } return $this->render('admin/wiki_archived_proposals.html.twig', [ 'proposals' => $proposals, 'statistics' => $statistics, 'last_updated' => $lastUpdated, 'limit' => $limit ]); } /** * Refresh the archived proposals data by running the fetch_archived_proposals.py script * * @param int|null $limit Optional limit for the number of proposals to process */ private function refreshArchivedProposalsData(?int $limit = null): void { try { $scriptPath = $this->getParameter('kernel.project_dir') . '/wiki_compare/fetch_archived_proposals.py'; if (file_exists($scriptPath)) { $command = 'python3 ' . $scriptPath; // Add limit parameter if provided if ($limit !== null) { $command .= ' --limit ' . $limit; } exec($command . 
' 2>&1', $output, $returnCode);

                if ($returnCode !== 0) {
                    $this->addFlash('warning', 'Impossible de mettre à jour les propositions archivées. Erreur: ' . implode("\n", $output));
                }
            } else {
                $this->addFlash('error', 'Le script fetch_archived_proposals.py n\'existe pas.');
            }
        } catch (\Exception $e) {
            $this->addFlash('error', 'Erreur lors de l\'exécution du script: ' . $e->getMessage());
        }
    }

    /**
     * Wiki dashboard: reads wiki_compare/wiki_pages.csv and the companion JSON files and
     * renders admin/wiki.html.twig.
     *
     * Redirects to app_admin_index with an error flash when the CSV file does not exist.
     * CSV rows whose column count differs from the header row (blank or truncated lines)
     * are ignored instead of aborting the request.
     */
    #[Route('/wiki', name: 'app_admin_wiki')]
    public function index(): Response
    {
        $csvFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/wiki_pages.csv';

        if (!file_exists($csvFile)) {
            $this->addFlash('error', 'Le fichier wiki_pages.csv n\'existe pas.');
            return $this->redirectToRoute('app_admin_index');
        }

        // file() returns false when the file cannot be read; treat that as an empty CSV.
        $csvLines = file($csvFile);
        $csvData = array_map('str_getcsv', $csvLines === false ? [] : $csvLines);
        $headers = array_shift($csvData) ?? [];
        $columnCount = count($headers);

        // Parse the rows once. array_combine() throws a ValueError on PHP 8 when a row has a
        // different number of fields than the header, so malformed rows are skipped.
        $rows = [];
        foreach ($csvData as $row) {
            if ($columnCount > 0 && count($row) === $columnCount) {
                $rows[] = array_combine($headers, $row);
            }
        }

        $wikiPages = [];
        $missingTranslations = [];
        $pageDifferences = [];
        $pagesUnavailableInEnglish = [];

        // Collect all staleness scores for statistics (raw values, before abs() is applied below)
        $stalenessScores = [];
        foreach ($rows as $page) {
            if (isset($page['staleness_score']) && is_numeric($page['staleness_score'])) {
                $stalenessScores[] = (float)$page['staleness_score'];
            }
        }

        // Calculate statistics
        $stalenessStats = [
            'count' => count($stalenessScores),
            'min' => !empty($stalenessScores) ? min($stalenessScores) : 0,
            'max' => !empty($stalenessScores) ? max($stalenessScores) : 0,
            'mean' => 0,
            'std_dev' => 0
        ];

        // Calculate mean
        if (!empty($stalenessScores)) {
            $stalenessStats['mean'] = array_sum($stalenessScores) / count($stalenessScores);

            // Calculate standard deviation (population form: divides by the number of scores)
            $variance = 0;
            foreach ($stalenessScores as $score) {
                $variance += pow($score - $stalenessStats['mean'], 2);
            }
            $stalenessStats['std_dev'] = sqrt($variance / count($stalenessScores));
        }

        // Round statistics to 2 decimal places
        $stalenessStats['mean'] = round($stalenessStats['mean'], 2);
        $stalenessStats['std_dev'] = round($stalenessStats['std_dev'], 2);

        // Process pages - use absolute values without normalization
        foreach ($rows as $page) {
            if (isset($page['staleness_score']) && is_numeric($page['staleness_score'])) {
                // Absolute value, rounded to 2 decimal places
                $page['staleness_score'] = round(abs((float)$page['staleness_score']), 2);
            }

            // A row that lacks the grouping columns cannot be indexed by key/language
            if (!isset($page['key'], $page['language'])) {
                continue;
            }

            $wikiPages[$page['key']][$page['language']] = $page;
        }

        // Identify pages missing French translations
        foreach ($wikiPages as $key => $languages) {
            if (isset($languages['en']) && !isset($languages['fr'])) {
                $missingTranslations[$key] = $languages['en'];
            }
        }

        // Prepare arrays for statistics
        $stats = [
            'en_sections' => [],
            'fr_sections' => [],
            'en_words' => [],
            'fr_words' => [],
            'en_links' => [],
            'fr_links' => [],
            'en_media' => [],
            'fr_media' => []
        ];

        // Reads a numeric CSV column as int; a missing column counts as 0
        $intColumn = static function (array $page, string $column): int {
            return (int)($page[$column] ?? 0);
        };
        // Formats a difference with an explicit "+" for non-negative values
        $signed = static function (int $value): string {
            return ($value >= 0 ? '+' : '') . $value;
        };

        // Calculate differences between English and French versions
        foreach ($wikiPages as $key => $languages) {
            if (!isset($languages['en']) || !isset($languages['fr'])) {
                continue;
            }

            $en = $languages['en'];
            $fr = $languages['fr'];

            // Calculate differences (French - English)
            $sectionDiff = $intColumn($fr, 'sections') - $intColumn($en, 'sections');
            $wordDiff = $intColumn($fr, 'word_count') - $intColumn($en, 'word_count');
            $linkDiff = $intColumn($fr, 'link_count') - $intColumn($en, 'link_count');
            // The media difference is only meaningful when both rows carry a media_count
            $mediaDiff = isset($fr['media_count']) && isset($en['media_count'])
                ? (int)$fr['media_count'] - (int)$en['media_count']
                : 0;

            // Format differences with + or - sign
            $pageDifferences[$key] = [
                'section_diff' => $sectionDiff,
                'section_diff_formatted' => $signed($sectionDiff),
                'word_diff' => $wordDiff,
                'word_diff_formatted' => $signed($wordDiff),
                'link_diff' => $linkDiff,
                'link_diff_formatted' => $signed($linkDiff),
                'media_diff' => $mediaDiff,
                'media_diff_formatted' => $signed($mediaDiff),
            ];

            // Collect data for statistics
            $stats['en_sections'][] = $intColumn($en, 'sections');
            $stats['fr_sections'][] = $intColumn($fr, 'sections');
            $stats['en_words'][] = $intColumn($en, 'word_count');
            $stats['fr_words'][] = $intColumn($fr, 'word_count');
            $stats['en_links'][] = $intColumn($en, 'link_count');
            $stats['fr_links'][] = $intColumn($fr, 'link_count');
            $stats['en_media'][] = $intColumn($en, 'media_count');
            $stats['fr_media'][] = $intColumn($fr, 'media_count');
        }

        // Calculate statistics (a series without values gets no entry at all)
        $wikiPagesStats = [];
        foreach ($stats as $key => $values) {
            if (empty($values)) {
                continue;
            }

            $mean = array_sum($values) / count($values);

            // Calculate standard deviation
            $variance = 0;
            foreach ($values as $value) {
                $variance += pow($value - $mean, 2);
            }
            $stdDev = sqrt($variance / count($values));

            $wikiPagesStats[$key] = [
                'count' => count($values),
                'min' => min($values),
                'max' => max($values),
                'mean' => round($mean, 2),
                'std_dev' => round($stdDev, 2)
            ];
        }

        // Sort wiki pages by staleness score (descending); pages that do not exist in both
        // languages are treated as score 0
        uasort($wikiPages, function ($a, $b) {
            $scoreA = isset($a['en']) && isset($a['fr']) && isset($a['en']['staleness_score']) ? (float)$a['en']['staleness_score'] : 0;
            $scoreB = isset($b['en']) && isset($b['fr']) && isset($b['en']['staleness_score']) ? (float)$b['en']['staleness_score'] : 0;

            return $scoreB <=> $scoreA;
        });

        // Load pages unavailable in English
        $pagesUnavailableInEnglishFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/pages_unavailable_in_english.json';
        if (file_exists($pagesUnavailableInEnglishFile)) {
            $pagesUnavailableInEnglishData = json_decode(file_get_contents($pagesUnavailableInEnglishFile), true);

            if (isset($pagesUnavailableInEnglishData['pages']) && is_array($pagesUnavailableInEnglishData['pages'])) {
                // Deduplicate pages based on URL
                $uniquePages = [];
                $seenUrls = [];

                foreach ($pagesUnavailableInEnglishData['pages'] as $page) {
                    if (isset($page['url'])) {
                        // Use URL as the key for deduplication
                        $url = $page['url'];
                        if (!isset($seenUrls[$url])) {
                            $seenUrls[$url] = true;
                            $uniquePages[] = $page;
                        }
                    } else {
                        // If no URL, keep the page (shouldn't happen, but just in case)
                        $uniquePages[] = $page;
                    }
                }

                $pagesUnavailableInEnglish = $uniquePages;
            }
        }

        // Load specific pages from outdated_pages.json
        $specificPages = [];
        $outdatedPagesFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/outdated_pages.json';
        if (file_exists($outdatedPagesFile)) {
            // Use a memory-efficient approach to extract only the specific_pages array
            // without loading the entire file into memory
            $maxPages = 100; // Limit the number of pages to prevent memory exhaustion
            $specificPages = $this->extractSpecificPagesFromJson($outdatedPagesFile, $maxPages);
        }

        // Load newly created French pages
        $newlyCreatedPages = [];
        $newlyCreatedPagesFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/newly_created_french_pages.json';
        if (file_exists($newlyCreatedPagesFile)) {
            $newlyCreatedPagesData = json_decode(file_get_contents($newlyCreatedPagesFile), true);
            if (isset($newlyCreatedPagesData['created_pages']) && is_array($newlyCreatedPagesData['created_pages'])) {
                $newlyCreatedPages = $newlyCreatedPagesData['created_pages'];
            }
        }

        // Load machine translations
        $availableTranslations = [];
        $translationsFile = $this->getParameter('kernel.project_dir') .
'/wiki_compare/translations.json';
        if (file_exists($translationsFile)) {
            $translationsData = json_decode(file_get_contents($translationsFile), true);
            if (isset($translationsData['translations']) && is_array($translationsData['translations'])) {
                $availableTranslations = $translationsData['translations'];
            }
        }

        // Load keys without wiki pages
        $keysWithoutWiki = [];
        $keysWithoutWikiFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/keys_without_wiki.json';
        if (file_exists($keysWithoutWikiFile)) {
            $keysWithoutWikiData = json_decode(file_get_contents($keysWithoutWikiFile), true);
            if (is_array($keysWithoutWikiData)) {
                $keysWithoutWiki = $keysWithoutWikiData;
            }
        }

        return $this->render('admin/wiki.html.twig', [
            'wiki_pages' => $wikiPages,
            'missing_translations' => $missingTranslations,
            'page_differences' => $pageDifferences,
            'pages_unavailable_in_english' => $pagesUnavailableInEnglish,
            'specific_pages' => $specificPages,
            'newly_created_pages' => $newlyCreatedPages,
            'staleness_stats' => $stalenessStats,
            'wiki_pages_stats' => $wikiPagesStats,
            'available_translations' => $availableTranslations,
            'keys_without_wiki' => $keysWithoutWiki
        ]);
    }

    /**
     * Runs wiki_compare/wiki_translate.py for the given key, reports the outcome through
     * flash messages and redirects to the wiki dashboard.
     *
     * @param string $key Key taken from the URL (the route accepts any non-empty string)
     */
    #[Route('/wiki/translate/{key}', name: 'app_admin_wiki_translate', requirements: ['key' => '.+'])]
    public function translate(string $key): Response
    {
        $this->addFlash('info', 'Traduction en cours pour la page ' . $key);

        $this->runWikiTranslateScript(
            $key,
            'Traduction réussie pour la page ',
            'Problème lors de la traduction: '
        );

        return $this->redirectToRoute('app_admin_wiki');
    }

    /**
     * Re-runs wiki_compare/wiki_translate.py for the given key, reports the outcome through
     * flash messages and redirects to the wiki dashboard.
     *
     * NOTE(review): the previous inline comment said the script is executed "with the update
     * flag", but the command has always been identical to the one built by translate() - no
     * extra flag is passed. Confirm against wiki_translate.py whether a flag is required.
     *
     * @param string $key Key taken from the URL (the route accepts any non-empty string)
     */
    #[Route('/wiki/update-translation/{key}', name: 'app_admin_wiki_update_translation', requirements: ['key' => '.+'])]
    public function updateTranslation(string $key): Response
    {
        $this->addFlash('info', 'Mise à jour de la traduction en cours pour la page ' . $key);

        $this->runWikiTranslateScript(
            $key,
            'Mise à jour de la traduction réussie pour la page ',
            'Problème lors de la mise à jour de la traduction: '
        );

        return $this->redirectToRoute('app_admin_wiki');
    }

    /**
     * Executes wiki_translate.py with the project's virtualenv interpreter and adds a flash
     * message describing the result.
     *
     * Every part of the command line is passed through escapeshellarg(): $key comes straight
     * from the request URL, so concatenating it unescaped would let a crafted key run
     * arbitrary shell commands.
     *
     * @param string $key           Key handed to the script as its only argument
     * @param string $successPrefix Flash text used on exit code 0 (the key is appended)
     * @param string $failurePrefix Flash text used on a non-zero exit code (script output is appended)
     */
    private function runWikiTranslateScript(string $key, string $successPrefix, string $failurePrefix): void
    {
        try {
            $projectDir = $this->getParameter('kernel.project_dir');
            $scriptPath = $projectDir . '/wiki_compare/wiki_translate.py';

            if (!file_exists($scriptPath)) {
                $this->addFlash('error', 'Le script wiki_translate.py n\'existe pas.');
                return;
            }

            // Execute the translation script with virtual environment
            $venvPython = $projectDir . '/venv/bin/python';
            $command = 'cd ' . escapeshellarg($projectDir)
                . ' && ' . escapeshellarg($venvPython)
                . ' ' . escapeshellarg($scriptPath)
                . ' ' . escapeshellarg($key);

            $output = [];
            $returnVar = 0;
            exec($command, $output, $returnVar);

            if ($returnVar === 0) {
                $this->addFlash('success', $successPrefix . $key);
            } else {
                $this->addFlash('warning', $failurePrefix . implode("\n", $output));
            }
        } catch (\Exception $e) {
            $this->addFlash('error', 'Erreur lors de l\'exécution du script: ' . $e->getMessage());
        }
    }

    /**
     * Side-by-side comparison of the English and French wiki pages for one key.
     *
     * @param string $key Key taken from the URL
     */
    #[Route('/wiki/compare/{key}', name: 'app_admin_wiki_compare', requirements: ['key' => '.+'])]
    public function compare(string $key): Response
    {
        $csvFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/wiki_pages.csv';
        $jsonFile = $this->getParameter('kernel.project_dir') .
'/wiki_compare/outdated_pages.json';

        if (!file_exists($csvFile)) {
            $this->addFlash('error', 'Le fichier wiki_pages.csv n\'existe pas.');
            return $this->redirectToRoute('app_admin_index');
        }

        // file() returns false when the file cannot be read; treat that as an empty CSV.
        $csvLines = file($csvFile);
        $csvData = array_map('str_getcsv', $csvLines === false ? [] : $csvLines);
        $headers = array_shift($csvData) ?? [];
        $columnCount = count($headers);

        // Process CSV data to find the requested key
        $enPage = null;
        $frPage = null;

        foreach ($csvData as $row) {
            // array_combine() throws a ValueError on PHP 8 when the row width differs from the
            // header (blank or truncated lines), so such rows are skipped.
            if ($columnCount === 0 || count($row) !== $columnCount) {
                continue;
            }

            $page = array_combine($headers, $row);
            if (($page['key'] ?? null) !== $key) {
                continue;
            }

            $language = $page['language'] ?? null;
            if ($language === 'en') {
                $enPage = $page;
            } elseif ($language === 'fr') {
                $frPage = $page;
            }
        }

        // If English page doesn't exist, redirect back with error
        if (!$enPage) {
            $this->addFlash('error', 'La page wiki pour la clé "' . $key . '" n\'existe pas.');
            return $this->redirectToRoute('app_admin_wiki');
        }

        // Reads a numeric CSV column as int; a missing column counts as 0
        $intColumn = static function (array $page, string $column): int {
            return (int)($page[$column] ?? 0);
        };

        // Get detailed content comparison from JSON file
        $detailedComparison = null;
        $mediaDiff = 0;
        $historyData = null;
        $prevPage = null;
        $nextPage = null;
        $stalenessDistribution = null;

        if (file_exists($jsonFile)) {
            // Use memory-efficient approach to extract only the necessary data
            $maxItems = 100; // Limit the number of items to prevent memory exhaustion

            // Extract history data if available
            $historyData = [];

            // Get history data from the JSON file
            $historyEntries = $this->extractJsonArrayByKey($jsonFile, 'history', $maxItems);

            // Process history data for the current key
            foreach ($historyEntries as $timestamp => $entry) {
                $date = 'N/A';
                if (is_string($timestamp) && $timestamp !== '' && $timestamp !== '0') {
                    try {
                        $date = (new \DateTime($timestamp))->format('Y-m-d');
                    } catch (\Exception $e) {
                        // Unparseable timestamp: keep the 'N/A' placeholder instead of failing the page
                    }
                }

                $historyEntry = [
                    'timestamp' => $timestamp,
                    'date' => $date,
                    'metrics' => []
                ];

                // Look in regular_pages first, then in specific_pages; the first match wins
                foreach (['regular_pages', 'specific_pages'] as $listName) {
                    if (!isset($entry[$listName]) || !is_array($entry[$listName])) {
                        continue;
                    }

                    foreach ($entry[$listName] as $page) {
                        if (isset($page['key']) && $page['key'] === $key) {
                            // Extract metrics
                            $historyEntry['metrics'] = [
                                'staleness_score' => $page['staleness_score'] ?? 0,
                                'date_diff' => $page['date_diff'] ?? 0,
                                'word_diff' => $page['word_diff'] ?? 0,
                                'section_diff' => $page['section_diff'] ?? 0,
                                'link_diff' => $page['link_diff'] ?? 0,
                                'media_diff' => $page['media_diff'] ?? 0
                            ];
                            $historyData[] = $historyEntry;
                            break 2;
                        }
                    }
                }
            }

            // Sort history data by timestamp
            usort($historyData, function ($a, $b) {
                return strtotime((string)$a['timestamp']) <=> strtotime((string)$b['timestamp']);
            });

            // Get regular_pages and specific_pages arrays
            $regularPages = $this->extractJsonArrayByKey($jsonFile, 'regular_pages', $maxItems);
            $specificPages = $this->extractJsonArrayByKey($jsonFile, 'specific_pages', $maxItems);

            // Combine them into a single array
            $allPages = array_merge($regularPages, $specificPages);

            // Sort pages by staleness score (descending)
            usort($allPages, function ($a, $b) {
                $scoreA = $a['staleness_score'] ?? 0;
                $scoreB = $b['staleness_score'] ?? 0;
                return $scoreB <=> $scoreA; // Descending order
            });

            // Find the current page index in the sorted array
            $currentIndex = -1;
            foreach ($allPages as $index => $page) {
                if (isset($page['key']) && $page['key'] === $key) {
                    $currentIndex = $index;
                    break;
                }
            }

            // Determine previous and next pages
            if ($currentIndex > 0) {
                $prevPage = $allPages[$currentIndex - 1];
            }
            if ($currentIndex >= 0 && $currentIndex < count($allPages) - 1) {
                $nextPage = $allPages[$currentIndex + 1];
            }

            // Create staleness score distribution data for histogram
            $stalenessScores = [];
            foreach ($allPages as $page) {
                if (isset($page['staleness_score'])) {
                    $stalenessScores[] = $page['staleness_score'];
                }
            }

            if (!empty($stalenessScores)) {
                // Calculate statistics
                $min = min($stalenessScores);
                $max = max($stalenessScores);
                $avg = array_sum($stalenessScores) / count($stalenessScores);
                $median = $this->calculateMedian($stalenessScores);

                // Create histogram bins (10 bins)
                $binCount = 10;
                $binSize = ($max - $min) / $binCount;

                // When every score is identical the bin width is 0 and dividing by it throws a
                // DivisionByZeroError on PHP 8; in that case everything belongs to the first bin.
                $binIndexFor = static function ($score) use ($min, $binSize, $binCount): int {
                    if ($binSize <= 0) {
                        return 0;
                    }
                    return (int)min($binCount - 1, floor(($score - $min) / $binSize));
                };

                $bins = [];
                $binLabels = [];

                // Initialize bins
                for ($i = 0; $i < $binCount; $i++) {
                    $bins[$i] = 0;
                    $binStart = $min + ($i * $binSize);
                    $binEnd = $binStart + $binSize;
                    $binLabels[$i] = round($binStart, 1) . ' - ' . round($binEnd, 1);
                }

                // Count scores in each bin
                foreach ($stalenessScores as $score) {
                    $bins[$binIndexFor($score)]++;
                }

                // Find which bin the current page falls into (score 0 when the page is not listed)
                $currentPageScore = 0;
                foreach ($allPages as $page) {
                    if (isset($page['key']) && $page['key'] === $key && isset($page['staleness_score'])) {
                        $currentPageScore = $page['staleness_score'];
                        break;
                    }
                }
                $currentPageBin = $binIndexFor($currentPageScore);

                $stalenessDistribution = [
                    'scores' => $stalenessScores,
                    'min' => $min,
                    'max' => $max,
                    'avg' => $avg,
                    'median' => $median,
                    'bins' => $bins,
                    'binLabels' => $binLabels,
                    'currentPageScore' => $currentPageScore,
                    'currentPageBin' => $currentPageBin,
                    'totalPages' => count($stalenessScores)
                ];
            }

            // Find the page with the matching key
            foreach ($allPages as $page) {
                if (!isset($page['key']) || $page['key'] !== $key) {
                    continue;
                }

                $mediaComparison = $page['media_comparison'] ?? null;

                // Deduplicate images by URL in the controller and filter out images that appear in both languages
                if ($mediaComparison) {
                    // Get all image URLs from both languages
                    $enOnlyImages = $mediaComparison['en_only'] ?? [];
                    $frOnlyImages = $mediaComparison['fr_only'] ?? [];
                    $commonImages = $mediaComparison['common'] ?? [];

                    // Extract all URLs from French images (French-only plus the French side of common)
                    $frImageUrls = [];
                    foreach ($frOnlyImages as $media) {
                        $frImageUrls[] = $media['src'];
                    }
                    foreach ($commonImages as $commonMedia) {
                        if (isset($commonMedia['fr']['src'])) {
                            $frImageUrls[] = $commonMedia['fr']['src'];
                        }
                    }

                    // Extract all URLs from English images (English-only plus the English side of common)
                    $enImageUrls = [];
                    foreach ($enOnlyImages as $media) {
                        $enImageUrls[] = $media['src'];
                    }
                    foreach ($commonImages as $commonMedia) {
                        if (isset($commonMedia['en']['src'])) {
                            $enImageUrls[] = $commonMedia['en']['src'];
                        }
                    }

                    // Process English-only images - deduplicate and filter out those that appear in French
                    $enUniqueImages = [];
                    $enProcessedUrls = [];
                    foreach ($enOnlyImages as $media) {
                        // Skip if this URL is already processed or if it appears in French images
                        if (!in_array($media['src'], $enProcessedUrls, true) && !in_array($media['src'], $frImageUrls, true)) {
                            $enProcessedUrls[] = $media['src'];
                            $enUniqueImages[] = $media;
                        }
                    }

                    // Process French-only images - deduplicate and filter out those that appear in English
                    $frUniqueImages = [];
                    $frProcessedUrls = [];
                    foreach ($frOnlyImages as $media) {
                        // Skip if this URL is already processed or if it appears in English images
                        if (!in_array($media['src'], $frProcessedUrls, true) && !in_array($media['src'], $enImageUrls, true)) {
                            $frProcessedUrls[] = $media['src'];
                            $frUniqueImages[] = $media;
                        }
                    }

                    // Replace the arrays with deduplicated and filtered versions
                    $mediaComparison['en_only'] = $enUniqueImages;
                    $mediaComparison['fr_only'] = $frUniqueImages;
                    // NOTE(review): the counts are taken from the lists BEFORE deduplication, so they
                    // can exceed the number of images kept above - confirm this is what the template expects.
                    $mediaComparison['en_only_count'] = count($enOnlyImages);
                    $mediaComparison['fr_only_count'] = count($frOnlyImages);
                }

                // Get link comparison data
                $linkComparison = $page['link_comparison'] ?? null;

                // Sort links alphabetically by URL if link comparison exists
                if ($linkComparison) {
                    // Sort English-only and French-only links
                    foreach (['en_only', 'fr_only'] as $side) {
                        if (isset($linkComparison[$side]) && is_array($linkComparison[$side])) {
                            usort($linkComparison[$side], function ($a, $b) {
                                return strcmp($a['href'] ?? '', $b['href'] ?? '');
                            });
                        }
                    }

                    // Sort common links (by their English URL)
                    if (isset($linkComparison['common']) && is_array($linkComparison['common'])) {
                        usort($linkComparison['common'], function ($a, $b) {
                            return strcmp($a['en']['href'] ?? '', $b['en']['href'] ?? '');
                        });
                    }
                }

                // Get section comparison data and filter out "Contents" sections and navigation sections
                $sectionComparison = $page['section_comparison'] ?? null;

                // Sections to exclude from comparison (navigation elements)
                $excludedSections = [
                    'Contents',
                    'Sommaire',
                    'Personal tools',
                    'Namespaces',
                    'Views',
                    'Search',
                    'Site',
                    'Tools',
                    'In other projects'
                ];
                $isExcluded = static function ($title) use ($excludedSections): bool {
                    return in_array($title, $excludedSections, true);
                };

                // Number of sections removed from each list; needed to adjust the per-language counts
                $filteredCommon = 0;
                $filteredEnOnly = 0;
                $filteredFrOnly = 0;

                // Filter out excluded sections if section comparison exists
                if ($sectionComparison) {
                    // Filter common sections
                    if (isset($sectionComparison['common']) && is_array($sectionComparison['common'])) {
                        $before = count($sectionComparison['common']);
                        // Drop the pair if either the English or the French title is in the excluded list
                        $sectionComparison['common'] = array_values(array_filter(
                            $sectionComparison['common'],
                            function ($section) use ($isExcluded) {
                                return !($isExcluded($section['en']['title'] ?? null) || $isExcluded($section['fr']['title'] ?? null));
                            }
                        ));
                        $filteredCommon = $before - count($sectionComparison['common']);
                    }

                    // Filter English-only sections
                    if (isset($sectionComparison['en_only']) && is_array($sectionComparison['en_only'])) {
                        $before = count($sectionComparison['en_only']);
                        $sectionComparison['en_only'] = array_values(array_filter(
                            $sectionComparison['en_only'],
                            function ($section) use ($isExcluded) {
                                return !$isExcluded($section['title'] ?? null);
                            }
                        ));
                        $filteredEnOnly = $before - count($sectionComparison['en_only']);
                    }

                    // Filter French-only sections
                    if (isset($sectionComparison['fr_only']) && is_array($sectionComparison['fr_only'])) {
                        $before = count($sectionComparison['fr_only']);
                        $sectionComparison['fr_only'] = array_values(array_filter(
                            $sectionComparison['fr_only'],
                            function ($section) use ($isExcluded) {
                                return !$isExcluded($section['title'] ?? null);
                            }
                        ));
                        $filteredFrOnly = $before - count($sectionComparison['fr_only']);
                    }
                }

                // Calculate adjusted section counts (excluding "Contents" sections).
                // Each language only loses the sections that were actually part of that page:
                // the common ones plus its own language-only ones. The French page may not
                // exist at all, in which case its count is 0.
                $enSectionCount = max(0, $intColumn($enPage, 'sections') - $filteredCommon - $filteredEnOnly);
                $frSectionCount = $frPage
                    ? max(0, $intColumn($frPage, 'sections') - $filteredCommon - $filteredFrOnly)
                    : 0;

                // Check for incorrect heading hierarchies
                $enHierarchyErrors = [];
                $frHierarchyErrors = [];

                // Check English sections
                if (isset($sectionComparison['en_only']) && is_array($sectionComparison['en_only'])) {
                    $enHierarchyErrors = $this->detectHeadingHierarchyErrors($sectionComparison['en_only']);
                }

                // Also check common sections (English side)
                if (isset($sectionComparison['common']) && is_array($sectionComparison['common'])) {
                    $commonEnSections = array_map(function ($section) {
                        return $section['en'];
                    }, $sectionComparison['common']);

                    $enHierarchyErrors = array_merge($enHierarchyErrors, $this->detectHeadingHierarchyErrors($commonEnSections));
                }

                // Check French sections
                if (isset($sectionComparison['fr_only']) && is_array($sectionComparison['fr_only'])) {
                    $frHierarchyErrors = $this->detectHeadingHierarchyErrors($sectionComparison['fr_only']);
                }

                // Also check common sections (French side)
                if (isset($sectionComparison['common']) && is_array($sectionComparison['common'])) {
                    $commonFrSections = array_map(function ($section) {
                        return $section['fr'];
                    }, $sectionComparison['common']);

                    $frHierarchyErrors = array_merge($frHierarchyErrors, $this->detectHeadingHierarchyErrors($commonFrSections));
                }

                // Build aligned section list for better visualization of missing sections.
                // buildAlignedSectionList() declares an array parameter, so never hand it null.
                $alignedSections = $this->buildAlignedSectionList(is_array($sectionComparison) ? $sectionComparison : []);

                $detailedComparison = [
                    'section_comparison' => $sectionComparison,
                    'aligned_sections' => $alignedSections,
                    'link_comparison' => $linkComparison,
                    'media_comparison' => $mediaComparison,
                    'category_comparison' => $page['category_comparison'] ?? null,
                    'grammar_suggestions' => $page['grammar_suggestions'] ?? null,
                    'adjusted_en_section_count' => $enSectionCount,
                    'adjusted_fr_section_count' => $frSectionCount,
                    'en_hierarchy_errors' => $enHierarchyErrors,
                    'fr_hierarchy_errors' => $frHierarchyErrors
                ];

                $mediaDiff = $page['media_diff'] ?? 0;
                break;
            }
        }

        // Calculate staleness score components
        $scoreComponents = [];
        if ($frPage) {
            // Calculate date difference in days
            $dateDiff = 0;
            if (!empty($enPage['last_modified']) && !empty($frPage['last_modified'])) {
                // The leading "!" resets the time of day; without it both objects inherit the
                // current clock time and the difference can be off by a fraction of a day.
                $enDate = \DateTime::createFromFormat('!Y-m-d', $enPage['last_modified']);
                $frDate = \DateTime::createFromFormat('!Y-m-d', $frPage['last_modified']);
                if ($enDate && $frDate) {
                    $dateDiff = ($enDate->getTimestamp() - $frDate->getTimestamp()) / (60 * 60 * 24);
                }
            }

            // Calculate content differences (English - French)
            $wordDiff = $intColumn($enPage, 'word_count') - $intColumn($frPage, 'word_count');
            $sectionDiff = $intColumn($enPage, 'sections') - $intColumn($frPage, 'sections');
            $linkDiff = $intColumn($enPage, 'link_count') - $intColumn($frPage, 'link_count');

            // Calculate score components
            $dateComponent = abs($dateDiff) * 0.2;
            $wordComponent = (abs($wordDiff) / 100) * 0.5;
            $sectionComponent = abs($sectionDiff) * 0.15;
            $linkComponent = (abs($linkDiff) / 10) * 0.15;

            $scoreComponents = [
                'date' => [
                    'value' => $dateDiff,
                    'weight' => 0.2,
                    'component' => $dateComponent,
                    'description' => 'Différence de date (en jours)'
                ],
                'word' => [
                    'value' => $wordDiff,
                    'weight' => 0.5,
                    'component' => $wordComponent,
                    'description' => 'Différence de nombre de mots'
                ],
                'section' => [
                    'value' => $sectionDiff,
                    'weight' => 0.15,
                    'component' => $sectionComponent,
                    'description' => 'Différence de nombre de sections'
                ],
                'link' => [
                    'value' => $linkDiff,
                    'weight' => 0.15,
                    'component' => $linkComponent,
                    'description' => 'Différence de nombre de liens'
                ]
            ];

            // Add media component if available
            if (isset($enPage['media_count']) && isset($frPage['media_count'])) {
                $mediaComponent = (abs($mediaDiff) / 5) * 0.1;
                $scoreComponents['media'] = [
                    'value' => $mediaDiff,
                    'weight' => 0.1,
                    'component' => $mediaComponent,
                    'description' => 'Différence de nombre d\'images'
                ];

                // Adjust other weights to maintain total of 1.0
                // NOTE(review): only the displayed weights change here; the 'component' values
                // above were computed with the previous weights (0.5 / 0.15) - confirm intent.
                $scoreComponents['date']['weight'] = 0.2;
                $scoreComponents['word']['weight'] = 0.45;
                $scoreComponents['section']['weight'] = 0.15;
                $scoreComponents['link']['weight'] = 0.1;
            }
        }

        // Create URL for new French page if it doesn't exist
        $createFrUrl = null;
        if (!$frPage) {
            $createFrUrl = 'https://wiki.openstreetmap.org/wiki/FR:' . $key;
        }

        // Renders a section as a wiki heading line, e.g. "== Title =="
        $formatHeading = static function (array $section, string $suffix = ''): string {
            $marker = str_repeat('=', (int)$section['level']);
            return $marker . ' ' . $section['title'] . ' ' . $marker . $suffix;
        };

        // Format section titles for copy functionality
        $enSections = '';
        $frSections = '';

        if ($detailedComparison && $detailedComparison['section_comparison']) {
            $commonSections = $detailedComparison['section_comparison']['common'] ?? [];

            // English sections
            if ($enPage) {
                $enSectionsList = [];

                // Add common sections
                foreach ($commonSections as $section) {
                    $enSectionsList[] = $formatHeading($section['en']);
                }

                // Add English-only sections
                foreach ($detailedComparison['section_comparison']['en_only'] ?? [] as $section) {
                    $enSectionsList[] = $formatHeading($section, ' (EN only)');
                }

                $enSections = implode("\n", $enSectionsList);
            }

            // French sections
            if ($frPage) {
                $frSectionsList = [];

                // Add common sections
                foreach ($commonSections as $section) {
                    $frSectionsList[] = $formatHeading($section['fr']);
                }

                // Add French-only sections
                foreach ($detailedComparison['section_comparison']['fr_only'] ?? [] as $section) {
                    $frSectionsList[] = $formatHeading($section, ' (FR only)');
                }

                $frSections = implode("\n", $frSectionsList);
            }
        }

        // Format links for copy functionality
        $enLinks = '';
        $frLinks = '';

        if ($detailedComparison && $detailedComparison['link_comparison']) {
            // English links
            if ($enPage) {
                $enLinksList = [];

                // Add common links
                foreach ($detailedComparison['link_comparison']['common'] ?? [] as $link) {
                    $enLinksList[] = $link['en']['text'] . ' - ' . $link['en']['href'];
                }

                // Add English-only links
                foreach ($detailedComparison['link_comparison']['en_only'] ?? [] as $link) {
                    $enLinksList[] = $link['text'] . ' - ' .
$link['href'] . ' (EN only)';
                }

                $enLinks = implode("\n", $enLinksList);
            }

            // French links
            if ($frPage) {
                $frLinksList = [];

                // Add common links
                foreach ($detailedComparison['link_comparison']['common'] as $link) {
                    $frLinksList[] = $link['fr']['text'] . ' - ' . $link['fr']['href'];
                }

                // Add French-only links
                foreach ($detailedComparison['link_comparison']['fr_only'] as $link) {
                    $frLinksList[] = $link['text'] . ' - ' . $link['href'] . ' (FR only)';
                }

                $frLinks = implode("\n", $frLinksList);
            }
        }

        // Ensure page URLs are strings to prevent array to string conversion errors
        if ($frPage && isset($frPage['url']) && is_array($frPage['url'])) {
            $frPage['url'] = json_encode($frPage['url']);
        }

        if ($enPage && isset($enPage['url']) && is_array($enPage['url'])) {
            $enPage['url'] = json_encode($enPage['url']);
        }

        return $this->render('admin/wiki_compare.html.twig', [
            'key' => $key,
            'en_page' => $enPage,
            'fr_page' => $frPage,
            'score_components' => $scoreComponents,
            'create_fr_url' => $createFrUrl,
            'detailed_comparison' => $detailedComparison,
            'en_sections' => $enSections,
            'fr_sections' => $frSections,
            'en_links' => $enLinks,
            'fr_links' => $frLinks,
            'history_data' => $historyData,
            'prev_page' => $prevPage,
            'next_page' => $nextPage,
            'staleness_distribution' => $stalenessDistribution
        ]);
    }

    /**
     * Extracts the main content from the HTML, removing headers, footers, and other unnecessary elements
     *
     * The content element is looked up by id "mw-content-text" first, then by the class
     * "mw-content-ltr". Script, style and edit-section nodes are removed from it before it
     * is serialized. When no content element is found (or the input is blank) the input is
     * returned unchanged.
     *
     * @param string $html The full HTML content
     * @return string The extracted main content
     */
    private function extractMainContent(string $html): string
    {
        // DOMDocument::loadHTML() rejects an empty source with a ValueError on PHP 8,
        // and there is nothing to extract from blank input anyway.
        if (trim($html) === '') {
            return $html;
        }

        // Create a DOMDocument to parse the HTML
        $dom = new \DOMDocument();

        // Suppress warnings about malformed HTML, then put the libxml error mode back the way
        // it was so other code in the same request is not affected.
        $previousErrorMode = libxml_use_internal_errors(true);
        try {
            $dom->loadHTML($html);
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousErrorMode);
        }

        $xpath = new \DOMXPath($dom);

        // First, try to find the element with id "mw-content-text"
        $contentElement = $dom->getElementById('mw-content-text');

        // If not found, try to find the element with class "mw-content-ltr"
        if (!$contentElement) {
            $elements = $xpath->query("//*[contains(@class, 'mw-content-ltr')]");
            if ($elements !== false && $elements->length > 0) {
                $contentElement = $elements->item(0);
            }
        }

        // If still not found, return the original HTML
        if (!$contentElement) {
            return $html;
        }

        // Remove script and style elements and the edit section links from the content element.
        // This is done on the DOM: the regular expressions previously used here had lost their
        // opening-tag part and matched from the first ">" (or from the very start of the string),
        // which deleted real content.
        // NOTE(review): "mw-editsection" is assumed to be the class of the edit-section spans;
        // the original selector was not recoverable - confirm against the fetched pages.
        $unwantedNodes = $xpath->query(
            ".//script | .//style | .//span[contains(concat(' ', normalize-space(@class), ' '), ' mw-editsection ')]",
            $contentElement
        );
        if ($unwantedNodes !== false) {
            // Copy the node list first: removing nodes while iterating a live list skips entries
            foreach (iterator_to_array($unwantedNodes) as $node) {
                if ($node->parentNode) {
                    $node->parentNode->removeChild($node);
                }
            }
        }

        // Get the HTML of the content element
        $contentHtml = $dom->saveHTML($contentElement);

        return $contentHtml === false ? $html : $contentHtml;
    }

    /**
     * Extracts an array from a large JSON file by key without loading the entire file into memory
     *
     * @param string $filePath Path to the JSON file
     * @param string $key The key of the array to extract
     * @param int $maxItems Maximum number of items to extract (to prevent memory exhaustion)
     * @return array The extracted array
     */
    private function extractJsonArrayByKey(string $filePath, string $key, int $maxItems = 100): array
    {
        $result = [];

        // First, check if the file exists and is readable
        if (!is_readable($filePath)) {
            error_log("File is not readable: $filePath");
            return $result;
        }

        // Get the file size
        $fileSize = filesize($filePath);
        if ($fileSize === false || $fileSize === 0) {
            error_log("File is empty or size could not be determined: $filePath");
            return $result;
        }

        try {
            // For very large files, we'll use a more efficient approach
            // We'll search for the specified key directly
            $handle = fopen($filePath, 'r');
            if (!$handle) {
                error_log("Could not open file: $filePath");
                return $result;
            }

            // Variables to track parsing state
            $bracketCount = 0;
            $buffer = '';
            $itemCount = 0;
            $inArray = false;
            $arrayStarted = false;

            // Skip ahead to find the specified key more quickly
$found = false; $searchKey = '"' . $key . '"'; while (!$found && ($line = fgets($handle)) !== false) { if (strpos($line, $searchKey) !== false) { $found = true; // Extract everything after the key $keyPos = strpos($line, $searchKey); $afterKey = substr($line, $keyPos + strlen($searchKey)); // Find the colon and then the opening bracket if (strpos($afterKey, ':') !== false && strpos($afterKey, '[') !== false) { $inArray = true; $arrayStarted = true; $bracketPos = strpos($afterKey, '['); $buffer = '['; // Start the buffer with an opening bracket $bracketCount = 1; // Add everything after the opening bracket to the buffer $buffer .= substr($afterKey, $bracketPos + 1); } else if (strpos($afterKey, ':') !== false) { // The opening bracket might be on the next line $inArray = true; } break; } } // If we didn't find the key, return empty array if (!$found) { fclose($handle); error_log("Key '$key' not found in file: $filePath"); return $result; } // If we found the key but not the opening bracket yet, look for it if ($inArray && !$arrayStarted) { while (($line = fgets($handle)) !== false) { if (strpos($line, '[') !== false) { $bracketPos = strpos($line, '['); $buffer = '['; // Start the buffer with an opening bracket $bracketCount = 1; $arrayStarted = true; // Add everything after the opening bracket to the buffer $buffer .= substr($line, $bracketPos + 1); break; } } } // If we still haven't found the opening bracket, something is wrong if (!$arrayStarted) { fclose($handle); error_log("Could not find opening bracket for array '$key' in file: $filePath"); return $result; } // Now process the array $collectingItems = true; while ($collectingItems && ($line = fgets($handle)) !== false) { // Count opening and closing brackets to track array nesting $openBrackets = substr_count($line, '[') + substr_count($line, '{'); $closeBrackets = substr_count($line, ']') + substr_count($line, '}'); $bracketCount += $openBrackets - $closeBrackets; // Add the line to our buffer $buffer .= 
$line; // If we've reached the end of the array (bracketCount = 0) if ($bracketCount === 0) { $collectingItems = false; // Try to parse the buffer as JSON try { $parsedData = json_decode($buffer, true); if (json_last_error() !== JSON_ERROR_NONE) { error_log("JSON parse error: " . json_last_error_msg() . " for key '$key'"); // Try a different approach - manually construct a valid JSON array // Split the buffer by objects (each starting with { and ending with }) preg_match_all('/\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}/s', $buffer, $matches); if (!empty($matches[0])) { // Take the first $maxItems objects $objects = array_slice($matches[0], 0, $maxItems); // Construct a valid JSON array $validJson = '[' . implode(',', $objects) . ']'; // Try to parse the valid JSON $parsedData = json_decode($validJson, true); if (json_last_error() === JSON_ERROR_NONE && is_array($parsedData)) { $result = $parsedData; } else { error_log("Alternative JSON parsing approach also failed: " . json_last_error_msg() . " for key '$key'"); } } } else if (is_array($parsedData)) { // Limit the number of items to prevent memory exhaustion $result = array_slice($parsedData, 0, $maxItems); } } catch (\Exception $e) { error_log("Exception parsing JSON for key '$key': " . 
$e->getMessage()); } break; } // Check if we've found a complete item (when we see a closing brace followed by a comma) // This is used to count items and limit the number of items processed if (preg_match('/\}\s*,\s*$/m', $line)) { $itemCount++; // If we've reached the maximum number of items, stop processing if ($itemCount >= $maxItems) { $collectingItems = false; // Create a valid JSON array with the items we've collected so far // We need to ensure the buffer ends with a complete JSON object and a closing bracket // First, find the last complete object (ending with }) $lastObjectEnd = strrpos($buffer, '}'); if ($lastObjectEnd !== false) { // Truncate the buffer at the end of the last complete object $buffer = substr($buffer, 0, $lastObjectEnd + 1); // Add the closing bracket for the array $buffer .= ']'; // Try to parse the buffer as JSON try { $parsedData = json_decode($buffer, true); if (json_last_error() !== JSON_ERROR_NONE) { error_log("JSON parse error after max items: " . json_last_error_msg() . " for key '$key'"); // Try a different approach - manually construct a valid JSON array // Split the buffer by objects (each starting with { and ending with }) preg_match_all('/\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}/s', $buffer, $matches); if (!empty($matches[0])) { // Take the first $maxItems objects $objects = array_slice($matches[0], 0, $maxItems); // Construct a valid JSON array $validJson = '[' . implode(',', $objects) . ']'; // Try to parse the valid JSON $parsedData = json_decode($validJson, true); if (json_last_error() === JSON_ERROR_NONE && is_array($parsedData)) { $result = $parsedData; } else { error_log("Alternative JSON parsing approach also failed: " . json_last_error_msg() . " for key '$key'"); } } } else if (is_array($parsedData)) { $result = $parsedData; } } catch (\Exception $e) { error_log("Exception parsing JSON after max items for key '$key': " . 
$e->getMessage()); } } else { error_log("Could not find the end of the last complete object for key '$key'"); } break; } } } // Close the file fclose($handle); } catch (\Exception $e) { error_log("Exception in extractJsonArrayByKey for key '$key': " . $e->getMessage()); } return $result; } /** * Extracts a scalar value from a large JSON file by key without loading the entire file into memory * * @param string $filePath Path to the JSON file * @param string $key The key of the scalar value to extract * @return mixed The extracted scalar value or null if not found */ private function extractJsonScalarByKey(string $filePath, string $key): mixed { // First, check if the file exists and is readable if (!is_readable($filePath)) { error_log("File is not readable: $filePath"); return null; } try { // For very large files, we'll use a more efficient approach // We'll search for the specified key directly $handle = fopen($filePath, 'r'); if (!$handle) { error_log("Could not open file: $filePath"); return null; } // Skip ahead to find the specified key more quickly $found = false; $searchKey = '"' . $key . 
'"'; $value = null; while (!$found && ($line = fgets($handle)) !== false) { if (strpos($line, $searchKey) !== false) { $found = true; // Extract everything after the key $keyPos = strpos($line, $searchKey); $afterKey = substr($line, $keyPos + strlen($searchKey)); // Check if the value is on this line if (strpos($afterKey, ':') !== false) { $colonPos = strpos($afterKey, ':'); $afterColon = trim(substr($afterKey, $colonPos + 1)); // Extract the value based on its type if (preg_match('/^"([^"]*)"/', $afterColon, $matches)) { // String value $value = $matches[1]; } elseif (preg_match('/^(\d+)/', $afterColon, $matches)) { // Numeric value $value = intval($matches[1]); } elseif (preg_match('/^(true|false)/', $afterColon, $matches)) { // Boolean value $value = ($matches[1] === 'true'); } elseif (strpos($afterColon, 'null') === 0) { // Null value $value = null; } else { // The value might be on the next line or more complex // For simplicity, we'll just use the regex approach as a fallback if (preg_match('/"' . preg_quote($key, '/') . '"\s*:\s*"([^"]*)"/', $line, $matches)) { // String value $value = $matches[1]; } elseif (preg_match('/"' . preg_quote($key, '/') . '"\s*:\s*(\d+)/', $line, $matches)) { // Numeric value $value = intval($matches[1]); } elseif (preg_match('/"' . preg_quote($key, '/') . '"\s*:\s*(true|false)/', $line, $matches)) { // Boolean value $value = ($matches[1] === 'true'); } elseif (strpos($line, 'null') !== false) { // Null value $value = null; } else { error_log("Could not extract value for key '$key' from line: " . trim($line)); } } } else { // The value might be on the next line error_log("Value for key '$key' might be on the next line, using fallback method"); // Read the next line $nextLine = fgets($handle); if ($nextLine !== false) { $combinedLine = $line . $nextLine; // Try to extract the value using regex if (preg_match('/"' . preg_quote($key, '/') . 
'"\s*:\s*"([^"]*)"/', $combinedLine, $matches)) { // String value $value = $matches[1]; } elseif (preg_match('/"' . preg_quote($key, '/') . '"\s*:\s*(\d+)/', $combinedLine, $matches)) { // Numeric value $value = intval($matches[1]); } elseif (preg_match('/"' . preg_quote($key, '/') . '"\s*:\s*(true|false)/', $combinedLine, $matches)) { // Boolean value $value = ($matches[1] === 'true'); } elseif (strpos($combinedLine, 'null') !== false) { // Null value $value = null; } else { error_log("Could not extract value for key '$key' from combined lines"); } } } break; } } // Close the file fclose($handle); if (!$found) { error_log("Key '$key' not found in file: $filePath"); } else if ($value === null) { error_log("Value for key '$key' is null or could not be extracted"); } return $value; } catch (\Exception $e) { error_log("Exception in extractJsonScalarByKey for key '$key': " . $e->getMessage()); return null; } } /** * Extracts the specific_pages array from a large JSON file without loading the entire file into memory * This is a legacy method kept for backward compatibility * * @param string $filePath Path to the JSON file * @param int $maxPages Maximum number of pages to extract (to prevent memory exhaustion) * @return array The extracted specific_pages array */ private function extractSpecificPagesFromJson(string $filePath, int $maxPages = 100): array { return $this->extractJsonArrayByKey($filePath, 'specific_pages', $maxPages); } /** * Calculate the median value of an array of numbers * * @param array $array Array of numbers * @return float The median value */ private function calculateMedian(array $array): float { sort($array); $count = count($array); if ($count === 0) { return 0; } $middle = floor($count / 2); if ($count % 2 === 0) { // Even number of elements, average the two middle values return ($array[$middle - 1] + $array[$middle]) / 2; } else { // Odd number of elements, return the middle value return $array[$middle]; } } }