2370 lines
No EOL
104 KiB
PHP
2370 lines
No EOL
104 KiB
PHP
<?php
|
||
|
||
namespace App\Controller;
|
||
|
||
use Symfony\Bundle\FrameworkBundle\Controller\AbstractController;
|
||
use Symfony\Component\HttpFoundation\Response;
|
||
use Symfony\Component\Routing\Annotation\Route;
|
||
|
||
class WikiController extends AbstractController
|
||
{
|
||
/**
 * Renders the decrepitude report built from wiki_compare/outdated_pages.json.
 *
 * At most 100 entries are read from each of the "regular_pages" and
 * "specific_pages" arrays, plus the "last_updated" scalar, through the
 * controller's key-based JSON extraction helpers. The template is also told
 * whether the JSON file and the staleness histogram image exist on disk.
 *
 * @return Response Rendered admin/wiki_decrepitude.html.twig page
 */
#[Route('/wiki/decrepitude', name: 'app_admin_wiki_decrepitude')]
public function decrepitudeScores(): Response
{
    $compareDir = $this->getParameter('kernel.project_dir') . '/wiki_compare';
    $jsonPath = $compareDir . '/outdated_pages.json';

    // Template context, pre-filled with the values used when the JSON file is absent.
    $context = [
        'regular_pages' => [],
        'specific_pages' => [],
        'last_updated' => null,
        'histogram_exists' => file_exists($compareDir . '/staleness_histogram.png'),
    ];

    if (file_exists($jsonPath)) {
        // The JSON file is large: each key is extracted on its own and the arrays are capped.
        $pageCap = 100;

        foreach (['regular_pages', 'specific_pages'] as $arrayKey) {
            $context[$arrayKey] = $this->extractJsonArrayByKey($jsonPath, $arrayKey, $pageCap);
        }

        $context['last_updated'] = $this->extractJsonScalarByKey($jsonPath, 'last_updated');
    }

    $context['json_exists'] = file_exists($jsonPath);

    return $this->render('admin/wiki_decrepitude.html.twig', $context);
}
|
||
|
||
/**
 * Displays the evolution of page rankings over time.
 *
 * Reads wiki_compare/page_rankings.json and hands its "timestamps", "pages"
 * and "global_metrics" entries to the template; the last element of
 * "timestamps" is used as the last-update marker. A missing file yields empty
 * data. An unreadable or malformed file is logged with error_log() and also
 * yields empty data instead of an error page.
 *
 * @return Response Rendered admin/wiki_rankings.html.twig page
 */
#[Route('/wiki/rankings', name: 'app_admin_wiki_rankings')]
public function pageRankings(): Response
{
    $rankingsFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/page_rankings.json';

    $timestamps = [];
    $pages = [];
    $globalMetrics = [];
    $lastUpdated = null;
    $jsonExists = file_exists($rankingsFile);

    if ($jsonExists) {
        try {
            $rawJson = file_get_contents($rankingsFile);
            if ($rawJson === false) {
                throw new \RuntimeException('the rankings file could not be read');
            }

            // Without JSON_THROW_ON_ERROR json_decode() never throws, so malformed
            // JSON would bypass the catch block and go unreported.
            $rankingsData = json_decode($rawJson, true, 512, JSON_THROW_ON_ERROR);
            if (!is_array($rankingsData)) {
                throw new \UnexpectedValueException('the top-level JSON value is not an object');
            }

            // end() below requires an array, so anything else is treated as "no timestamps".
            $timestamps = is_array($rankingsData['timestamps'] ?? null) ? $rankingsData['timestamps'] : [];
            $pages = $rankingsData['pages'] ?? [];
            $globalMetrics = $rankingsData['global_metrics'] ?? [];

            // The most recent snapshot is the last entry of the timestamps list.
            if ($timestamps !== []) {
                $lastUpdated = end($timestamps);
            }
        } catch (\Exception $e) {
            error_log("Error loading rankings data: " . $e->getMessage());
        }
    }

    return $this->render('admin/wiki_rankings.html.twig', [
        'timestamps' => $timestamps,
        'pages' => $pages,
        'global_metrics' => $globalMetrics,
        'last_updated' => $lastUpdated,
        'json_exists' => $jsonExists,
    ]);
}
|
||
/**
 * Finds headings that skip one or more levels relative to the heading before them.
 *
 * Example: an h4 that directly follows an h2 (no h3 in between) is reported.
 * Sections whose 'level' is missing or casts to 0 are ignored entirely, and the
 * first section with a usable level only serves as the starting reference.
 *
 * @param array $sections List of sections with 'level' and 'title' keys
 * @return array Keys of $sections whose level is more than one deeper than the previous one
 */
private function detectHeadingHierarchyErrors(array $sections): array
{
    $flagged = [];
    $previousLevel = 0; // 0 means "no heading seen yet"

    foreach ($sections as $position => $entry) {
        $level = (int) ($entry['level'] ?? 0);

        if ($level === 0) {
            continue;
        }

        // Only compare once a reference heading exists; a jump of exactly +1 is fine.
        if ($previousLevel !== 0 && $level > $previousLevel + 1) {
            $flagged[] = $position;
        }

        $previousLevel = $level;
    }

    return $flagged;
}
|
||
|
||
/**
 * Builds a two-column (English / French) list of sections that exist in only one language.
 *
 * Every English-only section gets an empty French placeholder at the same level,
 * followed by every French-only section with an empty English placeholder.
 * Entries under the 'common' key are currently not included in the result.
 *
 * @param array $sectionComparison Section comparison data with 'common', 'en_only', and 'fr_only' keys
 * @return array List of rows, each with an 'en' and an 'fr' cell
 */
private function buildAlignedSectionList(array $sectionComparison): array
{
    // Cell for the language that really has the section.
    $filledCell = static fn ($entry): array => [
        'title' => $entry['title'],
        'level' => $entry['level'],
    ];

    // Cell for the language that lacks it: empty title, same level, flagged as placeholder.
    $placeholderCell = static fn ($entry): array => [
        'title' => '',
        'level' => $entry['level'],
        'is_placeholder' => true,
    ];

    $rows = [];

    $englishOnly = $sectionComparison['en_only'] ?? null;
    if (is_array($englishOnly)) {
        foreach ($englishOnly as $entry) {
            $rows[] = ['en' => $filledCell($entry), 'fr' => $placeholderCell($entry)];
        }
    }

    // French-only sections come after all English-only ones.
    $frenchOnly = $sectionComparison['fr_only'] ?? null;
    if (is_array($frenchOnly)) {
        foreach ($frenchOnly as $entry) {
            $rows[] = ['en' => $placeholderCell($entry), 'fr' => $filledCell($entry)];
        }
    }

    return $rows;
}
|
||
|
||
/**
 * Site root: sends the visitor to the route named "app_admin_wiki".
 *
 * @return Response Redirect response
 */
#[Route('/', name: 'app_public_index')]
public function accueilAction(): Response
{
    $landingRoute = 'app_admin_wiki';

    return $this->redirectToRoute($landingRoute);
}
|
||
/**
 * Lists recent wiki changes together with per-contributor statistics.
 *
 * Data comes from wiki_compare/recent_changes.json. When the file is missing,
 * the fetch script is run once; if it produced the file, the fresh data is
 * loaded and shown in the same request. Existing data is never refreshed
 * automatically here, whatever its age.
 *
 * @return Response Rendered admin/wiki_recent_changes.html.twig page
 */
#[Route('/wiki/recent-changes', name: 'app_admin_wiki_recent_changes')]
public function recentChanges(): Response
{
    $recentChangesFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/recent_changes.json';

    $recentChanges = [];
    $lastUpdated = null;
    $teamMembers = [];

    // Missing file: try to generate it before loading.
    if (!file_exists($recentChangesFile)) {
        $this->refreshRecentChangesData();

        if (!file_exists($recentChangesFile)) {
            $this->addFlash('error', 'Impossible de générer le fichier des changements récents.');
        }
    }

    // Load whatever is on disk now, including a file the script has just created.
    if (file_exists($recentChangesFile)) {
        $rawJson = file_get_contents($recentChangesFile);
        $recentChangesData = $rawJson === false ? null : json_decode($rawJson, true);

        if (is_array($recentChangesData) && is_array($recentChangesData['recent_changes'] ?? null)) {
            $recentChanges = $recentChangesData['recent_changes'];
            $lastUpdated = $recentChangesData['last_updated'] ?? null;

            // Aggregate the changes per contributor for the team table.
            $teamMembers = $this->processTeamMembersStats($recentChanges);
        }
    }

    return $this->render('admin/wiki_recent_changes.html.twig', [
        'recent_changes' => $recentChanges,
        'last_updated' => $lastUpdated,
        'team_members' => $teamMembers,
    ]);
}
|
||
|
||
/**
 * Aggregates recent changes into per-contributor statistics.
 *
 * Each change is expected to carry a 'user' and a 'change_size'. The size may be
 * a number or a string such as "+123", "−123" (Unicode minus, U+2212) or
 * "+1,234" (thousands separators are ignored). Positive sizes add to
 * 'chars_added', negative ones to 'chars_deleted'. 'chars_changed' is part of
 * the result but is never incremented. Entries without a usable 'user' are skipped.
 *
 * @param array $recentChanges Recent changes data
 * @return array Team members statistics, sorted by number of contributions (descending)
 */
private function processTeamMembersStats(array $recentChanges): array
{
    $teamMembers = [];

    foreach ($recentChanges as $change) {
        $user = is_array($change) ? ($change['user'] ?? null) : null;
        if (!is_string($user) && !is_int($user)) {
            continue;
        }
        $user = (string) $user;

        $teamMembers[$user] ??= [
            'username' => $user,
            'contributions' => 0,
            'chars_added' => 0,
            'chars_changed' => 0,
            'chars_deleted' => 0,
            // Page titles go into the URL path: spaces become underscores and the
            // rest is percent-encoded. urlencode() would turn a space into "+",
            // which in a path is a literal plus sign and points at another user.
            'user_url' => 'https://wiki.openstreetmap.org/wiki/User:' . rawurlencode(str_replace(' ', '_', $user)),
        ];

        $teamMembers[$user]['contributions']++;

        // Work out the signed size of this change; anything unparseable counts as 0.
        $delta = 0;
        $rawSize = $change['change_size'] ?? null;
        if (is_numeric($rawSize)) {
            $delta = (int) $rawSize;
        } elseif (is_string($rawSize)) {
            // Map the Unicode minus to "-" and drop thousands separators
            // (comma, space, no-break space, narrow no-break space).
            $normalized = str_replace(
                ["\u{2212}", ',', ' ', "\u{00A0}", "\u{202F}"],
                ['-', '', '', '', ''],
                trim($rawSize)
            );
            if (preg_match('/^[+-]?\d+$/', $normalized) === 1) {
                $delta = (int) $normalized;
            }
        }

        if ($delta > 0) {
            $teamMembers[$user]['chars_added'] += $delta;
        } elseif ($delta < 0) {
            $teamMembers[$user]['chars_deleted'] += -$delta;
        }
    }

    // Drop the username keys and put the most active contributors first.
    $teamMembers = array_values($teamMembers);
    usort(
        $teamMembers,
        static fn (array $a, array $b): int => $b['contributions'] <=> $a['contributions']
    );

    return $teamMembers;
}
|
||
|
||
/**
 * Regenerates the recent changes data by running wiki_compare/fetch_recent_changes.py --force.
 *
 * Problems are reported through flash messages: a missing script, a non-zero
 * exit code (with the script's combined stdout/stderr) or an exception raised
 * while launching it.
 */
private function refreshRecentChangesData(): void
{
    try {
        $scriptPath = $this->getParameter('kernel.project_dir') . '/wiki_compare/fetch_recent_changes.py';

        if (!file_exists($scriptPath)) {
            $this->addFlash('error', 'Le script fetch_recent_changes.py n\'existe pas.');

            return;
        }

        $output = [];
        $returnCode = 0;
        // Quote the path so a project directory containing spaces or shell
        // metacharacters cannot break or alter the command line.
        exec('python3 ' . escapeshellarg($scriptPath) . ' --force 2>&1', $output, $returnCode);

        if ($returnCode !== 0) {
            $this->addFlash('warning', 'Impossible de mettre à jour les changements récents. Erreur: ' . implode("\n", $output));
        }
    } catch (\Exception $e) {
        $this->addFlash('error', 'Erreur lors de l\'exécution du script: ' . $e->getMessage());
    }
}
|
||
|
||
/**
 * Lists wiki pages that have no French translation.
 *
 * Data comes from wiki_compare/untranslated_french_pages.json. The generating
 * script is run when the file is missing or when its "last_updated" value is at
 * least one hour away from the current time. After a refresh the file is
 * re-read within the same request. Pages are de-duplicated by title (first
 * occurrence wins) and sorted by title, case-insensitively.
 *
 * @return Response Rendered admin/wiki_missing_translations.html.twig page
 */
#[Route('/wiki/missing-translations', name: 'app_admin_wiki_missing_translations')]
public function missingTranslations(): Response
{
    $untranslatedFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/untranslated_french_pages.json';

    // Reads the JSON file and returns [pages, last_updated]; empty defaults on any problem.
    $load = static function () use ($untranslatedFile): array {
        if (!file_exists($untranslatedFile)) {
            return [[], null];
        }

        $rawJson = file_get_contents($untranslatedFile);
        $data = $rawJson === false ? null : json_decode($rawJson, true);
        if (!is_array($data) || !is_array($data['untranslated_pages'] ?? null)) {
            return [[], null];
        }

        return [$data['untranslated_pages'], $data['last_updated'] ?? null];
    };

    $fileExisted = file_exists($untranslatedFile);
    [$untranslatedPages, $lastUpdated] = $load();

    // A refresh is needed when there is no file, or when the data is an hour old or more.
    $needsRefresh = !$fileExisted;
    if (!$needsRefresh && is_string($lastUpdated) && $lastUpdated !== '') {
        try {
            $ageInSeconds = abs(time() - (new \DateTimeImmutable($lastUpdated))->getTimestamp());
            $needsRefresh = $ageInSeconds >= 3600;
        } catch (\Exception $e) {
            // Unparseable timestamp: the age is unknown, so the data is shown as it is.
        }
    }

    if ($needsRefresh) {
        $this->refreshUntranslatedPagesData();

        if (file_exists($untranslatedFile)) {
            // Reload in place rather than redirecting to this same route: if the
            // script failed and the data is still stale, a redirect would loop
            // forever and rerun the script on every hop.
            [$untranslatedPages, $lastUpdated] = $load();
        } else {
            $this->addFlash('error', 'Impossible de générer le fichier des pages sans traduction.');
        }
    }

    // Keep the first entry seen for each title; entries without a usable title are dropped.
    $uniquePages = [];
    foreach ($untranslatedPages as $page) {
        if (!is_array($page) || !is_scalar($page['title'] ?? null)) {
            continue;
        }
        $uniquePages[(string) $page['title']] ??= $page;
    }
    $uniquePages = array_values($uniquePages);

    usort(
        $uniquePages,
        static fn (array $a, array $b): int => strcasecmp((string) $a['title'], (string) $b['title'])
    );

    return $this->render('admin/wiki_missing_translations.html.twig', [
        'untranslated_pages' => $uniquePages,
        'last_updated' => $lastUpdated,
    ]);
}
|
||
|
||
/**
 * Regenerates the untranslated pages data by running
 * wiki_compare/find_untranslated_french_pages.py --force.
 *
 * Problems are reported through flash messages: a missing script, a non-zero
 * exit code (with the script's combined stdout/stderr) or an exception raised
 * while launching it.
 */
private function refreshUntranslatedPagesData(): void
{
    try {
        $scriptPath = $this->getParameter('kernel.project_dir') . '/wiki_compare/find_untranslated_french_pages.py';

        if (!file_exists($scriptPath)) {
            $this->addFlash('error', 'Le script find_untranslated_french_pages.py n\'existe pas.');

            return;
        }

        $output = [];
        $returnCode = 0;
        // Quote the path so a project directory containing spaces or shell
        // metacharacters cannot break or alter the command line.
        exec('python3 ' . escapeshellarg($scriptPath) . ' --force 2>&1', $output, $returnCode);

        if ($returnCode !== 0) {
            $this->addFlash('warning', 'Impossible de mettre à jour les pages sans traduction. Erreur: ' . implode("\n", $output));
        }
    } catch (\Exception $e) {
        $this->addFlash('error', 'Erreur lors de l\'exécution du script: ' . $e->getMessage());
    }
}
|
||
|
||
/**
 * Lists wiki pages that are not available in French, grouped by language prefix.
 *
 * Data comes from wiki_compare/pages_unavailable_in_french.json. The generating
 * script is run when the file is missing or when its "last_updated" value is at
 * least one hour away from the current time. After a refresh the file is
 * re-read within the same request. The 'En' group is placed first; all other
 * groups follow sorted by key.
 *
 * @return Response Rendered admin/wiki_pages_unavailable_in_french.html.twig page
 */
#[Route('/wiki/pages-unavailable-in-french', name: 'app_admin_wiki_pages_unavailable_in_french')]
public function pagesUnavailableInFrench(): Response
{
    $unavailablePagesFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/pages_unavailable_in_french.json';

    // Reads the JSON file and returns [grouped_pages, all_pages, last_updated];
    // empty defaults on any problem.
    $load = static function () use ($unavailablePagesFile): array {
        if (!file_exists($unavailablePagesFile)) {
            return [[], [], null];
        }

        $rawJson = file_get_contents($unavailablePagesFile);
        $data = $rawJson === false ? null : json_decode($rawJson, true);
        if (!is_array($data)) {
            return [[], [], null];
        }

        return [
            is_array($data['grouped_pages'] ?? null) ? $data['grouped_pages'] : [],
            is_array($data['all_pages'] ?? null) ? $data['all_pages'] : [],
            $data['last_updated'] ?? null,
        ];
    };

    $fileExisted = file_exists($unavailablePagesFile);
    [$groupedPages, $allPages, $lastUpdated] = $load();

    // A refresh is needed when there is no file, or when the data is an hour old or more.
    $needsRefresh = !$fileExisted;
    if (!$needsRefresh && is_string($lastUpdated) && $lastUpdated !== '') {
        try {
            $ageInSeconds = abs(time() - (new \DateTimeImmutable($lastUpdated))->getTimestamp());
            $needsRefresh = $ageInSeconds >= 3600;
        } catch (\Exception $e) {
            // Unparseable timestamp: the age is unknown, so the data is shown as it is.
        }
    }

    if ($needsRefresh) {
        $this->refreshPagesUnavailableInFrenchData();

        if (file_exists($unavailablePagesFile)) {
            // Reload in place rather than redirecting to this same route: if the
            // script failed and the data is still stale, a redirect would loop
            // forever and rerun the script on every hop.
            [$groupedPages, $allPages, $lastUpdated] = $load();
        } else {
            $this->addFlash('error', 'Impossible de générer le fichier des pages non disponibles en français.');
        }
    }

    // Pull the English group out, sort the remaining groups by key, then put English back first.
    $englishPages = $groupedPages['En'] ?? [];
    unset($groupedPages['En']);
    ksort($groupedPages);

    if (!empty($englishPages)) {
        $groupedPages = ['En' => $englishPages] + $groupedPages;
    }

    return $this->render('admin/wiki_pages_unavailable_in_french.html.twig', [
        'grouped_pages' => $groupedPages,
        'all_pages' => $allPages,
        'last_updated' => $lastUpdated,
    ]);
}
|
||
|
||
/**
 * Regenerates the "pages unavailable in French" data by running
 * wiki_compare/find_pages_unavailable_in_french.py --force.
 *
 * Problems are reported through flash messages: a missing script, a non-zero
 * exit code (with the script's combined stdout/stderr) or an exception raised
 * while launching it.
 */
private function refreshPagesUnavailableInFrenchData(): void
{
    try {
        $scriptPath = $this->getParameter('kernel.project_dir') . '/wiki_compare/find_pages_unavailable_in_french.py';

        if (!file_exists($scriptPath)) {
            $this->addFlash('error', 'Le script find_pages_unavailable_in_french.py n\'existe pas.');

            return;
        }

        $output = [];
        $returnCode = 0;
        // Quote the path so a project directory containing spaces or shell
        // metacharacters cannot break or alter the command line.
        exec('python3 ' . escapeshellarg($scriptPath) . ' --force 2>&1', $output, $returnCode);

        if ($returnCode !== 0) {
            $this->addFlash('warning', 'Impossible de mettre à jour les pages non disponibles en français. Erreur: ' . implode("\n", $output));
        }
    } catch (\Exception $e) {
        $this->addFlash('error', 'Erreur lors de l\'exécution du script: ' . $e->getMessage());
    }
}
|
||
|
||
/**
 * Shows the OSM-FR working groups (grouped by category) and local groups.
 *
 * Data comes from wiki_compare/osm_fr_groups.json. When the file is missing,
 * the fetch script is run once; if it produced the file, the fresh data is
 * loaded and shown in the same request. Existing data is never refreshed
 * automatically here, whatever its age. Working groups without a 'category'
 * are filed under 'Autres'; categories are sorted by name.
 *
 * @return Response Rendered admin/wiki_osm_fr_groups.html.twig page
 */
#[Route('/wiki/osm-fr-groups', name: 'app_admin_wiki_osm_fr_groups')]
public function osmFrGroups(): Response
{
    $groupsFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/osm_fr_groups.json';
    $defaultUmapUrl = 'https://umap.openstreetmap.fr/fr/map/groupes-locaux-openstreetmap_152488';

    $workingGroups = [];
    $localGroups = [];
    $umapUrl = $defaultUmapUrl;
    $lastUpdated = null;

    // Missing file: try to generate it before loading.
    if (!file_exists($groupsFile)) {
        $this->refreshOsmFrGroupsData();

        if (!file_exists($groupsFile)) {
            $this->addFlash('error', 'Impossible de générer le fichier des groupes OSM-FR.');
        }
    }

    // Load whatever is on disk now, including a file the script has just created.
    if (file_exists($groupsFile)) {
        $rawJson = file_get_contents($groupsFile);
        $groupsData = $rawJson === false ? null : json_decode($rawJson, true);

        if (is_array($groupsData)) {
            if (is_array($groupsData['working_groups'] ?? null)) {
                $workingGroups = $groupsData['working_groups'];
            }

            if (is_array($groupsData['local_groups'] ?? null)) {
                $localGroups = $groupsData['local_groups'];
            }

            $umapUrl = $groupsData['umap_url'] ?? $defaultUmapUrl;
            $lastUpdated = $groupsData['last_updated'] ?? null;
        }
    }

    // Bucket the working groups by category.
    $groupedWorkingGroups = [];
    foreach ($workingGroups as $group) {
        $category = $group['category'] ?? 'Autres';
        $groupedWorkingGroups[$category][] = $group;
    }

    ksort($groupedWorkingGroups);

    return $this->render('admin/wiki_osm_fr_groups.html.twig', [
        'working_groups' => $groupedWorkingGroups,
        'local_groups' => $localGroups,
        'umap_url' => $umapUrl,
        'last_updated' => $lastUpdated,
    ]);
}
|
||
|
||
/**
 * Regenerates the OSM-FR groups data by running wiki_compare/fetch_osm_fr_groups.py --force.
 *
 * Problems are reported through flash messages: a missing script, a non-zero
 * exit code (with the script's combined stdout/stderr) or an exception raised
 * while launching it.
 */
private function refreshOsmFrGroupsData(): void
{
    try {
        $scriptPath = $this->getParameter('kernel.project_dir') . '/wiki_compare/fetch_osm_fr_groups.py';

        if (!file_exists($scriptPath)) {
            $this->addFlash('error', 'Le script fetch_osm_fr_groups.py n\'existe pas.');

            return;
        }

        $output = [];
        $returnCode = 0;
        // Quote the path so a project directory containing spaces or shell
        // metacharacters cannot break or alter the command line.
        exec('python3 ' . escapeshellarg($scriptPath) . ' --force 2>&1', $output, $returnCode);

        if ($returnCode !== 0) {
            $this->addFlash('warning', 'Impossible de mettre à jour les groupes OSM-FR. Erreur: ' . implode("\n", $output));
        }
    } catch (\Exception $e) {
        $this->addFlash('error', 'Erreur lors de l\'exécution du script: ' . $e->getMessage());
    }
}
|
||
|
||
/**
 * Shows suspicious deletions from two sources.
 *
 * 1. wiki_compare/suspicious_deletions.json ("deletions" + "last_updated"). When
 *    the file is missing, detect_suspicious_deletions.py is run once and the
 *    result is loaded only if the script exits with 0 and the file now exists.
 * 2. wiki_compare/outdated_pages.json: pages whose French version has more than
 *    30% fewer words than the English one, sorted by that percentage (highest first).
 *
 * @return Response Rendered admin/wiki_suspicious_deletions.html.twig page
 */
#[Route('/wiki/suspicious-deletions', name: 'app_admin_wiki_suspicious_deletions')]
public function suspiciousDeletions(): Response
{
    $compareDir = $this->getParameter('kernel.project_dir') . '/wiki_compare';
    $suspiciousDeletesFile = $compareDir . '/suspicious_deletions.json';
    $wordDiffFile = $compareDir . '/outdated_pages.json';

    $suspiciousPages = [];
    $wordDiffPages = [];
    $lastUpdated = null;

    $canLoadDeletions = file_exists($suspiciousDeletesFile);

    if (!$canLoadDeletions) {
        // If the file doesn't exist, try to create it by running the script.
        try {
            $scriptPath = $compareDir . '/detect_suspicious_deletions.py';

            if (file_exists($scriptPath)) {
                $output = [];
                $returnCode = 0;
                // Quote the path so spaces or shell metacharacters cannot alter the command.
                exec('python3 ' . escapeshellarg($scriptPath) . ' 2>&1', $output, $returnCode);

                if ($returnCode === 0 && file_exists($suspiciousDeletesFile)) {
                    $canLoadDeletions = true;
                } else {
                    $this->addFlash('warning', 'Impossible de générer le fichier de suppressions suspectes. Erreur: ' . implode("\n", $output));
                }
            }
        } catch (\Exception $e) {
            $this->addFlash('error', 'Erreur lors de l\'exécution du script: ' . $e->getMessage());
        }
    }

    if ($canLoadDeletions) {
        $rawJson = file_get_contents($suspiciousDeletesFile);
        $suspiciousData = $rawJson === false ? null : json_decode($rawJson, true);

        if (is_array($suspiciousData) && is_array($suspiciousData['deletions'] ?? null)) {
            $suspiciousPages = $suspiciousData['deletions'];
            $lastUpdated = $suspiciousData['last_updated'] ?? null;
        }
    }

    // Also load the word-diff based suspicious pages for comparison.
    if (file_exists($wordDiffFile)) {
        $rawJson = file_get_contents($wordDiffFile);
        $jsonData = $rawJson === false ? null : json_decode($rawJson, true);

        // Other actions of this controller read this file as an object holding
        // "regular_pages" and "specific_pages" lists, so the page entries are
        // taken from those lists. A plain top-level list of pages is still accepted.
        // NOTE(review): entries are presumed to carry en_page/fr_page with a
        // word_count each - confirm against the generator script.
        $candidates = [];
        if (is_array($jsonData)) {
            if (isset($jsonData['regular_pages']) || isset($jsonData['specific_pages'])) {
                foreach (['regular_pages', 'specific_pages'] as $listKey) {
                    if (is_array($jsonData[$listKey] ?? null)) {
                        $candidates = array_merge($candidates, array_values($jsonData[$listKey]));
                    }
                }
            } else {
                $candidates = $jsonData;
            }
        }

        foreach ($candidates as $page) {
            if (!is_array($page) || !isset($page['fr_page'], $page['en_page'])) {
                continue;
            }

            $enWordCount = (int) ($page['en_page']['word_count'] ?? 0);
            $frWordCount = (int) ($page['fr_page']['word_count'] ?? 0);
            $wordDiff = $enWordCount - $frWordCount;

            // Keep pages where English is longer by more than 30% of its own length.
            // $wordDiff > 0 together with $frWordCount > 0 guarantees $enWordCount > 0.
            if ($wordDiff > 0 && $frWordCount > 0 && ($wordDiff / $enWordCount) > 0.3) {
                $page['deletion_percentage'] = round(($wordDiff / $enWordCount) * 100, 2);
                $wordDiffPages[] = $page;
            }
        }

        // Sort by deletion percentage (highest first).
        usort(
            $wordDiffPages,
            static fn (array $a, array $b): int => $b['deletion_percentage'] <=> $a['deletion_percentage']
        );
    }

    return $this->render('admin/wiki_suspicious_deletions.html.twig', [
        'suspicious_pages' => $wordDiffPages,
        'recent_deletions' => $suspiciousPages,
        'last_updated' => $lastUpdated,
    ]);
}
|
||
|
||
/**
 * Lists tag proposals: those currently being voted on, then recently modified ones.
 *
 * Data comes from wiki_compare/proposals.json. When the file is missing, the
 * fetch script is run once; if it produced the file, the fresh data is loaded
 * and shown in the same request. Existing data is never refreshed automatically
 * here, whatever its age. Both lists are flattened into one list of uniform
 * rows for the template; missing fields become empty strings.
 *
 * @return Response Rendered admin/wiki_tag_proposals.html.twig page
 */
#[Route('/wiki/tag-proposals', name: 'app_admin_wiki_tag_proposals')]
public function tagProposals(): Response
{
    $proposalsFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/proposals.json';

    $votingProposals = [];
    $recentProposals = [];
    $lastUpdated = null;

    // Missing file: try to generate it before loading.
    if (!file_exists($proposalsFile)) {
        $this->refreshProposalsData();

        if (!file_exists($proposalsFile)) {
            $this->addFlash('error', 'Impossible de générer le fichier de propositions.');
        }
    }

    // Load whatever is on disk now, including a file the script has just created.
    if (file_exists($proposalsFile)) {
        $rawJson = file_get_contents($proposalsFile);
        $proposalsData = $rawJson === false ? null : json_decode($rawJson, true);

        if (is_array($proposalsData)) {
            if (is_array($proposalsData['voting_proposals'] ?? null)) {
                $votingProposals = $proposalsData['voting_proposals'];
            }

            if (is_array($proposalsData['recent_proposals'] ?? null)) {
                $recentProposals = $proposalsData['recent_proposals'];
            }

            $lastUpdated = $proposalsData['last_updated'] ?? null;
        }
    }

    // Format the proposals for the template.
    $formattedProposals = [];

    foreach ($votingProposals as $proposal) {
        if (!is_array($proposal)) {
            continue;
        }

        $formattedProposal = [
            'feature' => $proposal['title'] ?? '',
            'url' => $proposal['url'] ?? '',
            'description' => 'Proposition en cours de vote',
            'proposer' => $proposal['proposer'] ?? '',
            'status' => $proposal['status'] ?? 'Voting',
            'type' => 'voting',
        ];

        // Vote details are only attached when the proposal carries a 'votes' entry.
        if (isset($proposal['votes'])) {
            $formattedProposal['votes'] = $proposal['votes'];
            $formattedProposal['total_votes'] = $proposal['total_votes'] ?? 0;
            $formattedProposal['approve_percentage'] = $proposal['approve_percentage'] ?? 0;
            $formattedProposal['oppose_percentage'] = $proposal['oppose_percentage'] ?? 0;
            $formattedProposal['abstain_percentage'] = $proposal['abstain_percentage'] ?? 0;
        }

        $formattedProposals[] = $formattedProposal;
    }

    foreach ($recentProposals as $proposal) {
        if (!is_array($proposal)) {
            continue;
        }

        $formattedProposals[] = [
            'feature' => $proposal['title'] ?? '',
            'url' => $proposal['url'] ?? '',
            'description' => 'Dernière modification: ' . ($proposal['last_modified'] ?? ''),
            'proposer' => $proposal['modified_by'] ?? '',
            'status' => 'Draft',
            'type' => 'recent',
        ];
    }

    return $this->render('admin/wiki_tag_proposals.html.twig', [
        'proposals' => $formattedProposals,
        'last_updated' => $lastUpdated,
    ]);
}
|
||
|
||
/**
 * Regenerates the proposals data by running wiki_compare/fetch_proposals.py --force.
 *
 * Problems are reported through flash messages: a missing script, a non-zero
 * exit code (with the script's combined stdout/stderr) or an exception raised
 * while launching it.
 */
private function refreshProposalsData(): void
{
    try {
        $scriptPath = $this->getParameter('kernel.project_dir') . '/wiki_compare/fetch_proposals.py';

        if (!file_exists($scriptPath)) {
            $this->addFlash('error', 'Le script fetch_proposals.py n\'existe pas.');

            return;
        }

        $output = [];
        $returnCode = 0;
        // Quote the path so a project directory containing spaces or shell
        // metacharacters cannot break or alter the command line.
        exec('python3 ' . escapeshellarg($scriptPath) . ' --force 2>&1', $output, $returnCode);

        if ($returnCode !== 0) {
            $this->addFlash('warning', 'Impossible de mettre à jour les propositions. Erreur: ' . implode("\n", $output));
        }
    } catch (\Exception $e) {
        $this->addFlash('error', 'Erreur lors de l\'exécution du script: ' . $e->getMessage());
    }
}
|
||
|
||
/**
 * Suggests one randomly chosen page to improve.
 *
 * Candidates are the first 100 entries of each of the "regular_pages" and
 * "specific_pages" arrays in wiki_compare/outdated_pages.json. When the file
 * is missing or yields no candidates, an error flash is set and the visitor is
 * redirected to the "app_admin_wiki" route.
 *
 * @return Response Rendered suggestion page, or a redirect on failure
 */
#[Route('/wiki/random-suggestion', name: 'app_admin_wiki_random_suggestion')]
public function randomSuggestion(): Response
{
    $jsonFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/outdated_pages.json';

    if (!file_exists($jsonFile)) {
        $this->addFlash('error', 'Le fichier outdated_pages.json n\'existe pas.');

        return $this->redirectToRoute('app_admin_wiki');
    }

    // Cap what is read from the (large) JSON file.
    $maxItems = 100;

    // Gather both page lists into one pool of candidates.
    $candidates = [];
    foreach (['regular_pages', 'specific_pages'] as $listKey) {
        $candidates = array_merge($candidates, $this->extractJsonArrayByKey($jsonFile, $listKey, $maxItems));
    }

    if (empty($candidates)) {
        $this->addFlash('error', 'Aucune page à améliorer n\'a été trouvée.');

        return $this->redirectToRoute('app_admin_wiki');
    }

    $pickedIndex = array_rand($candidates);

    return $this->render('admin/wiki_random_suggestion.html.twig', [
        'page' => $candidates[$pickedIndex],
    ]);
}
|
||
|
||
/**
 * Shows the English page for a wiki key next to the (possibly missing) French
 * one, with a link to the French edit form.
 *
 * Page HTML is fetched by a short Python helper that imports fetch_wiki_page
 * and HTML_CACHE_DIR from wiki_compare/wiki_compare.py. The helper is written
 * to a uniquely named temporary file inside wiki_compare/, so that the import
 * resolves and concurrent requests do not share a file. It is run without a
 * shell and removed afterwards. When wiki_compare.py is absent, both HTML
 * values stay null.
 *
 * @param string $key Wiki page key taken from the URL (any non-empty string)
 * @return Response Rendered admin/wiki_create_french.html.twig page
 */
#[Route('/wiki/create-french/{key}', name: 'app_admin_wiki_create_french', requirements: ['key' => '.+'])]
public function createFrench(string $key): Response
{
    // Construct the URLs for the English page and the French page creation form.
    $englishUrl = "https://wiki.openstreetmap.org/wiki/{$key}";
    // The title is a query-string value, so it is percent-encoded: a key
    // containing "&" or "#" would otherwise truncate or corrupt the URL.
    $frenchEditUrl = 'https://wiki.openstreetmap.org/w/index.php?title=' . rawurlencode("FR:{$key}") . '&action=edit';

    $compareDir = $this->getParameter('kernel.project_dir') . '/wiki_compare';
    $scriptPath = $compareDir . '/wiki_compare.py';
    $englishHtml = null;
    $frenchHtml = null;
    $frenchCacheExists = false;

    if (file_exists($scriptPath)) {
        // Nowdoc: the Python source is taken literally, with no PHP interpolation.
        $pythonCode = <<<'PY'
    #!/usr/bin/env python3
    # -*- coding: utf-8 -*-

    import sys
    import json
    import hashlib
    from pathlib import Path
    from wiki_compare import fetch_wiki_page, HTML_CACHE_DIR

    # Get the key from command line arguments
    key = sys.argv[1]
    language = sys.argv[2]

    # Check if we're just checking cache existence
    check_cache_only = len(sys.argv) > 3 and sys.argv[3] == 'check_cache'

    if check_cache_only and language == 'fr':
        # For French pages, construct the URL to check cache
        if key.startswith('http'):
            url = key
        else:
            url = f"https://wiki.openstreetmap.org/wiki/FR:{key}"

        # Create cache key
        cache_key = hashlib.md5(url.encode()).hexdigest()
        cache_file = Path(HTML_CACHE_DIR) / f"{cache_key}.html"

        # Check if cache exists
        if cache_file.exists():
            print("CACHE_EXISTS")
        else:
            print("CACHE_MISSING")
    else:
        # Normal fetch operation
        page = fetch_wiki_page(key, language)

        # Output the HTML content
        if page and 'html_content' in page:
            print(page['html_content'])
        else:
            print("")
    PY;

        // Unique name per request; a fixed name lets one request delete the
        // script while another is still running it.
        $tempScriptPath = $compareDir . '/temp_fetch_page_' . bin2hex(random_bytes(8)) . '.py';

        if (file_put_contents($tempScriptPath, $pythonCode) === false) {
            error_log('Could not write temporary fetch script: ' . $tempScriptPath);
        } else {
            // Runs the helper with the given arguments and returns its stdout
            // (null when it printed nothing or could not be started). The array
            // form of proc_open() starts python3 directly, so $key - which comes
            // straight from the URL - is never interpreted by a shell.
            $runHelper = static function (string ...$arguments) use ($compareDir, $tempScriptPath): ?string {
                $pipes = [];
                $process = proc_open(
                    array_merge(['python3', $tempScriptPath], $arguments),
                    [1 => ['pipe', 'w']],
                    $pipes,
                    $compareDir
                );
                if (!is_resource($process)) {
                    return null;
                }

                $stdout = stream_get_contents($pipes[1]);
                fclose($pipes[1]);
                proc_close($process);

                return ($stdout === false || $stdout === '') ? null : $stdout;
            };

            try {
                // First check if the French page exists in the cache.
                $frenchCacheExists = trim((string) $runHelper($key, 'fr', 'check_cache')) === 'CACHE_EXISTS';

                // Fetch the English page and keep only its main content.
                $englishHtml = $runHelper($key, 'en');
                if ($englishHtml) {
                    $englishHtml = $this->extractMainContent($englishHtml);
                }

                // Fetch the French page (it might not exist, but we try anyway).
                $frenchHtml = $runHelper($key, 'fr');
                if ($frenchHtml) {
                    $frenchHtml = $this->extractMainContent($frenchHtml);
                }
            } finally {
                // Always clean up the temporary script, even if something above threw.
                if (file_exists($tempScriptPath)) {
                    unlink($tempScriptPath);
                }
            }
        }
    }

    return $this->render('admin/wiki_create_french.html.twig', [
        'key' => $key,
        'english_url' => $englishUrl,
        'french_edit_url' => $frenchEditUrl,
        'english_html' => $englishHtml,
        'french_html' => $frenchHtml,
        'french_cache_exists' => $frenchCacheExists,
    ]);
}
|
||
|
||
#[Route('/wiki/archived-proposals', name: 'app_admin_wiki_archived_proposals')]
/**
 * Lists the archived proposals stored in wiki_compare/archived_proposals.json.
 *
 * Query parameters:
 *  - refresh: when present, the data is regenerated before redirecting back here.
 *  - limit:   optional positive integer forwarded to the refresh script and
 *             preserved across redirects; anything below 1 is ignored.
 *
 * The data is also regenerated when the JSON file is missing or when its
 * "last_updated" value is more than one day old. After an automatic refresh
 * the request is only redirected if "last_updated" actually changed, so a
 * failing refresh script cannot produce an endless redirect loop.
 */
public function archivedProposals(\Symfony\Component\HttpFoundation\Request $request): Response
{
    $jsonFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/archived_proposals.json';
    $forceRefresh = $request->query->has('refresh');

    $limit = $request->query->get('limit') ? (int) $request->query->get('limit') : null;
    if ($limit !== null && $limit < 1) {
        // Non-numeric input casts to 0 and negative limits make no sense: treat both as "no limit".
        $limit = null;
    }

    // Redirect parameters shared by every redirect below (keeps the limit, if any).
    $routeParams = $limit !== null ? ['limit' => $limit] : [];

    $proposals = [];
    $statistics = [];
    $lastUpdated = null;

    // Explicit refresh requested by the user.
    if ($forceRefresh) {
        $this->refreshArchivedProposalsData($limit);
        $this->addFlash('success', 'Les données des propositions archivées ont été rafraîchies.');

        return $this->redirectToRoute('app_admin_wiki_archived_proposals', $routeParams);
    }

    if (file_exists($jsonFile)) {
        // Memory-efficient extraction: only the needed keys, capped at $maxItems entries.
        $maxItems = 100;

        $proposals = $this->extractJsonArrayByKey($jsonFile, 'proposals', $maxItems);
        $statistics = $this->extractJsonArrayByKey($jsonFile, 'statistics', $maxItems);
        $lastUpdated = $this->extractJsonScalarByKey($jsonFile, 'last_updated');

        // Decide whether the data is more than one day old.
        $isStale = false;
        if (is_string($lastUpdated) && $lastUpdated !== '') {
            try {
                $isStale = new \DateTimeImmutable($lastUpdated) < new \DateTimeImmutable('-1 day');
            } catch (\Exception $e) {
                // Unparseable timestamp: show the data as-is rather than failing the request.
                $isStale = false;
            }
        }

        if ($isStale) {
            $this->refreshArchivedProposalsData($limit);

            $refreshedAt = file_exists($jsonFile)
                ? $this->extractJsonScalarByKey($jsonFile, 'last_updated')
                : null;

            // Only redirect when the refresh really produced new data; otherwise fall
            // through and render the stale data (the refresh helper has already
            // flashed the error), which avoids redirecting forever.
            if ($refreshedAt && $refreshedAt !== $lastUpdated) {
                $this->addFlash('info', 'Les données des propositions archivées ont été automatiquement mises à jour car elles dataient de plus d\'un jour.');

                return $this->redirectToRoute('app_admin_wiki_archived_proposals', $routeParams);
            }
        }
    } else {
        // No data yet: try to generate the file by running the script.
        $this->refreshArchivedProposalsData($limit);

        if (file_exists($jsonFile)) {
            $this->addFlash('success', 'Le fichier des propositions archivées a été généré avec succès.');

            return $this->redirectToRoute('app_admin_wiki_archived_proposals', $routeParams);
        }

        $this->addFlash('error', 'Impossible de générer le fichier des propositions archivées.');
    }

    return $this->render('admin/wiki_archived_proposals.html.twig', [
        'proposals' => $proposals,
        'statistics' => $statistics,
        'last_updated' => $lastUpdated,
        'limit' => $limit
    ]);
}
|
||
|
||
/**
 * Refresh the archived proposals data by running the fetch_archived_proposals.py script.
 *
 * The script is run with python3 and its stderr is merged into stdout so that
 * the error text can be shown to the user. Failures are reported through flash
 * messages; nothing is returned and nothing is thrown to the caller.
 *
 * @param int|null $limit Optional limit for the number of proposals to process
 */
private function refreshArchivedProposalsData(?int $limit = null): void
{
    try {
        $scriptPath = $this->getParameter('kernel.project_dir') . '/wiki_compare/fetch_archived_proposals.py';

        if (!file_exists($scriptPath)) {
            $this->addFlash('error', 'Le script fetch_archived_proposals.py n\'existe pas.');

            return;
        }

        // Quote the path: the project directory may contain spaces or shell metacharacters.
        $command = 'python3 ' . escapeshellarg($scriptPath);

        // Add limit parameter if provided ($limit is an int, so it is safe to append as-is).
        if ($limit !== null) {
            $command .= ' --limit ' . $limit;
        }

        // exec() appends to $output and may leave $returnCode untouched when the
        // process cannot be started, so both are initialised explicitly.
        $output = [];
        $returnCode = 0;
        $lastLine = exec($command . ' 2>&1', $output, $returnCode);

        if ($lastLine === false || $returnCode !== 0) {
            $this->addFlash('warning', 'Impossible de mettre à jour les propositions archivées. Erreur: ' . implode("\n", $output));
        }
    } catch (\Exception $e) {
        $this->addFlash('error', 'Erreur lors de l\'exécution du script: ' . $e->getMessage());
    }
}
|
||
|
||
#[Route('/wiki', name: 'app_admin_wiki')]
/**
 * Wiki dashboard: compares English and French wiki pages listed in
 * wiki_compare/wiki_pages.csv and renders admin/wiki.html.twig.
 *
 * Data passed to the template:
 *  - wiki_pages:                   pages grouped as [key][language], sorted by English staleness score (descending)
 *  - missing_translations:         English pages that have no French counterpart
 *  - page_differences:             French minus English differences (sections, words, links, media)
 *  - staleness_stats:              count/min/max/mean/std_dev of the raw staleness scores
 *  - wiki_pages_stats:             the same statistics per language and metric, for pages present in both languages
 *  - pages_unavailable_in_english, specific_pages, newly_created_pages,
 *    available_translations, keys_without_wiki: loaded from the JSON files in wiki_compare/
 *
 * CSV lines whose column count does not match the header (for example blank
 * lines) are skipped instead of making array_combine() throw.
 */
public function index(): Response
{
    $projectDir = $this->getParameter('kernel.project_dir');
    $csvFile = $projectDir . '/wiki_compare/wiki_pages.csv';

    if (!file_exists($csvFile)) {
        $this->addFlash('error', 'Le fichier wiki_pages.csv n\'existe pas.');
        return $this->redirectToRoute('app_admin_index');
    }

    // file() returns false when the file cannot be read; treat that as an empty CSV.
    $lines = file($csvFile);
    if ($lines === false) {
        $lines = [];
    }
    $csvData = array_map('str_getcsv', $lines);
    $headers = array_shift($csvData) ?? [];
    $headerCount = count($headers);

    // Population statistics (count, min, max, rounded mean and standard deviation) of a list of numbers.
    $summarize = static function (array $values): array {
        $count = count($values);
        if ($count === 0) {
            return ['count' => 0, 'min' => 0, 'max' => 0, 'mean' => 0.0, 'std_dev' => 0.0];
        }

        $mean = array_sum($values) / $count;
        $squaredDeviations = 0.0;
        foreach ($values as $value) {
            $squaredDeviations += ($value - $mean) ** 2;
        }

        return [
            'count' => $count,
            'min' => min($values),
            'max' => max($values),
            'mean' => round($mean, 2),
            'std_dev' => round(sqrt($squaredDeviations / $count), 2)
        ];
    };

    // Integer value of a CSV column, 0 when the column is absent.
    $metric = static fn (array $page, string $column): int => (int) ($page[$column] ?? 0);

    // Difference rendered with an explicit sign, e.g. "+3", "+0" or "-2".
    $signed = static fn (int $diff): string => ($diff >= 0 ? '+' : '') . $diff;

    $wikiPages = [];
    $missingTranslations = [];
    $pageDifferences = [];
    $pagesUnavailableInEnglish = [];
    $stalenessScores = [];

    // Single pass over the CSV: collect raw staleness scores and group pages by key and language.
    foreach ($csvData as $row) {
        if ($headerCount === 0 || count($row) !== $headerCount) {
            // Blank or malformed line: array_combine() would throw a ValueError.
            continue;
        }

        $page = array_combine($headers, $row);

        if (isset($page['staleness_score']) && is_numeric($page['staleness_score'])) {
            $rawScore = (float) $page['staleness_score'];
            // Statistics use the raw score; the page itself shows the absolute value, rounded to 2 decimals.
            $stalenessScores[] = $rawScore;
            $page['staleness_score'] = round(abs($rawScore), 2);
        }

        if (!isset($page['key'], $page['language'])) {
            // Without these two columns the row cannot be grouped.
            continue;
        }

        $wikiPages[$page['key']][$page['language']] = $page;
    }

    $stalenessStats = $summarize($stalenessScores);

    // Per-language metric values, collected only for pages that exist in both languages.
    $stats = [
        'en_sections' => [],
        'fr_sections' => [],
        'en_words' => [],
        'fr_words' => [],
        'en_links' => [],
        'fr_links' => [],
        'en_media' => [],
        'fr_media' => []
    ];

    foreach ($wikiPages as $key => $languages) {
        if (!isset($languages['en'])) {
            continue;
        }

        if (!isset($languages['fr'])) {
            // English page without a French translation.
            $missingTranslations[$key] = $languages['en'];
            continue;
        }

        $en = $languages['en'];
        $fr = $languages['fr'];

        $enSections = $metric($en, 'sections');
        $frSections = $metric($fr, 'sections');
        $enWords = $metric($en, 'word_count');
        $frWords = $metric($fr, 'word_count');
        $enLinks = $metric($en, 'link_count');
        $frLinks = $metric($fr, 'link_count');
        $enMedia = $metric($en, 'media_count');
        $frMedia = $metric($fr, 'media_count');

        // Differences are French minus English.
        $sectionDiff = $frSections - $enSections;
        $wordDiff = $frWords - $enWords;
        $linkDiff = $frLinks - $enLinks;
        $mediaDiff = $frMedia - $enMedia;

        $pageDifferences[$key] = [
            'section_diff' => $sectionDiff,
            'section_diff_formatted' => $signed($sectionDiff),
            'word_diff' => $wordDiff,
            'word_diff_formatted' => $signed($wordDiff),
            'link_diff' => $linkDiff,
            'link_diff_formatted' => $signed($linkDiff),
            'media_diff' => $mediaDiff,
            'media_diff_formatted' => $signed($mediaDiff),
        ];

        $stats['en_sections'][] = $enSections;
        $stats['fr_sections'][] = $frSections;
        $stats['en_words'][] = $enWords;
        $stats['fr_words'][] = $frWords;
        $stats['en_links'][] = $enLinks;
        $stats['fr_links'][] = $frLinks;
        $stats['en_media'][] = $enMedia;
        $stats['fr_media'][] = $frMedia;
    }

    // Statistics per metric; metrics without any value are left out.
    $wikiPagesStats = [];
    foreach ($stats as $statKey => $values) {
        if (!empty($values)) {
            $wikiPagesStats[$statKey] = $summarize($values);
        }
    }

    // Sort wiki pages by the English staleness score (descending); pages that do not
    // exist in both languages count as 0.
    $sortScore = static fn (array $languages): float =>
        isset($languages['en'], $languages['fr'], $languages['en']['staleness_score'])
            ? (float) $languages['en']['staleness_score']
            : 0.0;
    uasort($wikiPages, static fn (array $a, array $b): int => $sortScore($b) <=> $sortScore($a));

    // Decode a JSON file into an array structure; null when the file is missing or unreadable.
    $loadJson = static function (string $path) {
        if (!file_exists($path)) {
            return null;
        }
        $raw = file_get_contents($path);

        return $raw === false ? null : json_decode($raw, true);
    };

    // Pages unavailable in English, deduplicated by URL (first occurrence wins).
    $unavailableData = $loadJson($projectDir . '/wiki_compare/pages_unavailable_in_english.json');
    if (isset($unavailableData['pages']) && is_array($unavailableData['pages'])) {
        $seenUrls = [];
        foreach ($unavailableData['pages'] as $page) {
            if (!isset($page['url'])) {
                // No URL to deduplicate on: keep the entry.
                $pagesUnavailableInEnglish[] = $page;
                continue;
            }
            if (!isset($seenUrls[$page['url']])) {
                $seenUrls[$page['url']] = true;
                $pagesUnavailableInEnglish[] = $page;
            }
        }
    }

    // Specific pages from outdated_pages.json, extracted without loading the whole
    // file and capped at $maxPages entries.
    $specificPages = [];
    $outdatedPagesFile = $projectDir . '/wiki_compare/outdated_pages.json';
    if (file_exists($outdatedPagesFile)) {
        $maxPages = 100;
        $specificPages = $this->extractSpecificPagesFromJson($outdatedPagesFile, $maxPages);
    }

    // Newly created French pages.
    $newlyCreatedPages = [];
    $newlyCreatedPagesData = $loadJson($projectDir . '/wiki_compare/newly_created_french_pages.json');
    if (isset($newlyCreatedPagesData['created_pages']) && is_array($newlyCreatedPagesData['created_pages'])) {
        $newlyCreatedPages = $newlyCreatedPagesData['created_pages'];
    }

    // Machine translations.
    $availableTranslations = [];
    $translationsData = $loadJson($projectDir . '/wiki_compare/translations.json');
    if (isset($translationsData['translations']) && is_array($translationsData['translations'])) {
        $availableTranslations = $translationsData['translations'];
    }

    // Keys without wiki pages.
    $keysWithoutWiki = [];
    $keysWithoutWikiData = $loadJson($projectDir . '/wiki_compare/keys_without_wiki.json');
    if (is_array($keysWithoutWikiData)) {
        $keysWithoutWiki = $keysWithoutWikiData;
    }

    return $this->render('admin/wiki.html.twig', [
        'wiki_pages' => $wikiPages,
        'missing_translations' => $missingTranslations,
        'page_differences' => $pageDifferences,
        'pages_unavailable_in_english' => $pagesUnavailableInEnglish,
        'specific_pages' => $specificPages,
        'newly_created_pages' => $newlyCreatedPages,
        'staleness_stats' => $stalenessStats,
        'wiki_pages_stats' => $wikiPagesStats,
        'available_translations' => $availableTranslations,
        'keys_without_wiki' => $keysWithoutWiki
    ]);
}
|
||
|
||
#[Route('/wiki/translate/{key}', name: 'app_admin_wiki_translate', requirements: ['key' => '.+'])]
/**
 * Runs wiki_compare/wiki_translate.py for the given wiki key using the
 * project's virtualenv interpreter, then redirects to the wiki dashboard.
 *
 * The outcome is reported through flash messages. The key comes straight from
 * the URL, so every part of the shell command is passed through
 * escapeshellarg() to prevent command injection.
 *
 * @param string $key Wiki key taken from the route (may contain slashes)
 */
public function translate(string $key): Response
{
    $this->addFlash('info', 'Traduction en cours pour la page ' . $key);

    try {
        $projectDir = $this->getParameter('kernel.project_dir');
        $scriptPath = $projectDir . '/wiki_compare/wiki_translate.py';

        if (file_exists($scriptPath)) {
            // Execute the translation script with the virtual environment interpreter.
            $venvPython = $projectDir . '/venv/bin/python';

            // NOTE(review): escapeshellarg() is locale-sensitive and may drop bytes that are
            // invalid in the current LC_CTYPE; confirm a UTF-8 locale if keys can be non-ASCII.
            // stderr is merged into stdout so the warning below carries the actual error text.
            $command = sprintf(
                'cd %s && %s %s %s 2>&1',
                escapeshellarg($projectDir),
                escapeshellarg($venvPython),
                escapeshellarg($scriptPath),
                escapeshellarg($key)
            );

            $output = [];
            $returnVar = 0;
            exec($command, $output, $returnVar);

            if ($returnVar === 0) {
                $this->addFlash('success', 'Traduction réussie pour la page ' . $key);
            } else {
                $this->addFlash('warning', 'Problème lors de la traduction: ' . implode("\n", $output));
            }
        } else {
            $this->addFlash('error', 'Le script wiki_translate.py n\'existe pas.');
        }
    } catch (\Exception $e) {
        $this->addFlash('error', 'Erreur lors de l\'exécution du script: ' . $e->getMessage());
    }

    return $this->redirectToRoute('app_admin_wiki');
}
|
||
|
||
#[Route('/wiki/update-translation/{key}', name: 'app_admin_wiki_update_translation', requirements: ['key' => '.+'])]
/**
 * Re-runs wiki_compare/wiki_translate.py for the given wiki key using the
 * project's virtualenv interpreter, then redirects to the wiki dashboard.
 *
 * The outcome is reported through flash messages. The key comes straight from
 * the URL, so every part of the shell command is passed through
 * escapeshellarg() to prevent command injection.
 *
 * @param string $key Wiki key taken from the route (may contain slashes)
 */
public function updateTranslation(string $key): Response
{
    $this->addFlash('info', 'Mise à jour de la traduction en cours pour la page ' . $key);

    try {
        $projectDir = $this->getParameter('kernel.project_dir');
        $scriptPath = $projectDir . '/wiki_compare/wiki_translate.py';

        if (file_exists($scriptPath)) {
            // Execute the translation script with the virtual environment interpreter.
            // NOTE(review): an earlier comment mentioned an "update flag", but the script is
            // invoked with the key only; confirm whether wiki_translate.py needs an extra argument.
            $venvPython = $projectDir . '/venv/bin/python';

            // NOTE(review): escapeshellarg() is locale-sensitive and may drop bytes that are
            // invalid in the current LC_CTYPE; confirm a UTF-8 locale if keys can be non-ASCII.
            // stderr is merged into stdout so the warning below carries the actual error text.
            $command = sprintf(
                'cd %s && %s %s %s 2>&1',
                escapeshellarg($projectDir),
                escapeshellarg($venvPython),
                escapeshellarg($scriptPath),
                escapeshellarg($key)
            );

            $output = [];
            $returnVar = 0;
            exec($command, $output, $returnVar);

            if ($returnVar === 0) {
                $this->addFlash('success', 'Mise à jour de la traduction réussie pour la page ' . $key);
            } else {
                $this->addFlash('warning', 'Problème lors de la mise à jour de la traduction: ' . implode("\n", $output));
            }
        } else {
            $this->addFlash('error', 'Le script wiki_translate.py n\'existe pas.');
        }
    } catch (\Exception $e) {
        $this->addFlash('error', 'Erreur lors de l\'exécution du script: ' . $e->getMessage());
    }

    return $this->redirectToRoute('app_admin_wiki');
}
|
||
|
||
#[Route('/wiki/compare/{key}', name: 'app_admin_wiki_compare', requirements: ['key' => '.+'])]
|
||
public function compare(string $key): Response
|
||
{
|
||
$csvFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/wiki_pages.csv';
|
||
$jsonFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/outdated_pages.json';
|
||
|
||
if (!file_exists($csvFile)) {
|
||
$this->addFlash('error', 'Le fichier wiki_pages.csv n\'existe pas.');
|
||
return $this->redirectToRoute('app_admin_index');
|
||
}
|
||
|
||
$csvData = array_map('str_getcsv', file($csvFile));
|
||
$headers = array_shift($csvData);
|
||
|
||
// Process CSV data to find the requested key
|
||
$enPage = null;
|
||
$frPage = null;
|
||
|
||
foreach ($csvData as $row) {
|
||
$page = array_combine($headers, $row);
|
||
if ($page['key'] === $key) {
|
||
if ($page['language'] === 'en') {
|
||
$enPage = $page;
|
||
} elseif ($page['language'] === 'fr') {
|
||
$frPage = $page;
|
||
}
|
||
}
|
||
}
|
||
|
||
// If English page doesn't exist, redirect back with error
|
||
if (!$enPage) {
|
||
$this->addFlash('error', 'La page wiki pour la clé "' . $key . '" n\'existe pas.');
|
||
return $this->redirectToRoute('app_admin_wiki');
|
||
}
|
||
|
||
// Get detailed content comparison from JSON file
|
||
$detailedComparison = null;
|
||
$mediaDiff = 0;
|
||
$historyData = null;
|
||
$prevPage = null;
|
||
$nextPage = null;
|
||
$stalenessDistribution = null;
|
||
|
||
if (file_exists($jsonFile)) {
|
||
// Use memory-efficient approach to extract only the necessary data
|
||
$maxItems = 100; // Limit the number of items to prevent memory exhaustion
|
||
|
||
// Extract history data if available
|
||
$historyData = [];
|
||
|
||
// Get history data from the JSON file
|
||
$historyEntries = $this->extractJsonArrayByKey($jsonFile, 'history', $maxItems);
|
||
|
||
// Process history data for the current key
|
||
foreach ($historyEntries as $timestamp => $entry) {
|
||
$historyEntry = [
|
||
'timestamp' => $timestamp,
|
||
'date' => is_string($timestamp) && !empty($timestamp) && $timestamp !== '0' ?
|
||
(new \DateTime($timestamp))->format('Y-m-d') : 'N/A',
|
||
'metrics' => []
|
||
];
|
||
|
||
// Check regular_pages
|
||
if (isset($entry['regular_pages']) && is_array($entry['regular_pages'])) {
|
||
foreach ($entry['regular_pages'] as $page) {
|
||
if (isset($page['key']) && $page['key'] === $key) {
|
||
// Extract metrics
|
||
$historyEntry['metrics'] = [
|
||
'staleness_score' => $page['staleness_score'] ?? 0,
|
||
'date_diff' => $page['date_diff'] ?? 0,
|
||
'word_diff' => $page['word_diff'] ?? 0,
|
||
'section_diff' => $page['section_diff'] ?? 0,
|
||
'link_diff' => $page['link_diff'] ?? 0,
|
||
'media_diff' => $page['media_diff'] ?? 0
|
||
];
|
||
$historyData[] = $historyEntry;
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
|
||
// If not found in regular_pages, check specific_pages
|
||
if (empty($historyEntry['metrics']) && isset($entry['specific_pages']) && is_array($entry['specific_pages'])) {
|
||
foreach ($entry['specific_pages'] as $page) {
|
||
if (isset($page['key']) && $page['key'] === $key) {
|
||
// Extract metrics
|
||
$historyEntry['metrics'] = [
|
||
'staleness_score' => $page['staleness_score'] ?? 0,
|
||
'date_diff' => $page['date_diff'] ?? 0,
|
||
'word_diff' => $page['word_diff'] ?? 0,
|
||
'section_diff' => $page['section_diff'] ?? 0,
|
||
'link_diff' => $page['link_diff'] ?? 0,
|
||
'media_diff' => $page['media_diff'] ?? 0
|
||
];
|
||
$historyData[] = $historyEntry;
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// Sort history data by timestamp
|
||
usort($historyData, function($a, $b) {
|
||
return strtotime($a['timestamp']) - strtotime($b['timestamp']);
|
||
});
|
||
|
||
// Get regular_pages and specific_pages arrays
|
||
$regularPages = $this->extractJsonArrayByKey($jsonFile, 'regular_pages', $maxItems);
|
||
$specificPages = $this->extractJsonArrayByKey($jsonFile, 'specific_pages', $maxItems);
|
||
|
||
// Combine them into a single array
|
||
$allPages = array_merge($regularPages, $specificPages);
|
||
|
||
// Sort pages by staleness score (descending)
|
||
usort($allPages, function($a, $b) {
|
||
$scoreA = $a['staleness_score'] ?? 0;
|
||
$scoreB = $b['staleness_score'] ?? 0;
|
||
return $scoreB <=> $scoreA; // Descending order
|
||
});
|
||
|
||
// Find the current page index in the sorted array
|
||
$currentIndex = -1;
|
||
foreach ($allPages as $index => $page) {
|
||
if (isset($page['key']) && $page['key'] === $key) {
|
||
$currentIndex = $index;
|
||
break;
|
||
}
|
||
}
|
||
|
||
// Determine previous and next pages
|
||
if ($currentIndex > 0) {
|
||
$prevPage = $allPages[$currentIndex - 1];
|
||
}
|
||
|
||
if ($currentIndex < count($allPages) - 1 && $currentIndex >= 0) {
|
||
$nextPage = $allPages[$currentIndex + 1];
|
||
}
|
||
|
||
// Create staleness score distribution data for histogram
|
||
$stalenessScores = [];
|
||
foreach ($allPages as $page) {
|
||
if (isset($page['staleness_score'])) {
|
||
$stalenessScores[] = $page['staleness_score'];
|
||
}
|
||
}
|
||
|
||
if (!empty($stalenessScores)) {
|
||
// Calculate statistics
|
||
$min = min($stalenessScores);
|
||
$max = max($stalenessScores);
|
||
$avg = array_sum($stalenessScores) / count($stalenessScores);
|
||
$median = $this->calculateMedian($stalenessScores);
|
||
|
||
// Create histogram bins (10 bins)
|
||
$binCount = 10;
|
||
$binSize = ($max - $min) / $binCount;
|
||
$bins = [];
|
||
$binLabels = [];
|
||
|
||
// Initialize bins
|
||
for ($i = 0; $i < $binCount; $i++) {
|
||
$bins[$i] = 0;
|
||
$binStart = $min + ($i * $binSize);
|
||
$binEnd = $binStart + $binSize;
|
||
$binLabels[$i] = round($binStart, 1) . ' - ' . round($binEnd, 1);
|
||
}
|
||
|
||
// Count scores in each bin
|
||
foreach ($stalenessScores as $score) {
|
||
$binIndex = min($binCount - 1, floor(($score - $min) / $binSize));
|
||
$bins[$binIndex]++;
|
||
}
|
||
|
||
// Find which bin the current page falls into
|
||
$currentPageScore = 0;
|
||
foreach ($allPages as $page) {
|
||
if (isset($page['key']) && $page['key'] === $key && isset($page['staleness_score'])) {
|
||
$currentPageScore = $page['staleness_score'];
|
||
break;
|
||
}
|
||
}
|
||
|
||
$currentPageBin = min($binCount - 1, floor(($currentPageScore - $min) / $binSize));
|
||
|
||
$stalenessDistribution = [
|
||
'scores' => $stalenessScores,
|
||
'min' => $min,
|
||
'max' => $max,
|
||
'avg' => $avg,
|
||
'median' => $median,
|
||
'bins' => $bins,
|
||
'binLabels' => $binLabels,
|
||
'currentPageScore' => $currentPageScore,
|
||
'currentPageBin' => $currentPageBin,
|
||
'totalPages' => count($stalenessScores)
|
||
];
|
||
}
|
||
|
||
// Find the page with the matching key
|
||
foreach ($allPages as $page) {
|
||
if (isset($page['key']) && $page['key'] === $key) {
|
||
$mediaComparison = $page['media_comparison'] ?? null;
|
||
|
||
// Deduplicate images by URL in the controller and filter out images that appear in both languages
|
||
if ($mediaComparison) {
|
||
// Get all image URLs from both languages
|
||
$enOnlyImages = $mediaComparison['en_only'] ?? [];
|
||
$frOnlyImages = $mediaComparison['fr_only'] ?? [];
|
||
$commonImages = $mediaComparison['common'] ?? [];
|
||
|
||
// Extract all URLs from French images
|
||
$frImageUrls = [];
|
||
foreach ($frOnlyImages as $media) {
|
||
$frImageUrls[] = $media['src'];
|
||
}
|
||
|
||
// Also add URLs from common images (French side)
|
||
foreach ($commonImages as $commonMedia) {
|
||
if (isset($commonMedia['fr']['src'])) {
|
||
$frImageUrls[] = $commonMedia['fr']['src'];
|
||
}
|
||
}
|
||
|
||
// Extract all URLs from English images
|
||
$enImageUrls = [];
|
||
foreach ($enOnlyImages as $media) {
|
||
$enImageUrls[] = $media['src'];
|
||
}
|
||
|
||
// Also add URLs from common images (English side)
|
||
foreach ($commonImages as $commonMedia) {
|
||
if (isset($commonMedia['en']['src'])) {
|
||
$enImageUrls[] = $commonMedia['en']['src'];
|
||
}
|
||
}
|
||
|
||
// Process English-only images - deduplicate and filter out those that appear in French
|
||
$enUniqueImages = [];
|
||
$enProcessedUrls = [];
|
||
|
||
foreach ($enOnlyImages as $media) {
|
||
// Skip if this URL is already processed or if it appears in French images
|
||
if (!in_array($media['src'], $enProcessedUrls) && !in_array($media['src'], $frImageUrls)) {
|
||
$enProcessedUrls[] = $media['src'];
|
||
$enUniqueImages[] = $media;
|
||
}
|
||
}
|
||
|
||
// Process French-only images - deduplicate and filter out those that appear in English
|
||
$frUniqueImages = [];
|
||
$frProcessedUrls = [];
|
||
|
||
foreach ($frOnlyImages as $media) {
|
||
// Skip if this URL is already processed or if it appears in English images
|
||
if (!in_array($media['src'], $frProcessedUrls) && !in_array($media['src'], $enImageUrls)) {
|
||
$frProcessedUrls[] = $media['src'];
|
||
$frUniqueImages[] = $media;
|
||
}
|
||
}
|
||
|
||
// Replace the arrays with deduplicated and filtered versions
|
||
$mediaComparison['en_only'] = $enUniqueImages;
|
||
$mediaComparison['fr_only'] = $frUniqueImages;
|
||
$mediaComparison['en_only_count'] = count($enOnlyImages);
|
||
$mediaComparison['fr_only_count'] = count($frOnlyImages);
|
||
}
|
||
|
||
// Get link comparison data
|
||
$linkComparison = $page['link_comparison'] ?? null;
|
||
|
||
// Sort links alphabetically by URL if link comparison exists
|
||
if ($linkComparison) {
|
||
// Sort English-only links
|
||
if (isset($linkComparison['en_only']) && is_array($linkComparison['en_only'])) {
|
||
usort($linkComparison['en_only'], function ($a, $b) {
|
||
return strcmp($a['href'], $b['href']);
|
||
});
|
||
}
|
||
|
||
// Sort French-only links
|
||
if (isset($linkComparison['fr_only']) && is_array($linkComparison['fr_only'])) {
|
||
usort($linkComparison['fr_only'], function ($a, $b) {
|
||
return strcmp($a['href'], $b['href']);
|
||
});
|
||
}
|
||
|
||
// Sort common links
|
||
if (isset($linkComparison['common']) && is_array($linkComparison['common'])) {
|
||
usort($linkComparison['common'], function ($a, $b) {
|
||
return strcmp($a['en']['href'], $b['en']['href']);
|
||
});
|
||
}
|
||
}
|
||
|
||
// Get section comparison data and filter out "Contents" sections and navigation sections
|
||
$sectionComparison = $page['section_comparison'] ?? null;
|
||
|
||
// Sections to exclude from comparison (navigation elements)
|
||
$excludedSections = [
|
||
'Contents', 'Sommaire',
|
||
'Personal tools', 'Namespaces', 'Views', 'Search', 'Site', 'Tools', 'In other projects'
|
||
];
|
||
|
||
// Filter out excluded sections if section comparison exists
|
||
if ($sectionComparison) {
|
||
// Filter common sections
|
||
if (isset($sectionComparison['common']) && is_array($sectionComparison['common'])) {
|
||
$sectionComparison['common'] = array_filter($sectionComparison['common'], function ($section) use ($excludedSections) {
|
||
// Skip if either English or French title is in the excluded list
|
||
return !(in_array($section['en']['title'], $excludedSections) || in_array($section['fr']['title'], $excludedSections));
|
||
});
|
||
// Re-index array
|
||
$sectionComparison['common'] = array_values($sectionComparison['common']);
|
||
}
|
||
|
||
// Filter English-only sections
|
||
if (isset($sectionComparison['en_only']) && is_array($sectionComparison['en_only'])) {
|
||
$sectionComparison['en_only'] = array_filter($sectionComparison['en_only'], function ($section) use ($excludedSections) {
|
||
return !in_array($section['title'], $excludedSections);
|
||
});
|
||
// Re-index array
|
||
$sectionComparison['en_only'] = array_values($sectionComparison['en_only']);
|
||
}
|
||
|
||
// Filter French-only sections
|
||
if (isset($sectionComparison['fr_only']) && is_array($sectionComparison['fr_only'])) {
|
||
$sectionComparison['fr_only'] = array_filter($sectionComparison['fr_only'], function ($section) use ($excludedSections) {
|
||
return !in_array($section['title'], $excludedSections);
|
||
});
|
||
// Re-index array
|
||
$sectionComparison['fr_only'] = array_values($sectionComparison['fr_only']);
|
||
}
|
||
}
|
||
|
||
// Calculate adjusted section counts (excluding "Contents" sections)
|
||
$enSectionCount = $enPage['sections'];
|
||
$frSectionCount = $frPage['sections'];
|
||
|
||
// Adjust section counts if we have section comparison data
|
||
if ($sectionComparison) {
|
||
// Count how many sections were filtered out
|
||
$filteredCount = 0;
|
||
|
||
// Check common sections that were filtered
|
||
if (isset($page['section_comparison']['common']) && is_array($page['section_comparison']['common'])) {
|
||
foreach ($page['section_comparison']['common'] as $section) {
|
||
if (in_array($section['en']['title'], $excludedSections) || in_array($section['fr']['title'], $excludedSections)) {
|
||
$filteredCount++;
|
||
}
|
||
}
|
||
}
|
||
|
||
// Check English-only sections that were filtered
|
||
if (isset($page['section_comparison']['en_only']) && is_array($page['section_comparison']['en_only'])) {
|
||
foreach ($page['section_comparison']['en_only'] as $section) {
|
||
if (in_array($section['title'], $excludedSections)) {
|
||
$filteredCount++;
|
||
}
|
||
}
|
||
}
|
||
|
||
// Check French-only sections that were filtered
|
||
if (isset($page['section_comparison']['fr_only']) && is_array($page['section_comparison']['fr_only'])) {
|
||
foreach ($page['section_comparison']['fr_only'] as $section) {
|
||
if (in_array($section['title'], $excludedSections)) {
|
||
$filteredCount++;
|
||
}
|
||
}
|
||
}
|
||
|
||
// Adjust section counts
|
||
$enSectionCount -= $filteredCount;
|
||
$frSectionCount -= $filteredCount;
|
||
}
|
||
|
||
// Check for incorrect heading hierarchies
|
||
$enHierarchyErrors = [];
|
||
$frHierarchyErrors = [];
|
||
|
||
// Check English sections
|
||
if (isset($sectionComparison['en_only']) && is_array($sectionComparison['en_only'])) {
|
||
$enHierarchyErrors = $this->detectHeadingHierarchyErrors($sectionComparison['en_only']);
|
||
}
|
||
|
||
// Also check common sections (English side)
|
||
if (isset($sectionComparison['common']) && is_array($sectionComparison['common'])) {
|
||
$commonEnSections = array_map(function ($section) {
|
||
return $section['en'];
|
||
}, $sectionComparison['common']);
|
||
|
||
$enHierarchyErrors = array_merge($enHierarchyErrors, $this->detectHeadingHierarchyErrors($commonEnSections));
|
||
}
|
||
|
||
// Check French sections
|
||
if (isset($sectionComparison['fr_only']) && is_array($sectionComparison['fr_only'])) {
|
||
$frHierarchyErrors = $this->detectHeadingHierarchyErrors($sectionComparison['fr_only']);
|
||
}
|
||
|
||
// Also check common sections (French side)
|
||
if (isset($sectionComparison['common']) && is_array($sectionComparison['common'])) {
|
||
$commonFrSections = array_map(function ($section) {
|
||
return $section['fr'];
|
||
}, $sectionComparison['common']);
|
||
|
||
$frHierarchyErrors = array_merge($frHierarchyErrors, $this->detectHeadingHierarchyErrors($commonFrSections));
|
||
}
|
||
|
||
// Build aligned section list for better visualization of missing sections
|
||
$alignedSections = $this->buildAlignedSectionList($sectionComparison);
|
||
|
||
$detailedComparison = [
|
||
'section_comparison' => $sectionComparison,
|
||
'aligned_sections' => $alignedSections,
|
||
'link_comparison' => $linkComparison,
|
||
'media_comparison' => $mediaComparison,
|
||
'category_comparison' => $page['category_comparison'] ?? null,
|
||
'grammar_suggestions' => $page['grammar_suggestions'] ?? null,
|
||
'adjusted_en_section_count' => $enSectionCount,
|
||
'adjusted_fr_section_count' => $frSectionCount,
|
||
'en_hierarchy_errors' => $enHierarchyErrors,
|
||
'fr_hierarchy_errors' => $frHierarchyErrors
|
||
];
|
||
|
||
$mediaDiff = $page['media_diff'] ?? 0;
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
|
||
// Calculate staleness score components
|
||
$scoreComponents = [];
|
||
|
||
if ($frPage) {
|
||
// Calculate date difference in days
|
||
$dateDiff = 0;
|
||
if ($enPage['last_modified'] && $frPage['last_modified']) {
|
||
$enDate = \DateTime::createFromFormat('Y-m-d', $enPage['last_modified']);
|
||
$frDate = \DateTime::createFromFormat('Y-m-d', $frPage['last_modified']);
|
||
if ($enDate && $frDate) {
|
||
$dateDiff = ($enDate->getTimestamp() - $frDate->getTimestamp()) / (60 * 60 * 24);
|
||
}
|
||
}
|
||
|
||
// Calculate content differences
|
||
$wordDiff = $enPage['word_count'] - $frPage['word_count'];
|
||
$sectionDiff = $enPage['sections'] - $frPage['sections'];
|
||
$linkDiff = $enPage['link_count'] - $frPage['link_count'];
|
||
|
||
// Calculate score components
|
||
$dateComponent = abs($dateDiff) * 0.2;
|
||
$wordComponent = (abs($wordDiff) / 100) * 0.5;
|
||
$sectionComponent = abs($sectionDiff) * 0.15;
|
||
$linkComponent = (abs($linkDiff) / 10) * 0.15;
|
||
|
||
$scoreComponents = [
|
||
'date' => [
|
||
'value' => $dateDiff,
|
||
'weight' => 0.2,
|
||
'component' => $dateComponent,
|
||
'description' => 'Différence de date (en jours)'
|
||
],
|
||
'word' => [
|
||
'value' => $wordDiff,
|
||
'weight' => 0.5,
|
||
'component' => $wordComponent,
|
||
'description' => 'Différence de nombre de mots'
|
||
],
|
||
'section' => [
|
||
'value' => $sectionDiff,
|
||
'weight' => 0.15,
|
||
'component' => $sectionComponent,
|
||
'description' => 'Différence de nombre de sections'
|
||
],
|
||
'link' => [
|
||
'value' => $linkDiff,
|
||
'weight' => 0.15,
|
||
'component' => $linkComponent,
|
||
'description' => 'Différence de nombre de liens'
|
||
]
|
||
];
|
||
|
||
// Add media component if available
|
||
if (isset($enPage['media_count']) && isset($frPage['media_count'])) {
|
||
$mediaComponent = (abs($mediaDiff) / 5) * 0.1;
|
||
$scoreComponents['media'] = [
|
||
'value' => $mediaDiff,
|
||
'weight' => 0.1,
|
||
'component' => $mediaComponent,
|
||
'description' => 'Différence de nombre d\'images'
|
||
];
|
||
|
||
// Adjust other weights to maintain total of 1.0
|
||
$scoreComponents['date']['weight'] = 0.2;
|
||
$scoreComponents['word']['weight'] = 0.45;
|
||
$scoreComponents['section']['weight'] = 0.15;
|
||
$scoreComponents['link']['weight'] = 0.1;
|
||
}
|
||
}
|
||
|
||
// Create URL for new French page if it doesn't exist
|
||
$createFrUrl = null;
|
||
if (!$frPage) {
|
||
$createFrUrl = 'https://wiki.openstreetmap.org/wiki/FR:' . $key;
|
||
}
|
||
|
||
// Format section titles for copy functionality
|
||
$enSections = '';
|
||
$frSections = '';
|
||
|
||
if ($detailedComparison && $detailedComparison['section_comparison']) {
|
||
// English sections
|
||
if ($enPage) {
|
||
$enSectionsList = [];
|
||
|
||
// Add common sections
|
||
foreach ($detailedComparison['section_comparison']['common'] as $section) {
|
||
$enSectionsList[] = str_repeat('=', $section['en']['level']) . ' ' .
|
||
$section['en']['title'] . ' ' .
|
||
str_repeat('=', $section['en']['level']);
|
||
}
|
||
|
||
// Add English-only sections
|
||
foreach ($detailedComparison['section_comparison']['en_only'] as $section) {
|
||
$enSectionsList[] = str_repeat('=', $section['level']) . ' ' .
|
||
$section['title'] . ' ' .
|
||
str_repeat('=', $section['level']) . ' (EN only)';
|
||
}
|
||
|
||
$enSections = implode("\n", $enSectionsList);
|
||
}
|
||
|
||
// French sections
|
||
if ($frPage) {
|
||
$frSectionsList = [];
|
||
|
||
// Add common sections
|
||
foreach ($detailedComparison['section_comparison']['common'] as $section) {
|
||
$frSectionsList[] = str_repeat('=', $section['fr']['level']) . ' ' .
|
||
$section['fr']['title'] . ' ' .
|
||
str_repeat('=', $section['fr']['level']);
|
||
}
|
||
|
||
// Add French-only sections
|
||
foreach ($detailedComparison['section_comparison']['fr_only'] as $section) {
|
||
$frSectionsList[] = str_repeat('=', $section['level']) . ' ' .
|
||
$section['title'] . ' ' .
|
||
str_repeat('=', $section['level']) . ' (FR only)';
|
||
}
|
||
|
||
$frSections = implode("\n", $frSectionsList);
|
||
}
|
||
}
|
||
|
||
// Format links for copy functionality
|
||
$enLinks = '';
|
||
$frLinks = '';
|
||
|
||
if ($detailedComparison && $detailedComparison['link_comparison']) {
|
||
// English links
|
||
if ($enPage) {
|
||
$enLinksList = [];
|
||
|
||
// Add common links
|
||
foreach ($detailedComparison['link_comparison']['common'] as $link) {
|
||
$enLinksList[] = $link['en']['text'] . ' - ' . $link['en']['href'];
|
||
}
|
||
|
||
// Add English-only links
|
||
foreach ($detailedComparison['link_comparison']['en_only'] as $link) {
|
||
$enLinksList[] = $link['text'] . ' - ' . $link['href'] . ' (EN only)';
|
||
}
|
||
|
||
$enLinks = implode("\n", $enLinksList);
|
||
}
|
||
|
||
// French links
|
||
if ($frPage) {
|
||
$frLinksList = [];
|
||
|
||
// Add common links
|
||
foreach ($detailedComparison['link_comparison']['common'] as $link) {
|
||
$frLinksList[] = $link['fr']['text'] . ' - ' . $link['fr']['href'];
|
||
}
|
||
|
||
// Add French-only links
|
||
foreach ($detailedComparison['link_comparison']['fr_only'] as $link) {
|
||
$frLinksList[] = $link['text'] . ' - ' . $link['href'] . ' (FR only)';
|
||
}
|
||
|
||
$frLinks = implode("\n", $frLinksList);
|
||
}
|
||
}
|
||
|
||
// Ensure page URLs are strings to prevent array to string conversion errors
|
||
if ($frPage && isset($frPage['url']) && is_array($frPage['url'])) {
|
||
$frPage['url'] = json_encode($frPage['url']);
|
||
}
|
||
|
||
if ($enPage && isset($enPage['url']) && is_array($enPage['url'])) {
|
||
$enPage['url'] = json_encode($enPage['url']);
|
||
}
|
||
|
||
return $this->render('admin/wiki_compare.html.twig', [
|
||
'key' => $key,
|
||
'en_page' => $enPage,
|
||
'fr_page' => $frPage,
|
||
'score_components' => $scoreComponents,
|
||
'create_fr_url' => $createFrUrl,
|
||
'detailed_comparison' => $detailedComparison,
|
||
'en_sections' => $enSections,
|
||
'fr_sections' => $frSections,
|
||
'en_links' => $enLinks,
|
||
'fr_links' => $frLinks,
|
||
'history_data' => $historyData,
|
||
'prev_page' => $prevPage,
|
||
'next_page' => $nextPage,
|
||
'staleness_distribution' => $stalenessDistribution
|
||
]);
|
||
}
|
||
|
||
/**
 * Extracts the main content from a rendered wiki HTML page.
 *
 * The content element is looked up by the id "mw-content-text" first, then by the first
 * element whose class attribute contains "mw-content-ltr". Script elements, style elements
 * and "mw-editsection" spans (including everything nested inside them) are removed from the
 * extracted fragment.
 *
 * @param string $html The full HTML content
 * @return string The extracted main content, or the unmodified input when it is empty,
 *                cannot be parsed, or contains no recognizable content element
 */
private function extractMainContent(string $html): string
{
    // DOMDocument::loadHTML() throws a ValueError on an empty source (PHP 8), and there is
    // nothing to extract from blank input anyway.
    if (trim($html) === '') {
        return $html;
    }

    $dom = new \DOMDocument();

    // Collect warnings about malformed HTML internally instead of emitting them, then put the
    // process-wide libxml setting back the way we found it.
    $previousErrorSetting = libxml_use_internal_errors(true);
    try {
        $loaded = $dom->loadHTML($html);
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorSetting);
    }

    if (!$loaded) {
        return $html;
    }

    $xpath = new \DOMXPath($dom);

    // First choice: the element with id "mw-content-text"
    $contentElement = $dom->getElementById('mw-content-text');

    // Second choice: the first element carrying the "mw-content-ltr" class
    if (!$contentElement) {
        $candidates = $xpath->query("//*[contains(@class, 'mw-content-ltr')]");
        if ($candidates !== false && $candidates->length > 0) {
            $contentElement = $candidates->item(0);
        }
    }

    // No recognizable content element: hand back the original HTML
    if (!$contentElement) {
        return $html;
    }

    // Remove unwanted nodes through the DOM rather than with regular expressions: edit-section
    // spans contain nested spans, which a non-greedy "<span ...>(.*?)</span>" pattern cannot
    // match as a whole.
    $unwantedNodes = $xpath->query(
        ".//script | .//style | .//span[contains(concat(' ', normalize-space(@class), ' '), ' mw-editsection ')]",
        $contentElement
    );
    if ($unwantedNodes !== false) {
        // Copy to an array first so that removing nodes does not disturb the iteration
        foreach (iterator_to_array($unwantedNodes) as $node) {
            if ($node->parentNode !== null) {
                $node->parentNode->removeChild($node);
            }
        }
    }

    $contentHtml = $dom->saveHTML($contentElement);

    return $contentHtml === false ? $html : $contentHtml;
}
|
||
|
||
/**
 * Extracts an array from a large JSON file by key without decoding the whole file at once.
 *
 * The file is read line by line until the first occurrence of `"<key>":` is found. The array
 * that follows is then scanned character by character, keeping track of string literals and
 * nesting depth, so that each top-level element is decoded on its own. Reading stops as soon
 * as $maxItems elements have been collected or the array's closing bracket is reached.
 *
 * Brackets, braces and commas inside string values are ignored, the array may start (or
 * entirely live) on the same line as the key, and nested objects never count as items.
 *
 * @param string $filePath Path to the JSON file
 * @param string $key The key of the array to extract
 * @param int $maxItems Maximum number of items to extract (to prevent memory exhaustion)
 * @return array The extracted items (decoded as associative arrays); empty when the file is
 *               unreadable or empty, the key is missing, or its value is not an array
 */
private function extractJsonArrayByKey(string $filePath, string $key, int $maxItems = 100): array
{
    $result = [];

    // First, check if the file exists and is readable
    if (!is_readable($filePath)) {
        error_log("File is not readable: $filePath");
        return $result;
    }

    $fileSize = filesize($filePath);
    if ($fileSize === false || $fileSize === 0) {
        error_log("File is empty or size could not be determined: $filePath");
        return $result;
    }

    if ($maxItems <= 0) {
        return $result;
    }

    $handle = fopen($filePath, 'r');
    if (!$handle) {
        error_log("Could not open file: $filePath");
        return $result;
    }

    try {
        // Locate the first `"key" :` in the file; everything after the colon is kept for scanning
        $keyPattern = '/"' . preg_quote($key, '/') . '"\s*:/';
        $afterColon = null;
        while (($line = fgets($handle)) !== false) {
            if (preg_match($keyPattern, $line, $match, PREG_OFFSET_CAPTURE)) {
                $afterColon = substr($line, $match[0][1] + strlen($match[0][0]));
                break;
            }
        }

        if ($afterColon === null) {
            error_log("Key '$key' not found in file: $filePath");
            return $result;
        }

        // The value may start on a later line: skip whitespace until its first character
        $afterColon = ltrim($afterColon);
        while ($afterColon === '' && ($line = fgets($handle)) !== false) {
            $afterColon = ltrim($line);
        }

        if ($afterColon === '' || $afterColon[0] !== '[') {
            error_log("Could not find opening bracket for array '$key' in file: $filePath");
            return $result;
        }

        // Decodes one raw element and appends it to the result; blank input (empty array or
        // trailing whitespace) is ignored and undecodable elements are logged and skipped.
        $appendItem = static function (string $raw) use (&$result, $key): void {
            $raw = trim($raw);
            if ($raw === '') {
                return;
            }

            $decoded = json_decode($raw, true);
            if (json_last_error() !== JSON_ERROR_NONE) {
                error_log("JSON parse error: " . json_last_error_msg() . " for key '$key'");
                return;
            }

            $result[] = $decoded;
        };

        // Scanner state. Depth 1 means "directly inside the target array".
        $chunk = substr($afterColon, 1);
        $depth = 1;
        $inString = false;
        $escaped = false;
        $item = '';
        $complete = false;

        do {
            $length = strlen($chunk);
            $offset = 0;

            while ($offset < $length) {
                if ($inString) {
                    if ($escaped) {
                        // Character following a backslash: part of the string, whatever it is
                        $item .= $chunk[$offset++];
                        $escaped = false;
                        continue;
                    }

                    // Jump straight to the next quote or backslash
                    $span = strcspn($chunk, '"\\', $offset);
                    $item .= substr($chunk, $offset, $span);
                    $offset += $span;
                    if ($offset >= $length) {
                        break;
                    }

                    $char = $chunk[$offset++];
                    $item .= $char;
                    if ($char === '\\') {
                        $escaped = true;
                    } else {
                        $inString = false;
                    }
                    continue;
                }

                // Outside strings only quotes, brackets, braces and commas matter
                $span = strcspn($chunk, '"[]{},', $offset);
                $item .= substr($chunk, $offset, $span);
                $offset += $span;
                if ($offset >= $length) {
                    break;
                }

                $char = $chunk[$offset++];

                if ($char === ',' && $depth === 1) {
                    // Separator between two top-level elements
                    $appendItem($item);
                    $item = '';
                    if (count($result) >= $maxItems) {
                        $complete = true;
                        break;
                    }
                    continue;
                }

                if ($char === '"') {
                    $inString = true;
                } elseif ($char === '[' || $char === '{') {
                    $depth++;
                } elseif ($char === ']' || $char === '}') {
                    $depth--;
                    if ($depth === 0) {
                        // Closing bracket of the target array: flush the last element and stop
                        $appendItem($item);
                        $item = '';
                        $complete = true;
                        break;
                    }
                }

                $item .= $char;
            }
        } while (!$complete && ($chunk = fgets($handle)) !== false);

        if (!$complete) {
            error_log("Unexpected end of file while reading array '$key' in file: $filePath");
        }
    } finally {
        // Always release the handle, including on early returns above
        fclose($handle);
    }

    return $result;
}
|
||
|
||
/**
 * Extracts a scalar value from a large JSON file by key without loading the entire file into memory.
 *
 * The file is read line by line until the first line containing `"<key>"` is found. The JSON
 * scalar that follows the colon (string, number, true, false or null) is then decoded with
 * json_decode(), so escape sequences in strings, negative numbers and decimals are handled.
 * When the value does not start on the key's line, the next line is read as well.
 *
 * @param string $filePath Path to the JSON file
 * @param string $key The key of the scalar value to extract
 * @return mixed The decoded scalar value, or null when the file cannot be read, the key is not
 *               found, the value is JSON null, or the value is not a scalar
 */
private function extractJsonScalarByKey(string $filePath, string $key): mixed
{
    // First, check if the file exists and is readable
    if (!is_readable($filePath)) {
        error_log("File is not readable: $filePath");
        return null;
    }

    $handle = fopen($filePath, 'r');
    if (!$handle) {
        error_log("Could not open file: $filePath");
        return null;
    }

    $searchKey = '"' . $key . '"';
    $found = false;
    $value = null;

    try {
        while (($line = fgets($handle)) !== false) {
            $keyPos = strpos($line, $searchKey);
            if ($keyPos === false) {
                continue;
            }

            $found = true;
            $afterKey = substr($line, $keyPos + strlen($searchKey));

            // The colon or the value itself may sit on the following line
            if (!preg_match('/^\s*:\s*\S/', $afterKey)) {
                $nextLine = fgets($handle);
                if ($nextLine !== false) {
                    $afterKey .= $nextLine;
                }
            }

            // One JSON scalar token: a string (with escapes), a number, true, false or null
            $scalarPattern = '/^\s*:\s*("(?:[^"\\\\]|\\\\.)*"|-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?|true|false|null)/';

            if (preg_match($scalarPattern, $afterKey, $matches)) {
                $decoded = json_decode($matches[1], true);
                if (json_last_error() === JSON_ERROR_NONE) {
                    $value = $decoded;
                } else {
                    error_log("JSON parse error: " . json_last_error_msg() . " for key '$key'");
                }
            } else {
                error_log("Could not extract value for key '$key' from line: " . trim($line));
            }

            // Only the first occurrence of the key is considered
            break;
        }
    } finally {
        // Always release the handle
        fclose($handle);
    }

    if (!$found) {
        error_log("Key '$key' not found in file: $filePath");
    } elseif ($value === null) {
        error_log("Value for key '$key' is null or could not be extracted");
    }

    return $value;
}
|
||
|
||
/**
 * Legacy shortcut for reading the "specific_pages" array of a large JSON file.
 *
 * Kept for backward compatibility; it only delegates to extractJsonArrayByKey() with the
 * key fixed to "specific_pages".
 *
 * @param string $filePath Path to the JSON file
 * @param int $maxPages Maximum number of pages to extract (to prevent memory exhaustion)
 * @return array The extracted specific_pages array
 */
private function extractSpecificPagesFromJson(string $filePath, int $maxPages = 100): array
{
    return $this->extractJsonArrayByKey(
        filePath: $filePath,
        key: 'specific_pages',
        maxItems: $maxPages
    );
}
|
||
|
||
/**
 * Computes the median of a list of numbers.
 *
 * The values are sorted first (the caller's array is not modified, since it is passed by
 * value). An empty list yields 0; an even-sized list yields the mean of its two central values.
 *
 * @param array $array Array of numbers
 * @return float The median value
 */
private function calculateMedian(array $array): float
{
    $size = count($array);

    // Nothing to measure
    if ($size === 0) {
        return 0;
    }

    sort($array);

    $upperMiddle = intdiv($size, 2);

    // Odd number of elements: the single central value is the median
    if ($size % 2 !== 0) {
        return $array[$upperMiddle];
    }

    // Even number of elements: average the two central values
    return ($array[$upperMiddle - 1] + $array[$upperMiddle]) / 2;
}
|
||
} |