add history in articles measures

This commit is contained in:
Tykayn 2025-09-08 10:20:51 +02:00 committed by tykayn
parent 1ed74c2e2f
commit 381f378db4
9 changed files with 1678 additions and 195 deletions

View file

@ -44,6 +44,49 @@ class WikiController extends AbstractController
'json_exists' => file_exists($outdatedPagesFile)
]);
}
/**
 * Displays the evolution of page rankings over time.
 *
 * Reads wiki_compare/page_rankings.json under the project directory and hands its
 * "timestamps", "pages" and "global_metrics" entries to the
 * admin/wiki_rankings.html.twig template. When the file is absent the template
 * receives empty data; when it is unreadable, malformed or has an unexpected
 * structure the problem is logged and empty data is rendered as well.
 *
 * @return Response The rendered rankings page
 */
#[Route('/wiki/rankings', name: 'app_admin_wiki_rankings')]
public function pageRankings(): Response
{
    $rankingsFile = $this->getParameter('kernel.project_dir') . '/wiki_compare/page_rankings.json';
    // Checked once so the template flag and the loading branch cannot disagree.
    $jsonExists = file_exists($rankingsFile);

    $timestamps = [];
    $pages = [];
    $globalMetrics = [];
    $lastUpdated = null;

    if ($jsonExists) {
        try {
            // file_get_contents() reports failure by returning false, not by throwing.
            $contents = file_get_contents($rankingsFile);
            if ($contents === false) {
                throw new \RuntimeException('Unable to read ' . $rankingsFile);
            }

            // JSON_THROW_ON_ERROR makes malformed JSON reach the catch block below
            // instead of being dropped silently.
            $rankingsData = json_decode($contents, true, 512, JSON_THROW_ON_ERROR);
            if (!is_array($rankingsData)) {
                throw new \UnexpectedValueException('Rankings file does not contain a JSON object');
            }

            // Anything that is not an array is replaced by an empty array so that the
            // template (and end() below) never receives a scalar.
            $section = static fn (string $name): array =>
                is_array($rankingsData[$name] ?? null) ? $rankingsData[$name] : [];

            $timestamps = $section('timestamps');
            $pages = $section('pages');
            $globalMetrics = $section('global_metrics');

            // Get the last timestamp as last_updated
            if (!empty($timestamps)) {
                $lastUpdated = end($timestamps);
            }
        } catch (\Exception $e) {
            // Log the error and fall through with the empty defaults
            error_log("Error loading rankings data: " . $e->getMessage());
        }
    }

    return $this->render('admin/wiki_rankings.html.twig', [
        'timestamps' => $timestamps,
        'pages' => $pages,
        'global_metrics' => $globalMetrics,
        'last_updated' => $lastUpdated,
        'json_exists' => $jsonExists
    ]);
}
/**
* Detects incorrect heading hierarchies in a list of sections
* For example, h4 directly under h2 without h3 in between
@ -1306,6 +1349,9 @@ EOT;
$detailedComparison = null;
$mediaDiff = 0;
$historyData = null;
$prevPage = null;
$nextPage = null;
$stalenessDistribution = null;
if (file_exists($jsonFile)) {
// Use memory-efficient approach to extract only the necessary data
@ -1376,6 +1422,91 @@ EOT;
// Combine them into a single array
$allPages = array_merge($regularPages, $specificPages);
// Sort pages by staleness score (descending).
// Pages without a 'staleness_score' entry are ordered as if their score were 0.
usort($allPages, function($a, $b) {
$scoreA = $a['staleness_score'] ?? 0;
$scoreB = $b['staleness_score'] ?? 0;
return $scoreB <=> $scoreA; // Descending order
});
// Find the current page index in the sorted array.
// -1 is the "not found" sentinel; the first page whose 'key' equals $key wins.
$currentIndex = -1;
foreach ($allPages as $index => $page) {
if (isset($page['key']) && $page['key'] === $key) {
$currentIndex = $index;
break;
}
}
// Determine previous and next pages.
// Given the descending sort, the previous page is the neighbour with the higher (or equal)
// score and the next page the neighbour with the lower (or equal) score.
// Both stay untouched when the key was not found ($currentIndex === -1).
if ($currentIndex > 0) {
$prevPage = $allPages[$currentIndex - 1];
}
if ($currentIndex < count($allPages) - 1 && $currentIndex >= 0) {
$nextPage = $allPages[$currentIndex + 1];
}
// Create staleness score distribution data for histogram.
// Only numeric scores are kept; "+ 0" turns numeric strings into int/float while
// leaving values that already are int or float unchanged.
$stalenessScores = [];
foreach ($allPages as $page) {
    if (isset($page['staleness_score']) && is_numeric($page['staleness_score'])) {
        $stalenessScores[] = $page['staleness_score'] + 0;
    }
}
if (!empty($stalenessScores)) {
    // Calculate statistics
    $min = min($stalenessScores);
    $max = max($stalenessScores);
    $avg = array_sum($stalenessScores) / count($stalenessScores);
    $median = $this->calculateMedian($stalenessScores);

    // Create histogram bins (10 bins)
    $binCount = 10;
    $range = $max - $min;
    // When every score is identical (or there is a single page) the range is 0.
    // Dividing by a zero bin size would throw DivisionByZeroError, so fall back to
    // unit-width bins; all scores then land in the first bin.
    $binSize = $range > 0 ? $range / $binCount : 1;

    // Initialize bins and their "start - end" labels
    $bins = array_fill(0, $binCount, 0);
    $binLabels = [];
    for ($i = 0; $i < $binCount; $i++) {
        $binStart = $min + ($i * $binSize);
        $binEnd = $binStart + $binSize;
        $binLabels[$i] = round($binStart, 1) . ' - ' . round($binEnd, 1);
    }

    // Maps a score to an integer bin index clamped to [0, $binCount - 1]:
    // the upper clamp puts $max into the last bin, the lower clamp protects against
    // a score below $min (e.g. the default 0 used for an unscored current page).
    $binIndexFor = static fn ($score): int =>
        (int) max(0, min($binCount - 1, floor(($score - $min) / $binSize)));

    // Count scores in each bin
    foreach ($stalenessScores as $score) {
        $bins[$binIndexFor($score)]++;
    }

    // Find which bin the current page falls into (score defaults to 0 when the
    // page is not found or carries no numeric score)
    $currentPageScore = 0;
    foreach ($allPages as $page) {
        if (
            isset($page['key'], $page['staleness_score'])
            && $page['key'] === $key
            && is_numeric($page['staleness_score'])
        ) {
            $currentPageScore = $page['staleness_score'] + 0;
            break;
        }
    }
    $currentPageBin = $binIndexFor($currentPageScore);

    $stalenessDistribution = [
        'scores' => $stalenessScores,
        'min' => $min,
        'max' => $max,
        'avg' => $avg,
        'median' => $median,
        'bins' => $bins,
        'binLabels' => $binLabels,
        'currentPageScore' => $currentPageScore,
        'currentPageBin' => $currentPageBin,
        'totalPages' => count($stalenessScores)
    ];
}
// Find the page with the matching key
foreach ($allPages as $page) {
@ -1792,7 +1923,10 @@ EOT;
'fr_sections' => $frSections,
'en_links' => $enLinks,
'fr_links' => $frLinks,
'history_data' => $historyData
'history_data' => $historyData,
'prev_page' => $prevPage,
'next_page' => $nextPage,
'staleness_distribution' => $stalenessDistribution
]);
}
@ -2207,4 +2341,30 @@ EOT;
{
return $this->extractJsonArrayByKey($filePath, 'specific_pages', $maxPages);
}
/**
 * Calculate the median value of an array of numbers.
 *
 * The array is received by value, so sorting it here does not reorder the
 * caller's array.
 *
 * @param array $array Array of numbers (numeric strings are compared and returned as numbers)
 * @return float The median value, or 0.0 when the array is empty
 */
private function calculateMedian(array $array): float
{
    $count = count($array);
    if ($count === 0) {
        return 0.0;
    }

    // sort() also reindexes the array to 0..n-1, which the offsets below rely on.
    sort($array, SORT_NUMERIC);

    // intdiv() keeps the offset an integer; floor() would produce a float array key.
    $middle = intdiv($count, 2);

    if ($count % 2 === 0) {
        // Even number of elements, average the two middle values
        return ((float) $array[$middle - 1] + (float) $array[$middle]) / 2;
    }

    // Odd number of elements, return the middle value
    return (float) $array[$middle];
}
}