This commit is contained in:
Tykayn 2025-09-03 17:18:21 +02:00 committed by tykayn
parent 0df830f93d
commit 2ad98b5864
11 changed files with 808 additions and 224 deletions

View file

@ -8,6 +8,44 @@ use Symfony\Component\Routing\Annotation\Route;
class WikiController extends AbstractController
{
/**
 * Displays the decrepitude (staleness) scores read from the outdated pages JSON file.
 *
 * Reads `wiki_compare/outdated_pages.json` under the project directory and hands its
 * `regular_pages`, `specific_pages` and `last_updated` entries to the template, together
 * with flags telling whether the JSON file and the histogram image exist on disk.
 * A missing, unreadable or malformed JSON file falls back to empty lists and a null
 * date so that the page still renders.
 *
 * @return Response The rendered `admin/wiki_decrepitude.html.twig` template
 */
#[Route('/wiki/decrepitude', name: 'app_admin_wiki_decrepitude')]
public function decrepitudeScores(): Response
{
    $compareDir = $this->getParameter('kernel.project_dir') . '/wiki_compare';
    $outdatedPagesFile = $compareDir . '/outdated_pages.json';
    $histogramFile = $compareDir . '/staleness_histogram.png';

    $regularPages = [];
    $specificPages = [];
    $lastUpdated = null;

    $histogramExists = file_exists($histogramFile);
    // Check existence once so the flag given to the template matches what was actually read
    $jsonExists = file_exists($outdatedPagesFile);

    if ($jsonExists && is_readable($outdatedPagesFile)) {
        $rawJson = file_get_contents($outdatedPagesFile);
        // file_get_contents() returns false on failure; never feed that to json_decode()
        $outdatedPagesData = $rawJson !== false ? json_decode($rawJson, true) : null;

        // json_decode() yields null on invalid JSON and scalars for scalar documents
        if (is_array($outdatedPagesData)) {
            if (isset($outdatedPagesData['regular_pages']) && is_array($outdatedPagesData['regular_pages'])) {
                $regularPages = $outdatedPagesData['regular_pages'];
            }

            if (isset($outdatedPagesData['specific_pages']) && is_array($outdatedPagesData['specific_pages'])) {
                $specificPages = $outdatedPagesData['specific_pages'];
            }

            $lastUpdated = $outdatedPagesData['last_updated'] ?? null;
        }
    }

    return $this->render('admin/wiki_decrepitude.html.twig', [
        'regular_pages' => $regularPages,
        'specific_pages' => $specificPages,
        'last_updated' => $lastUpdated,
        'histogram_exists' => $histogramExists,
        'json_exists' => $jsonExists,
    ]);
}
/**
* Detects incorrect heading hierarchies in a list of sections
* For example, h4 directly under h2 without h3 in between
@ -283,9 +321,25 @@ class WikiController extends AbstractController
$this->addFlash('error', 'Impossible de générer le fichier des pages sans traduction.');
}
}
// Remove duplicates based on page title
$uniquePages = [];
$seenTitles = [];
foreach ($untranslatedPages as $page) {
if (!isset($seenTitles[$page['title']])) {
$seenTitles[$page['title']] = true;
$uniquePages[] = $page;
}
}
// Sort pages by title
usort($uniquePages, function($a, $b) {
return strcasecmp($a['title'], $b['title']);
});
return $this->render('admin/wiki_missing_translations.html.twig', [
'untranslated_pages' => $untranslatedPages,
'untranslated_pages' => $uniquePages,
'last_updated' => $lastUpdated
]);
}
@ -717,7 +771,7 @@ class WikiController extends AbstractController
public function createFrench(string $key): Response
{
// Construct the URLs for the English page and the French page creation form
$englishUrl = "https://wiki.openstreetmap.org/wiki/Key:{$key}";
$englishUrl = "https://wiki.openstreetmap.org/wiki/{$key}";
$frenchEditUrl = "https://wiki.openstreetmap.org/w/index.php?title=FR:{$key}&action=edit";
// Fetch the HTML content of the English page using wiki_compare.py
@ -910,7 +964,7 @@ EOT;
$pageDifferences = [];
$pagesUnavailableInEnglish = [];
// First pass: collect all staleness scores to find min and max
// Collect all staleness scores for statistics
$stalenessScores = [];
foreach ($csvData as $row) {
$page = array_combine($headers, $row);
@ -919,27 +973,40 @@ EOT;
}
}
// Find min and max scores for normalization
$minScore = !empty($stalenessScores) ? min($stalenessScores) : 0;
$maxScore = !empty($stalenessScores) ? max($stalenessScores) : 100;
// Calculate statistics
$stalenessStats = [
'count' => count($stalenessScores),
'min' => !empty($stalenessScores) ? min($stalenessScores) : 0,
'max' => !empty($stalenessScores) ? max($stalenessScores) : 0,
'mean' => 0,
'std_dev' => 0
];
// Calculate mean
if (!empty($stalenessScores)) {
$stalenessStats['mean'] = array_sum($stalenessScores) / count($stalenessScores);
// Calculate standard deviation
$variance = 0;
foreach ($stalenessScores as $score) {
$variance += pow($score - $stalenessStats['mean'], 2);
}
$stalenessStats['std_dev'] = sqrt($variance / count($stalenessScores));
}
// Round statistics to 2 decimal places
$stalenessStats['mean'] = round($stalenessStats['mean'], 2);
$stalenessStats['std_dev'] = round($stalenessStats['std_dev'], 2);
// Second pass: process pages and normalize scores
// Process pages - use absolute values without normalization
foreach ($csvData as $row) {
$page = array_combine($headers, $row);
// Normalize staleness score to 0-100 range (0 = best, 100 = worst)
// Use absolute values of staleness score without normalization
if (isset($page['staleness_score']) && is_numeric($page['staleness_score'])) {
$originalScore = (float)$page['staleness_score'];
// Avoid division by zero
if ($maxScore > $minScore) {
$normalizedScore = ($originalScore - $minScore) / ($maxScore - $minScore) * 100;
} else {
$normalizedScore = 50; // Default to middle value if all scores are the same
}
$page['staleness_score'] = abs((float)$page['staleness_score']);
// Round to 2 decimal places
$page['staleness_score'] = round($normalizedScore, 2);
$page['staleness_score'] = round($page['staleness_score'], 2);
}
$wikiPages[$page['key']][$page['language']] = $page;
@ -953,6 +1020,18 @@ EOT;
}
// Prepare arrays for statistics
$stats = [
'en_sections' => [],
'fr_sections' => [],
'en_words' => [],
'fr_words' => [],
'en_links' => [],
'fr_links' => [],
'en_media' => [],
'fr_media' => []
];
// Calculate differences between English and French versions
foreach ($wikiPages as $key => $languages) {
if (isset($languages['en']) && isset($languages['fr'])) {
@ -977,6 +1056,39 @@ EOT;
'media_diff' => $mediaDiff,
'media_diff_formatted' => ($mediaDiff >= 0 ? '+' : '') . $mediaDiff,
];
// Collect data for statistics
$stats['en_sections'][] = (int)$en['sections'];
$stats['fr_sections'][] = (int)$fr['sections'];
$stats['en_words'][] = (int)$en['word_count'];
$stats['fr_words'][] = (int)$fr['word_count'];
$stats['en_links'][] = (int)$en['link_count'];
$stats['fr_links'][] = (int)$fr['link_count'];
$stats['en_media'][] = isset($en['media_count']) ? (int)$en['media_count'] : 0;
$stats['fr_media'][] = isset($fr['media_count']) ? (int)$fr['media_count'] : 0;
}
}
// Calculate statistics
$wikiPagesStats = [];
foreach ($stats as $key => $values) {
if (!empty($values)) {
$mean = array_sum($values) / count($values);
// Calculate standard deviation
$variance = 0;
foreach ($values as $value) {
$variance += pow($value - $mean, 2);
}
$stdDev = sqrt($variance / count($values));
$wikiPagesStats[$key] = [
'count' => count($values),
'min' => min($values),
'max' => max($values),
'mean' => round($mean, 2),
'std_dev' => round($stdDev, 2)
];
}
}
@ -1022,7 +1134,9 @@ EOT;
'page_differences' => $pageDifferences,
'pages_unavailable_in_english' => $pagesUnavailableInEnglish,
'specific_pages' => $specificPages,
'newly_created_pages' => $newlyCreatedPages
'newly_created_pages' => $newlyCreatedPages,
'staleness_stats' => $stalenessStats,
'wiki_pages_stats' => $wikiPagesStats
]);
}
@ -1380,7 +1494,7 @@ EOT;
// Create URL for new French page if it doesn't exist
$createFrUrl = null;
if (!$frPage) {
$createFrUrl = 'https://wiki.openstreetmap.org/wiki/FR:Key:' . $key;
$createFrUrl = 'https://wiki.openstreetmap.org/wiki/FR:' . $key;
}
// Format section titles for copy functionality