diff --git a/src/Controller/WikiController.php b/src/Controller/WikiController.php index c43a7fa..26b1fda 100644 --- a/src/Controller/WikiController.php +++ b/src/Controller/WikiController.php @@ -23,19 +23,17 @@ class WikiController extends AbstractController $histogramExists = file_exists($histogramFile); if (file_exists($outdatedPagesFile)) { - $outdatedPagesData = json_decode(file_get_contents($outdatedPagesFile), true); + // Use memory-efficient approach to extract data from the large JSON file + $maxPages = 100; // Limit the number of pages to prevent memory exhaustion - if (isset($outdatedPagesData['regular_pages']) && is_array($outdatedPagesData['regular_pages'])) { - $regularPages = $outdatedPagesData['regular_pages']; - } + // Extract regular_pages array + $regularPages = $this->extractJsonArrayByKey($outdatedPagesFile, 'regular_pages', $maxPages); - if (isset($outdatedPagesData['specific_pages']) && is_array($outdatedPagesData['specific_pages'])) { - $specificPages = $outdatedPagesData['specific_pages']; - } + // Extract specific_pages array + $specificPages = $this->extractJsonArrayByKey($outdatedPagesFile, 'specific_pages', $maxPages); - if (isset($outdatedPagesData['last_updated'])) { - $lastUpdated = $outdatedPagesData['last_updated']; - } + // Extract last_updated value + $lastUpdated = $this->extractJsonScalarByKey($outdatedPagesFile, 'last_updated'); } return $this->render('admin/wiki_decrepitude.html.twig', [ @@ -738,21 +736,15 @@ class WikiController extends AbstractController return $this->redirectToRoute('app_admin_wiki'); } - $jsonData = json_decode(file_get_contents($jsonFile), true); - - if (empty($jsonData)) { - $this->addFlash('error', 'Aucune page à améliorer n\'a été trouvée.'); - return $this->redirectToRoute('app_admin_wiki'); - } - - // Combine regular_pages and specific_pages into a single array - $allPages = []; - if (isset($jsonData['regular_pages']) && is_array($jsonData['regular_pages'])) { - $allPages = array_merge($allPages, 
$jsonData['regular_pages']); - } - if (isset($jsonData['specific_pages']) && is_array($jsonData['specific_pages'])) { - $allPages = array_merge($allPages, $jsonData['specific_pages']); - } + // Use memory-efficient approach to extract only the necessary data + $maxItems = 100; // Limit the number of items to prevent memory exhaustion + + // Extract regular_pages and specific_pages arrays + $regularPages = $this->extractJsonArrayByKey($jsonFile, 'regular_pages', $maxItems); + $specificPages = $this->extractJsonArrayByKey($jsonFile, 'specific_pages', $maxItems); + + // Combine them into a single array + $allPages = array_merge($regularPages, $specificPages); if (empty($allPages)) { $this->addFlash('error', 'Aucune page à améliorer n\'a été trouvée.'); @@ -893,13 +885,17 @@ EOT; // Check if the archived proposals file exists and load it if (file_exists($jsonFile)) { - $proposalsData = json_decode(file_get_contents($jsonFile), true); - - if (isset($proposalsData['proposals']) && is_array($proposalsData['proposals'])) { - $proposals = $proposalsData['proposals']; - $statistics = $proposalsData['statistics'] ?? []; - $lastUpdated = isset($proposalsData['last_updated']) ? 
$proposalsData['last_updated'] : null; - } + // Use memory-efficient approach to extract only the necessary data + $maxItems = 100; // Limit the number of items to prevent memory exhaustion + + // Extract proposals array + $proposals = $this->extractJsonArrayByKey($jsonFile, 'proposals', $maxItems); + + // Extract statistics object + $statistics = $this->extractJsonArrayByKey($jsonFile, 'statistics', $maxItems); + + // Extract last_updated value + $lastUpdated = $this->extractJsonScalarByKey($jsonFile, 'last_updated'); // Check if the data is older than 1 day if ($lastUpdated) { @@ -1312,73 +1308,76 @@ EOT; $historyData = null; if (file_exists($jsonFile)) { - $jsonData = json_decode(file_get_contents($jsonFile), true); - + // Use memory-efficient approach to extract only the necessary data + $maxItems = 100; // Limit the number of items to prevent memory exhaustion + // Extract history data if available $historyData = []; - if (isset($jsonData['history']) && is_array($jsonData['history'])) { - // Process history data for the current key - foreach ($jsonData['history'] as $timestamp => $entry) { - $historyEntry = [ - 'timestamp' => $timestamp, - 'date' => (new \DateTime($timestamp))->format('Y-m-d'), - 'metrics' => [] - ]; - - // Check regular_pages - if (isset($entry['regular_pages']) && is_array($entry['regular_pages'])) { - foreach ($entry['regular_pages'] as $page) { - if (isset($page['key']) && $page['key'] === $key) { - // Extract metrics - $historyEntry['metrics'] = [ - 'staleness_score' => $page['staleness_score'] ?? 0, - 'date_diff' => $page['date_diff'] ?? 0, - 'word_diff' => $page['word_diff'] ?? 0, - 'section_diff' => $page['section_diff'] ?? 0, - 'link_diff' => $page['link_diff'] ?? 0, - 'media_diff' => $page['media_diff'] ?? 
0 - ]; - $historyData[] = $historyEntry; - break; - } - } - } - - // If not found in regular_pages, check specific_pages - if (empty($historyEntry['metrics']) && isset($entry['specific_pages']) && is_array($entry['specific_pages'])) { - foreach ($entry['specific_pages'] as $page) { - if (isset($page['key']) && $page['key'] === $key) { - // Extract metrics - $historyEntry['metrics'] = [ - 'staleness_score' => $page['staleness_score'] ?? 0, - 'date_diff' => $page['date_diff'] ?? 0, - 'word_diff' => $page['word_diff'] ?? 0, - 'section_diff' => $page['section_diff'] ?? 0, - 'link_diff' => $page['link_diff'] ?? 0, - 'media_diff' => $page['media_diff'] ?? 0 - ]; - $historyData[] = $historyEntry; - break; - } + + // Get history data from the JSON file + $historyEntries = $this->extractJsonArrayByKey($jsonFile, 'history', $maxItems); + + // Process history data for the current key + foreach ($historyEntries as $timestamp => $entry) { + $historyEntry = [ + 'timestamp' => $timestamp, + 'date' => is_string($timestamp) && !empty($timestamp) && $timestamp !== '0' ? + (new \DateTime($timestamp))->format('Y-m-d') : 'N/A', + 'metrics' => [] + ]; + + // Check regular_pages + if (isset($entry['regular_pages']) && is_array($entry['regular_pages'])) { + foreach ($entry['regular_pages'] as $page) { + if (isset($page['key']) && $page['key'] === $key) { + // Extract metrics + $historyEntry['metrics'] = [ + 'staleness_score' => $page['staleness_score'] ?? 0, + 'date_diff' => $page['date_diff'] ?? 0, + 'word_diff' => $page['word_diff'] ?? 0, + 'section_diff' => $page['section_diff'] ?? 0, + 'link_diff' => $page['link_diff'] ?? 0, + 'media_diff' => $page['media_diff'] ?? 
0 + ]; + $historyData[] = $historyEntry; + break; } } } - // Sort history data by timestamp - usort($historyData, function($a, $b) { - return strtotime($a['timestamp']) - strtotime($b['timestamp']); - }); + // If not found in regular_pages, check specific_pages + if (empty($historyEntry['metrics']) && isset($entry['specific_pages']) && is_array($entry['specific_pages'])) { + foreach ($entry['specific_pages'] as $page) { + if (isset($page['key']) && $page['key'] === $key) { + // Extract metrics + $historyEntry['metrics'] = [ + 'staleness_score' => $page['staleness_score'] ?? 0, + 'date_diff' => $page['date_diff'] ?? 0, + 'word_diff' => $page['word_diff'] ?? 0, + 'section_diff' => $page['section_diff'] ?? 0, + 'link_diff' => $page['link_diff'] ?? 0, + 'media_diff' => $page['media_diff'] ?? 0 + ]; + $historyData[] = $historyEntry; + break; + } + } + } } + + // Sort history data by timestamp + usort($historyData, function($a, $b) { + return strtotime($a['timestamp']) - strtotime($b['timestamp']); + }); - // Check both regular_pages and specific_pages sections - $allPages = []; - if (isset($jsonData['regular_pages']) && is_array($jsonData['regular_pages'])) { - $allPages = array_merge($allPages, $jsonData['regular_pages']); - } - if (isset($jsonData['specific_pages']) && is_array($jsonData['specific_pages'])) { - $allPages = array_merge($allPages, $jsonData['specific_pages']); - } + // Get regular_pages and specific_pages arrays + $regularPages = $this->extractJsonArrayByKey($jsonFile, 'regular_pages', $maxItems); + $specificPages = $this->extractJsonArrayByKey($jsonFile, 'specific_pages', $maxItems); + + // Combine them into a single array + $allPages = array_merge($regularPages, $specificPages); + // Find the page with the matching key foreach ($allPages as $page) { if (isset($page['key']) && $page['key'] === $key) { $mediaComparison = $page['media_comparison'] ?? 
null; @@ -1850,8 +1849,215 @@ EOT; return $contentHtml; }
+    /**
+     * Extracts the array or object stored under a top-level key of a large JSON file.
+     *
+     * The file is streamed by readTopLevelJsonValue(), so at most $maxItems entries of the
+     * requested value are buffered and decoded.
+     *
+     * @param string $filePath Path to the JSON file
+     * @param string $key The top-level key of the array or object to extract
+     * @param int $maxItems Maximum number of entries to extract (to prevent memory exhaustion)
+     * @return array The extracted entries (object keys are preserved), or an empty array on failure
+     */
+    private function extractJsonArrayByKey(string $filePath, string $key, int $maxItems = 100): array
+    {
+        if ($maxItems <= 0) {
+            return [];
+        }
+
+        $rawValue = $this->readTopLevelJsonValue($filePath, $key, $maxItems);
+        if ($rawValue === null) {
+            return [];
+        }
+
+        $decoded = json_decode($rawValue, true);
+        if (!is_array($decoded)) {
+            error_log("Value for key '$key' is not a JSON array or object in file: $filePath");
+            return [];
+        }
+
+        return $decoded;
+    }
+
+    /**
+     * Extracts a scalar value stored under a top-level key of a large JSON file.
+     *
+     * @param string $filePath Path to the JSON file
+     * @param string $key The top-level key of the scalar value to extract
+     * @return mixed The decoded string, number or boolean; null if the value is null, missing or not a scalar
+     */
+    private function extractJsonScalarByKey(string $filePath, string $key): mixed
+    {
+        $rawValue = $this->readTopLevelJsonValue($filePath, $key, 1);
+        if ($rawValue === null) {
+            return null;
+        }
+
+        $value = json_decode($rawValue, true);
+        if (json_last_error() !== JSON_ERROR_NONE || is_array($value)) {
+            error_log("Value for key '$key' is not a JSON scalar in file: $filePath");
+            return null;
+        }
+
+        return $value;
+    }
+
+    /**
+     * Streams a JSON file and returns the raw JSON text of the value stored under a top-level key.
+     *
+     * String and nesting state are tracked byte by byte, so brackets inside string values and
+     * same-named keys of nested objects are ignored. Arrays and objects are cut after $maxItems
+     * entries and closed again, which keeps the returned text decodable.
+     *
+     * @param string $filePath Path to the JSON file
+     * @param string $key The top-level key to look for
+     * @param int $maxItems Maximum number of entries kept when the value is an array or object
+     * @return string|null The raw JSON text, or null if the file is unreadable or the key is missing
+     */
+    private function readTopLevelJsonValue(string $filePath, string $key, int $maxItems): ?string
+    {
+        if (!is_readable($filePath)) {
+            error_log("File is not readable: $filePath");
+            return null;
+        }
+
+        $handle = fopen($filePath, 'rb');
+        if ($handle === false) {
+            error_log("Could not open file: $filePath");
+            return null;
+        }
+
+        $depth = 0;             // Nesting depth; members of the root object sit at depth 1
+        $inString = false;
+        $escaped = false;       // Previous byte was a backslash inside a string
+        $token = '';            // Raw text of the depth-1 string being read
+        $candidateKey = null;   // Last complete depth-1 string, i.e. a possible key
+        $awaitingValue = false; // Key matched; waiting for the first byte of its value
+        $capturing = false;
+        $isScalar = false;
+        $captured = '';
+        $valueDepth = 0;        // Depth of the captured array/object itself
+        $closer = '';
+        $separators = 0;        // Commas seen between entries of the captured value
+
+        try {
+            while (($chunk = fread($handle, 65536)) !== false && $chunk !== '') {
+                $length = strlen($chunk);
+                $i = 0;
+                while ($i < $length) {
+                    // Fast-forward over bytes that cannot change the scanner state
+                    if ($inString && !$escaped && !$capturing) {
+                        $run = strcspn($chunk, '"\\', $i);
+                        if ($run > 0) {
+                            if ($depth === 1) {
+                                $token .= substr($chunk, $i, $run);
+                            }
+                            $i += $run;
+                            continue;
+                        }
+                    } elseif (!$inString && !$capturing && !$awaitingValue) {
+                        $run = strcspn($chunk, '"{}[]:,', $i);
+                        if ($run > 0) {
+                            $i += $run;
+                            continue;
+                        }
+                    }
+
+                    $char = $chunk[$i++];
+
+                    if ($inString) {
+                        $closesString = !$escaped && $char === '"';
+                        $escaped = !$escaped && $char === '\\';
+                        if ($capturing) {
+                            $captured .= $char;
+                            if ($closesString && $isScalar) {
+                                return $captured;
+                            }
+                        } elseif ($depth === 1) {
+                            if ($closesString) {
+                                $candidateKey = $token;
+                            } else {
+                                $token .= $char;
+                            }
+                        }
+                        $inString = !$closesString;
+                        continue;
+                    }
+
+                    if ($awaitingValue) {
+                        if (str_contains(" \t\r\n", $char)) {
+                            continue;
+                        }
+                        $awaitingValue = false;
+                        $capturing = true;
+                        $captured = $char;
+                        if ($char === '[' || $char === '{') {
+                            $closer = $char === '[' ? ']' : '}';
+                            $valueDepth = ++$depth;
+                        } else {
+                            $isScalar = true;
+                            $inString = $char === '"';
+                        }
+                        continue;
+                    }
+
+                    if ($capturing && $isScalar) {
+                        // Numbers and literals end at the next separator, brace or whitespace
+                        if (str_contains(", }\t\r\n", $char)) {
+                            return $captured;
+                        }
+                        $captured .= $char;
+                        continue;
+                    }
+
+                    if ($capturing) {
+                        if ($char === ',' && $depth === $valueDepth && ++$separators >= $maxItems) {
+                            // Entry limit reached: drop the rest and close the value
+                            return $captured . $closer;
+                        }
+                        $captured .= $char;
+                        if ($char === '"') {
+                            $inString = true;
+                        } elseif ($char === '[' || $char === '{') {
+                            $depth++;
+                        } elseif (($char === ']' || $char === '}') && --$depth < $valueDepth) {
+                            return $captured;
+                        }
+                        continue;
+                    }
+
+                    if ($char === '"') {
+                        $inString = true;
+                        $token = '';
+                    } elseif ($char === '{' || $char === '[') {
+                        $depth++;
+                    } elseif ($char === '}' || $char === ']') {
+                        $depth--;
+                    } elseif ($char === ':') {
+                        $awaitingValue = $depth === 1
+                            && $candidateKey !== null
+                            && json_decode('"' . $candidateKey . '"') === $key;
+                        $candidateKey = null;
+                    } elseif ($char === ',') {
+                        $candidateKey = null;
+                    }
+                }
+            }
+        } finally {
+            fclose($handle);
+        }
+
+        error_log($capturing
+            ? "Value for key '$key' is incomplete in file: $filePath"
+            : "Key '$key' not found in file: $filePath");
+
+        return null;
+    }
+
/** * Extracts the specific_pages array from a large JSON file without loading the entire file into memory + * This is a legacy method kept for backward compatibility * * @param string $filePath Path to the JSON file * @param int $maxPages Maximum number of pages to extract (to prevent memory exhaustion) @@ -1859,114 +2065,6 @@ EOT; */ private function extractSpecificPagesFromJson(string $filePath, int $maxPages = 100): array { - $specificPages = []; - - // For very large files, we'll use a more direct approach - // Instead of parsing the entire JSON structure, we'll extract just what we need - - // First, check if the file exists and is readable - if (!is_readable($filePath)) { - return $specificPages; - } - - // Get the file size - $fileSize = filesize($filePath); - if ($fileSize === false || $fileSize === 0) { - return $specificPages; - } - - // For very large files, we'll use a more efficient approach - // We'll search for the "specific_pages" key directly - 
$handle = fopen($filePath, 'r'); - if (!$handle) { - return $specificPages; - } - - // Variables to track parsing state - $inSpecificPages = false; - $bracketCount = 0; - $buffer = ''; - $pageCount = 0; - $lineCount = 0; - - // Skip ahead to find the specific_pages key more quickly - // This is a simple optimization for this specific file structure - $found = false; - while (!$found && ($line = fgets($handle)) !== false) { - $lineCount++; - if (strpos($line, '"specific_pages"') !== false) { - $found = true; - $inSpecificPages = true; - - // Find the opening bracket of the array - if (strpos($line, '[') !== false) { - $bracketCount = 1; - $buffer = '['; // Start the buffer with an opening bracket - } else { - // If the opening bracket is on the next line - $nextLine = fgets($handle); - if ($nextLine !== false && strpos($nextLine, '[') !== false) { - $bracketCount = 1; - $buffer = '['; // Start the buffer with an opening bracket - } - } - break; - } - } - - // If we didn't find the specific_pages key, return empty array - if (!$found) { - fclose($handle); - return $specificPages; - } - - // Now process the specific_pages array - while (($line = fgets($handle)) !== false) { - // Count opening and closing brackets to track array nesting - $openBrackets = substr_count($line, '[') + substr_count($line, '{'); - $closeBrackets = substr_count($line, ']') + substr_count($line, '}'); - $bracketCount += $openBrackets - $closeBrackets; - - // Add the line to our buffer - $buffer .= $line; - - // If we've reached the end of the array (bracketCount = 0) - if ($bracketCount === 0) { - // Parse the buffer as JSON - $parsedData = json_decode($buffer, true); - if (is_array($parsedData)) { - // Limit the number of pages to prevent memory exhaustion - $specificPages = array_slice($parsedData, 0, $maxPages); - } else { - // If parsing fails, log the error but don't crash - error_log('Failed to parse specific_pages JSON data in ' . 
$filePath); - } - break; - } - - // Check if we've found a complete page object (when we see a closing brace followed by a comma) - if (preg_match('/\}\s*,\s*$/m', $line)) { - $pageCount++; - // If we've reached the maximum number of pages, stop processing - if ($pageCount >= $maxPages) { - // Close the array properly - $buffer = rtrim($buffer, ",\r\n") . ']'; - // Parse the buffer as JSON - $parsedData = json_decode($buffer, true); - if (is_array($parsedData)) { - $specificPages = $parsedData; - } else { - // If parsing fails, log the error but don't crash - error_log('Failed to parse specific_pages JSON data in ' . $filePath . ' after reaching max pages'); - } - break; - } - } - } - - // Close the file - fclose($handle); - - return $specificPages; + return $this->extractJsonArrayByKey($filePath, 'specific_pages', $maxPages); } } \ No newline at end of file diff --git a/templates/admin/wiki.html.twig b/templates/admin/wiki.html.twig index 1cf652d..c8fef3a 100644 --- a/templates/admin/wiki.html.twig +++ b/templates/admin/wiki.html.twig @@ -293,20 +293,20 @@
{{ page.reason }}
+{#{{ page.reason }}
#}- Dernière modification: {{ page.fr_page.last_modified }} + Dernière modification: {{ page.fr_page.last_modified is defined ? page.fr_page.last_modified : 'Non disponible' }}
{% else %}@@ -66,11 +66,11 @@ {% endif %}
La page wiki pour la clé - "{{ page.key }}" n'existe pas en français.
+ "{% if page.key is defined %}{{ page.key }}{% elseif page.title is defined %}{{ page.title }}{% else %}Page sans clé{% endif %}" n'existe pas en français.Vous pouvez contribuer en créant cette page sur le wiki OpenStreetMap.
{{ page.reason }}
+{#{{ page.reason }}
#}- Dernière modification: {{ page.fr_page.last_modified }} + Dernière modification: {{ page.fr_page.last_modified is defined ? page.fr_page.last_modified : 'Non disponible' }}
{% else %}@@ -66,11 +66,11 @@ {% endif %}