up pages
This commit is contained in:
parent
dffb21b56e
commit
1ed74c2e2f
9 changed files with 1610 additions and 305 deletions
|
@ -23,19 +23,17 @@ class WikiController extends AbstractController
|
||||||
$histogramExists = file_exists($histogramFile);
|
$histogramExists = file_exists($histogramFile);
|
||||||
|
|
||||||
if (file_exists($outdatedPagesFile)) {
|
if (file_exists($outdatedPagesFile)) {
|
||||||
$outdatedPagesData = json_decode(file_get_contents($outdatedPagesFile), true);
|
// Use memory-efficient approach to extract data from the large JSON file
|
||||||
|
$maxPages = 100; // Limit the number of pages to prevent memory exhaustion
|
||||||
|
|
||||||
if (isset($outdatedPagesData['regular_pages']) && is_array($outdatedPagesData['regular_pages'])) {
|
// Extract regular_pages array
|
||||||
$regularPages = $outdatedPagesData['regular_pages'];
|
$regularPages = $this->extractJsonArrayByKey($outdatedPagesFile, 'regular_pages', $maxPages);
|
||||||
}
|
|
||||||
|
|
||||||
if (isset($outdatedPagesData['specific_pages']) && is_array($outdatedPagesData['specific_pages'])) {
|
// Extract specific_pages array
|
||||||
$specificPages = $outdatedPagesData['specific_pages'];
|
$specificPages = $this->extractJsonArrayByKey($outdatedPagesFile, 'specific_pages', $maxPages);
|
||||||
}
|
|
||||||
|
|
||||||
if (isset($outdatedPagesData['last_updated'])) {
|
// Extract last_updated value
|
||||||
$lastUpdated = $outdatedPagesData['last_updated'];
|
$lastUpdated = $this->extractJsonScalarByKey($outdatedPagesFile, 'last_updated');
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return $this->render('admin/wiki_decrepitude.html.twig', [
|
return $this->render('admin/wiki_decrepitude.html.twig', [
|
||||||
|
@ -738,21 +736,15 @@ class WikiController extends AbstractController
|
||||||
return $this->redirectToRoute('app_admin_wiki');
|
return $this->redirectToRoute('app_admin_wiki');
|
||||||
}
|
}
|
||||||
|
|
||||||
$jsonData = json_decode(file_get_contents($jsonFile), true);
|
// Use memory-efficient approach to extract only the necessary data
|
||||||
|
$maxItems = 100; // Limit the number of items to prevent memory exhaustion
|
||||||
|
|
||||||
if (empty($jsonData)) {
|
// Extract regular_pages and specific_pages arrays
|
||||||
$this->addFlash('error', 'Aucune page à améliorer n\'a été trouvée.');
|
$regularPages = $this->extractJsonArrayByKey($jsonFile, 'regular_pages', $maxItems);
|
||||||
return $this->redirectToRoute('app_admin_wiki');
|
$specificPages = $this->extractJsonArrayByKey($jsonFile, 'specific_pages', $maxItems);
|
||||||
}
|
|
||||||
|
|
||||||
// Combine regular_pages and specific_pages into a single array
|
// Combine them into a single array
|
||||||
$allPages = [];
|
$allPages = array_merge($regularPages, $specificPages);
|
||||||
if (isset($jsonData['regular_pages']) && is_array($jsonData['regular_pages'])) {
|
|
||||||
$allPages = array_merge($allPages, $jsonData['regular_pages']);
|
|
||||||
}
|
|
||||||
if (isset($jsonData['specific_pages']) && is_array($jsonData['specific_pages'])) {
|
|
||||||
$allPages = array_merge($allPages, $jsonData['specific_pages']);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (empty($allPages)) {
|
if (empty($allPages)) {
|
||||||
$this->addFlash('error', 'Aucune page à améliorer n\'a été trouvée.');
|
$this->addFlash('error', 'Aucune page à améliorer n\'a été trouvée.');
|
||||||
|
@ -893,13 +885,17 @@ EOT;
|
||||||
|
|
||||||
// Check if the archived proposals file exists and load it
|
// Check if the archived proposals file exists and load it
|
||||||
if (file_exists($jsonFile)) {
|
if (file_exists($jsonFile)) {
|
||||||
$proposalsData = json_decode(file_get_contents($jsonFile), true);
|
// Use memory-efficient approach to extract only the necessary data
|
||||||
|
$maxItems = 100; // Limit the number of items to prevent memory exhaustion
|
||||||
|
|
||||||
if (isset($proposalsData['proposals']) && is_array($proposalsData['proposals'])) {
|
// Extract proposals array
|
||||||
$proposals = $proposalsData['proposals'];
|
$proposals = $this->extractJsonArrayByKey($jsonFile, 'proposals', $maxItems);
|
||||||
$statistics = $proposalsData['statistics'] ?? [];
|
|
||||||
$lastUpdated = isset($proposalsData['last_updated']) ? $proposalsData['last_updated'] : null;
|
// Extract statistics object
|
||||||
}
|
$statistics = $this->extractJsonArrayByKey($jsonFile, 'statistics', $maxItems);
|
||||||
|
|
||||||
|
// Extract last_updated value
|
||||||
|
$lastUpdated = $this->extractJsonScalarByKey($jsonFile, 'last_updated');
|
||||||
|
|
||||||
// Check if the data is older than 1 day
|
// Check if the data is older than 1 day
|
||||||
if ($lastUpdated) {
|
if ($lastUpdated) {
|
||||||
|
@ -1312,73 +1308,76 @@ EOT;
|
||||||
$historyData = null;
|
$historyData = null;
|
||||||
|
|
||||||
if (file_exists($jsonFile)) {
|
if (file_exists($jsonFile)) {
|
||||||
$jsonData = json_decode(file_get_contents($jsonFile), true);
|
// Use memory-efficient approach to extract only the necessary data
|
||||||
|
$maxItems = 100; // Limit the number of items to prevent memory exhaustion
|
||||||
|
|
||||||
// Extract history data if available
|
// Extract history data if available
|
||||||
$historyData = [];
|
$historyData = [];
|
||||||
if (isset($jsonData['history']) && is_array($jsonData['history'])) {
|
|
||||||
// Process history data for the current key
|
|
||||||
foreach ($jsonData['history'] as $timestamp => $entry) {
|
|
||||||
$historyEntry = [
|
|
||||||
'timestamp' => $timestamp,
|
|
||||||
'date' => (new \DateTime($timestamp))->format('Y-m-d'),
|
|
||||||
'metrics' => []
|
|
||||||
];
|
|
||||||
|
|
||||||
// Check regular_pages
|
// Get history data from the JSON file
|
||||||
if (isset($entry['regular_pages']) && is_array($entry['regular_pages'])) {
|
$historyEntries = $this->extractJsonArrayByKey($jsonFile, 'history', $maxItems);
|
||||||
foreach ($entry['regular_pages'] as $page) {
|
|
||||||
if (isset($page['key']) && $page['key'] === $key) {
|
|
||||||
// Extract metrics
|
|
||||||
$historyEntry['metrics'] = [
|
|
||||||
'staleness_score' => $page['staleness_score'] ?? 0,
|
|
||||||
'date_diff' => $page['date_diff'] ?? 0,
|
|
||||||
'word_diff' => $page['word_diff'] ?? 0,
|
|
||||||
'section_diff' => $page['section_diff'] ?? 0,
|
|
||||||
'link_diff' => $page['link_diff'] ?? 0,
|
|
||||||
'media_diff' => $page['media_diff'] ?? 0
|
|
||||||
];
|
|
||||||
$historyData[] = $historyEntry;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// If not found in regular_pages, check specific_pages
|
// Process history data for the current key
|
||||||
if (empty($historyEntry['metrics']) && isset($entry['specific_pages']) && is_array($entry['specific_pages'])) {
|
foreach ($historyEntries as $timestamp => $entry) {
|
||||||
foreach ($entry['specific_pages'] as $page) {
|
$historyEntry = [
|
||||||
if (isset($page['key']) && $page['key'] === $key) {
|
'timestamp' => $timestamp,
|
||||||
// Extract metrics
|
'date' => is_string($timestamp) && !empty($timestamp) && $timestamp !== '0' ?
|
||||||
$historyEntry['metrics'] = [
|
(new \DateTime($timestamp))->format('Y-m-d') : 'N/A',
|
||||||
'staleness_score' => $page['staleness_score'] ?? 0,
|
'metrics' => []
|
||||||
'date_diff' => $page['date_diff'] ?? 0,
|
];
|
||||||
'word_diff' => $page['word_diff'] ?? 0,
|
|
||||||
'section_diff' => $page['section_diff'] ?? 0,
|
// Check regular_pages
|
||||||
'link_diff' => $page['link_diff'] ?? 0,
|
if (isset($entry['regular_pages']) && is_array($entry['regular_pages'])) {
|
||||||
'media_diff' => $page['media_diff'] ?? 0
|
foreach ($entry['regular_pages'] as $page) {
|
||||||
];
|
if (isset($page['key']) && $page['key'] === $key) {
|
||||||
$historyData[] = $historyEntry;
|
// Extract metrics
|
||||||
break;
|
$historyEntry['metrics'] = [
|
||||||
}
|
'staleness_score' => $page['staleness_score'] ?? 0,
|
||||||
|
'date_diff' => $page['date_diff'] ?? 0,
|
||||||
|
'word_diff' => $page['word_diff'] ?? 0,
|
||||||
|
'section_diff' => $page['section_diff'] ?? 0,
|
||||||
|
'link_diff' => $page['link_diff'] ?? 0,
|
||||||
|
'media_diff' => $page['media_diff'] ?? 0
|
||||||
|
];
|
||||||
|
$historyData[] = $historyEntry;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sort history data by timestamp
|
// If not found in regular_pages, check specific_pages
|
||||||
usort($historyData, function($a, $b) {
|
if (empty($historyEntry['metrics']) && isset($entry['specific_pages']) && is_array($entry['specific_pages'])) {
|
||||||
return strtotime($a['timestamp']) - strtotime($b['timestamp']);
|
foreach ($entry['specific_pages'] as $page) {
|
||||||
});
|
if (isset($page['key']) && $page['key'] === $key) {
|
||||||
|
// Extract metrics
|
||||||
|
$historyEntry['metrics'] = [
|
||||||
|
'staleness_score' => $page['staleness_score'] ?? 0,
|
||||||
|
'date_diff' => $page['date_diff'] ?? 0,
|
||||||
|
'word_diff' => $page['word_diff'] ?? 0,
|
||||||
|
'section_diff' => $page['section_diff'] ?? 0,
|
||||||
|
'link_diff' => $page['link_diff'] ?? 0,
|
||||||
|
'media_diff' => $page['media_diff'] ?? 0
|
||||||
|
];
|
||||||
|
$historyData[] = $historyEntry;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check both regular_pages and specific_pages sections
|
// Sort history data by timestamp
|
||||||
$allPages = [];
|
usort($historyData, function($a, $b) {
|
||||||
if (isset($jsonData['regular_pages']) && is_array($jsonData['regular_pages'])) {
|
return strtotime($a['timestamp']) - strtotime($b['timestamp']);
|
||||||
$allPages = array_merge($allPages, $jsonData['regular_pages']);
|
});
|
||||||
}
|
|
||||||
if (isset($jsonData['specific_pages']) && is_array($jsonData['specific_pages'])) {
|
|
||||||
$allPages = array_merge($allPages, $jsonData['specific_pages']);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
// Get regular_pages and specific_pages arrays
|
||||||
|
$regularPages = $this->extractJsonArrayByKey($jsonFile, 'regular_pages', $maxItems);
|
||||||
|
$specificPages = $this->extractJsonArrayByKey($jsonFile, 'specific_pages', $maxItems);
|
||||||
|
|
||||||
|
// Combine them into a single array
|
||||||
|
$allPages = array_merge($regularPages, $specificPages);
|
||||||
|
|
||||||
|
// Find the page with the matching key
|
||||||
foreach ($allPages as $page) {
|
foreach ($allPages as $page) {
|
||||||
if (isset($page['key']) && $page['key'] === $key) {
|
if (isset($page['key']) && $page['key'] === $key) {
|
||||||
$mediaComparison = $page['media_comparison'] ?? null;
|
$mediaComparison = $page['media_comparison'] ?? null;
|
||||||
|
@ -1850,8 +1849,355 @@ EOT;
|
||||||
return $contentHtml;
|
return $contentHtml;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Extracts the array (or object) stored under a top-level key of a large JSON file
 * without decoding the whole document.
 *
 * The file is read in fixed-size chunks and scanned with a small state machine that
 * tracks string/escape state and nesting depth. Only a key that belongs to the root
 * object is matched, so identically named keys nested deeper in the document or text
 * inside string values are ignored. Once the value is found, its raw JSON is buffered
 * until either the container closes or $maxItems direct children have been collected;
 * the buffer is then closed and decoded on its own.
 *
 * Despite the method name, an object value is accepted as well and is returned as an
 * associative array (callers in this class pass object-valued keys too).
 *
 * @param string $filePath Path to the JSON file
 * @param string $key      Top-level key whose array/object value is extracted
 * @param int    $maxItems Maximum number of direct children to return (to bound memory use)
 * @return array The decoded children; an empty array when the file is unreadable or empty,
 *               the key is missing, the value is a scalar, or the JSON cannot be decoded
 */
private function extractJsonArrayByKey(string $filePath, string $key, int $maxItems = 100): array
{
    if ($maxItems <= 0) {
        return [];
    }

    if (!is_readable($filePath)) {
        error_log("File is not readable: $filePath");
        return [];
    }

    $fileSize = filesize($filePath);
    if ($fileSize === false || $fileSize === 0) {
        error_log("File is empty or size could not be determined: $filePath");
        return [];
    }

    $handle = fopen($filePath, 'r');
    if (!$handle) {
        error_log("Could not open file: $filePath");
        return [];
    }

    // Decodes the captured fragment; anything that is not a JSON array/object yields [].
    $decode = static function (string $json) use ($key): array {
        $decoded = json_decode($json, true);
        if (!is_array($decoded)) {
            error_log("JSON parse error: " . json_last_error_msg() . " for key '$key'");
            return [];
        }

        return $decoded;
    };

    try {
        $depth = 0;          // current nesting depth; 1 = directly inside the root container
        $inString = false;   // scanner is inside a JSON string
        $escaped = false;    // previous character was a backslash inside a string
        $afterColon = false; // at depth 1: a ':' was seen, so the next token is a value
        $collectKey = false; // the current string is a root-level key being recorded
        $keyText = '';       // raw (still escaped) text of the most recent root-level key
        $onTarget = false;   // the requested key was matched; its value comes next
        $capturing = false;  // the value of the requested key is being buffered
        $closer = '';        // bracket that closes the captured container
        $buffer = '';        // raw JSON of the captured container
        $itemCount = 0;      // completed direct children of the captured container

        while (!feof($handle)) {
            $chunk = fread($handle, 65536);
            if ($chunk === false || $chunk === '') {
                break;
            }

            $length = strlen($chunk);
            $pos = 0;

            while ($pos < $length) {
                if ($inString) {
                    // The character following a backslash is always literal, even a quote.
                    if ($escaped) {
                        $span = 1;
                        $escaped = false;
                    } else {
                        $span = strcspn($chunk, '"\\', $pos);
                    }

                    if ($span > 0) {
                        if ($capturing) {
                            $buffer .= substr($chunk, $pos, $span);
                        } elseif ($collectKey) {
                            $keyText .= substr($chunk, $pos, $span);
                        }
                        $pos += $span;
                        continue;
                    }

                    $char = $chunk[$pos++];
                    if ($capturing) {
                        $buffer .= $char;
                    }

                    if ($char === '\\') {
                        if (!$capturing && $collectKey) {
                            $keyText .= $char;
                        }
                        $escaped = true;
                        continue;
                    }

                    // Unescaped quote: the string ends here.
                    $inString = false;
                    continue;
                }

                // Outside strings only structural characters matter; skip everything else.
                $span = strcspn($chunk, '"{}[]:,', $pos);
                if ($span > 0) {
                    if ($capturing) {
                        $buffer .= substr($chunk, $pos, $span);
                    }
                    $pos += $span;
                    continue;
                }

                $char = $chunk[$pos++];

                if ($capturing) {
                    if ($char === '"') {
                        $inString = true;
                    } elseif ($char === '{' || $char === '[') {
                        $depth++;
                    } elseif ($char === '}' || $char === ']') {
                        if ($depth === 2) {
                            // The captured container itself closes: everything is buffered.
                            return $decode($buffer . $closer);
                        }
                        $depth--;
                    } elseif ($char === ',' && $depth === 2) {
                        // A comma directly inside the container ends one complete child.
                        $itemCount++;
                        if ($itemCount >= $maxItems) {
                            return $decode($buffer . $closer);
                        }
                    }

                    $buffer .= $char;
                    continue;
                }

                switch ($char) {
                    case '"':
                        if ($onTarget) {
                            error_log("Value for key '$key' is not an array or object in file: $filePath");
                            return [];
                        }
                        $inString = true;
                        $collectKey = ($depth === 1 && !$afterColon);
                        if ($collectKey) {
                            $keyText = '';
                        }
                        break;

                    case '{':
                    case '[':
                        if ($onTarget) {
                            $capturing = true;
                            $onTarget = false;
                            $closer = $char === '[' ? ']' : '}';
                            $buffer = $char;
                        }
                        $depth++;
                        break;

                    case '}':
                    case ']':
                        if ($onTarget) {
                            // A number/true/false/null literal preceded this bracket.
                            error_log("Value for key '$key' is not an array or object in file: $filePath");
                            return [];
                        }
                        $depth--;
                        break;

                    case ':':
                        if ($depth === 1) {
                            $afterColon = true;
                            $onTarget = $keyText === $key
                                || (str_contains($keyText, '\\') && json_decode('"' . $keyText . '"') === $key);
                        }
                        break;

                    case ',':
                        if ($onTarget) {
                            error_log("Value for key '$key' is not an array or object in file: $filePath");
                            return [];
                        }
                        if ($depth === 1) {
                            $afterColon = false;
                        }
                        break;
                }
            }
        }

        if ($capturing) {
            error_log("Unterminated value for key '$key' in file: $filePath");
        } else {
            error_log("Key '$key' not found in file: $filePath");
        }

        return [];
    } finally {
        // Always release the handle, whichever return path was taken.
        fclose($handle);
    }
}
|
||||||
|
|
||||||
|
/**
 * Extracts a scalar value stored under a top-level key of a large JSON file without
 * decoding the whole document.
 *
 * The file is read in fixed-size chunks and scanned with a small state machine that
 * tracks string/escape state and nesting depth, so only a key of the root object is
 * matched (same-named keys nested deeper, or text inside string values, are ignored).
 * The raw token that follows the key is decoded with json_decode(), which means escaped
 * strings, negative numbers and floats come back with their real JSON value.
 *
 * @param string $filePath Path to the JSON file
 * @param string $key      Top-level key of the scalar value to extract
 * @return mixed The decoded string/int/float/bool, or null when the value is JSON null,
 *               the key is missing, the value is an array/object, or it cannot be decoded
 */
private function extractJsonScalarByKey(string $filePath, string $key): mixed
{
    if (!is_readable($filePath)) {
        error_log("File is not readable: $filePath");
        return null;
    }

    $handle = fopen($filePath, 'r');
    if (!$handle) {
        error_log("Could not open file: $filePath");
        return null;
    }

    // Decodes one raw JSON token (quoted string or bare literal) into its PHP value.
    $decode = static function (string $json) use ($key): mixed {
        $decoded = json_decode($json, true);
        if ($decoded === null && json_last_error() !== JSON_ERROR_NONE) {
            error_log("Could not extract value for key '$key': " . json_last_error_msg());
            return null;
        }
        if ($decoded === null) {
            error_log("Value for key '$key' is null or could not be extracted");
        }

        return $decoded;
    };

    try {
        $depth = 0;          // current nesting depth; 1 = directly inside the root container
        $inString = false;   // scanner is inside a JSON string
        $escaped = false;    // previous character was a backslash inside a string
        $afterColon = false; // at depth 1: a ':' was seen, so the next token is a value
        $collectKey = false; // the current string is a root-level key being recorded
        $keyText = '';       // raw (still escaped) text of the most recent root-level key
        $onTarget = false;   // the requested key was matched; its value is being read
        $token = '';         // raw text of the value belonging to the requested key

        while (!feof($handle)) {
            $chunk = fread($handle, 65536);
            if ($chunk === false || $chunk === '') {
                break;
            }

            $length = strlen($chunk);
            $pos = 0;

            while ($pos < $length) {
                if ($inString) {
                    // The character following a backslash is always literal, even a quote.
                    if ($escaped) {
                        $span = 1;
                        $escaped = false;
                    } else {
                        $span = strcspn($chunk, '"\\', $pos);
                    }

                    if ($span > 0) {
                        if ($onTarget) {
                            $token .= substr($chunk, $pos, $span);
                        } elseif ($collectKey) {
                            $keyText .= substr($chunk, $pos, $span);
                        }
                        $pos += $span;
                        continue;
                    }

                    $char = $chunk[$pos++];

                    if ($char === '\\') {
                        if ($onTarget) {
                            $token .= $char;
                        } elseif ($collectKey) {
                            $keyText .= $char;
                        }
                        $escaped = true;
                        continue;
                    }

                    // Unescaped quote: the string ends here.
                    $inString = false;
                    if ($onTarget) {
                        return $decode('"' . $token . '"');
                    }
                    continue;
                }

                // Outside strings only structural characters matter; the text between
                // them is kept solely while reading the literal of the requested key.
                $span = strcspn($chunk, '"{}[]:,', $pos);
                if ($span > 0) {
                    if ($onTarget) {
                        $token .= substr($chunk, $pos, $span);
                    }
                    $pos += $span;
                    continue;
                }

                $char = $chunk[$pos++];

                switch ($char) {
                    case '"':
                        $inString = true;
                        if ($onTarget) {
                            // String value: drop the whitespace gathered before the quote.
                            $token = '';
                            break;
                        }
                        $collectKey = ($depth === 1 && !$afterColon);
                        if ($collectKey) {
                            $keyText = '';
                        }
                        break;

                    case '{':
                    case '[':
                        if ($onTarget) {
                            error_log("Value for key '$key' is not a scalar in file: $filePath");
                            return null;
                        }
                        $depth++;
                        break;

                    case '}':
                    case ']':
                        if ($onTarget) {
                            // Last member of the root object: the literal ends at the bracket.
                            return $decode(trim($token));
                        }
                        $depth--;
                        break;

                    case ':':
                        if ($depth === 1) {
                            $afterColon = true;
                            $token = '';
                            $onTarget = $keyText === $key
                                || (str_contains($keyText, '\\') && json_decode('"' . $keyText . '"') === $key);
                        }
                        break;

                    case ',':
                        if ($onTarget) {
                            return $decode(trim($token));
                        }
                        if ($depth === 1) {
                            $afterColon = false;
                        }
                        break;
                }
            }
        }

        if ($onTarget) {
            error_log("Unterminated value for key '$key' in file: $filePath");
        } else {
            error_log("Key '$key' not found in file: $filePath");
        }

        return null;
    } finally {
        // Always release the handle, whichever return path was taken.
        fclose($handle);
    }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extracts the specific_pages array from a large JSON file without loading the entire file into memory
|
* Extracts the specific_pages array from a large JSON file without loading the entire file into memory
|
||||||
|
* This is a legacy method kept for backward compatibility
|
||||||
*
|
*
|
||||||
* @param string $filePath Path to the JSON file
|
* @param string $filePath Path to the JSON file
|
||||||
* @param int $maxPages Maximum number of pages to extract (to prevent memory exhaustion)
|
* @param int $maxPages Maximum number of pages to extract (to prevent memory exhaustion)
|
||||||
|
@ -1859,114 +2205,6 @@ EOT;
|
||||||
*/
|
*/
|
||||||
private function extractSpecificPagesFromJson(string $filePath, int $maxPages = 100): array
|
private function extractSpecificPagesFromJson(string $filePath, int $maxPages = 100): array
|
||||||
{
|
{
|
||||||
$specificPages = [];
|
return $this->extractJsonArrayByKey($filePath, 'specific_pages', $maxPages);
|
||||||
|
|
||||||
// For very large files, we'll use a more direct approach
|
|
||||||
// Instead of parsing the entire JSON structure, we'll extract just what we need
|
|
||||||
|
|
||||||
// First, check if the file exists and is readable
|
|
||||||
if (!is_readable($filePath)) {
|
|
||||||
return $specificPages;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get the file size
|
|
||||||
$fileSize = filesize($filePath);
|
|
||||||
if ($fileSize === false || $fileSize === 0) {
|
|
||||||
return $specificPages;
|
|
||||||
}
|
|
||||||
|
|
||||||
// For very large files, we'll use a more efficient approach
|
|
||||||
// We'll search for the "specific_pages" key directly
|
|
||||||
$handle = fopen($filePath, 'r');
|
|
||||||
if (!$handle) {
|
|
||||||
return $specificPages;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Variables to track parsing state
|
|
||||||
$inSpecificPages = false;
|
|
||||||
$bracketCount = 0;
|
|
||||||
$buffer = '';
|
|
||||||
$pageCount = 0;
|
|
||||||
$lineCount = 0;
|
|
||||||
|
|
||||||
// Skip ahead to find the specific_pages key more quickly
|
|
||||||
// This is a simple optimization for this specific file structure
|
|
||||||
$found = false;
|
|
||||||
while (!$found && ($line = fgets($handle)) !== false) {
|
|
||||||
$lineCount++;
|
|
||||||
if (strpos($line, '"specific_pages"') !== false) {
|
|
||||||
$found = true;
|
|
||||||
$inSpecificPages = true;
|
|
||||||
|
|
||||||
// Find the opening bracket of the array
|
|
||||||
if (strpos($line, '[') !== false) {
|
|
||||||
$bracketCount = 1;
|
|
||||||
$buffer = '['; // Start the buffer with an opening bracket
|
|
||||||
} else {
|
|
||||||
// If the opening bracket is on the next line
|
|
||||||
$nextLine = fgets($handle);
|
|
||||||
if ($nextLine !== false && strpos($nextLine, '[') !== false) {
|
|
||||||
$bracketCount = 1;
|
|
||||||
$buffer = '['; // Start the buffer with an opening bracket
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we didn't find the specific_pages key, return empty array
|
|
||||||
if (!$found) {
|
|
||||||
fclose($handle);
|
|
||||||
return $specificPages;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Now process the specific_pages array
|
|
||||||
while (($line = fgets($handle)) !== false) {
|
|
||||||
// Count opening and closing brackets to track array nesting
|
|
||||||
$openBrackets = substr_count($line, '[') + substr_count($line, '{');
|
|
||||||
$closeBrackets = substr_count($line, ']') + substr_count($line, '}');
|
|
||||||
$bracketCount += $openBrackets - $closeBrackets;
|
|
||||||
|
|
||||||
// Add the line to our buffer
|
|
||||||
$buffer .= $line;
|
|
||||||
|
|
||||||
// If we've reached the end of the array (bracketCount = 0)
|
|
||||||
if ($bracketCount === 0) {
|
|
||||||
// Parse the buffer as JSON
|
|
||||||
$parsedData = json_decode($buffer, true);
|
|
||||||
if (is_array($parsedData)) {
|
|
||||||
// Limit the number of pages to prevent memory exhaustion
|
|
||||||
$specificPages = array_slice($parsedData, 0, $maxPages);
|
|
||||||
} else {
|
|
||||||
// If parsing fails, log the error but don't crash
|
|
||||||
error_log('Failed to parse specific_pages JSON data in ' . $filePath);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if we've found a complete page object (when we see a closing brace followed by a comma)
|
|
||||||
if (preg_match('/\}\s*,\s*$/m', $line)) {
|
|
||||||
$pageCount++;
|
|
||||||
// If we've reached the maximum number of pages, stop processing
|
|
||||||
if ($pageCount >= $maxPages) {
|
|
||||||
// Close the array properly
|
|
||||||
$buffer = rtrim($buffer, ",\r\n") . ']';
|
|
||||||
// Parse the buffer as JSON
|
|
||||||
$parsedData = json_decode($buffer, true);
|
|
||||||
if (is_array($parsedData)) {
|
|
||||||
$specificPages = $parsedData;
|
|
||||||
} else {
|
|
||||||
// If parsing fails, log the error but don't crash
|
|
||||||
error_log('Failed to parse specific_pages JSON data in ' . $filePath . ' after reaching max pages');
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Close the file
|
|
||||||
fclose($handle);
|
|
||||||
|
|
||||||
return $specificPages;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -293,20 +293,20 @@
|
||||||
<tr>
|
<tr>
|
||||||
<td>
|
<td>
|
||||||
<div class="d-flex align-items-center">
|
<div class="d-flex align-items-center">
|
||||||
{% if page.en_page.description_img_url is defined and page.en_page.description_img_url %}
|
{% if page.en_page is defined and page.en_page.description_img_url is defined and page.en_page.description_img_url %}
|
||||||
<div class="me-3">
|
<div class="me-3">
|
||||||
<img src="{{ page.en_page.description_img_url }}"
|
<img src="{{ page.en_page.description_img_url }}"
|
||||||
alt="{{ page.key }}"
|
alt="{{ page.title }}"
|
||||||
style="max-width: 80px; max-height: 60px; object-fit: contain;">
|
style="max-width: 80px; max-height: 60px; object-fit: contain;">
|
||||||
</div>
|
</div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
<div>
|
<div>
|
||||||
<strong>{{ page.key }}</strong>
|
<strong>{{ page.title }}</strong>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</td>
|
</td>
|
||||||
<td>
|
<td>
|
||||||
{{ page.reason }}
|
{# {{ page.reason }}#}
|
||||||
</td>
|
</td>
|
||||||
<td>
|
<td>
|
||||||
{% if page.staleness_score is defined %}
|
{% if page.staleness_score is defined %}
|
||||||
|
@ -326,26 +326,54 @@
|
||||||
</td>
|
</td>
|
||||||
<td class="text-center">
|
<td class="text-center">
|
||||||
<div class="btn-group" role="group">
|
<div class="btn-group" role="group">
|
||||||
<a href="{{ page.en_page.url }}" target="_blank"
|
{% if page.en_page is defined and page.en_page.url is defined %}
|
||||||
class="btn btn-sm btn-outline-primary" title="Version anglaise">
|
<a href="{{ page.en_page.url }}" target="_blank"
|
||||||
<i class="bi bi-translate"></i> EN
|
class="btn btn-sm btn-outline-primary" title="Version anglaise">
|
||||||
</a>
|
<i class="bi bi-translate"></i> EN
|
||||||
{% if page.fr_page %}
|
|
||||||
<a href="{{ page.fr_page.url }}" target="_blank"
|
|
||||||
class="btn btn-sm btn-outline-info" title="Version française">
|
|
||||||
<i class="bi bi-translate"></i> FR
|
|
||||||
</a>
|
|
||||||
<a href="{{ path('app_admin_wiki_compare', {'key': page.key}) }}"
|
|
||||||
class="btn btn-sm btn-outline-secondary"
|
|
||||||
title="Comparer les versions">
|
|
||||||
<i class="bi bi-arrows-angle-expand"></i> Comparer
|
|
||||||
</a>
|
</a>
|
||||||
|
{% endif %}
|
||||||
|
{% if page.fr_page is defined and page.fr_page %}
|
||||||
|
{% if page.fr_page.url is defined %}
|
||||||
|
<a href="{{ page.fr_page.url }}" target="_blank"
|
||||||
|
class="btn btn-sm btn-outline-info" title="Version française">
|
||||||
|
<i class="bi bi-translate"></i> FR
|
||||||
|
</a>
|
||||||
|
{% endif %}
|
||||||
|
{% if page.key is defined %}
|
||||||
|
<a href="{{ path('app_admin_wiki_compare', {'key': page.key}) }}"
|
||||||
|
class="btn btn-sm btn-outline-secondary"
|
||||||
|
title="Comparer les versions">
|
||||||
|
<i class="bi bi-arrows-angle-expand"></i> Comparer
|
||||||
|
</a>
|
||||||
|
{% elseif page.title is defined %}
|
||||||
|
<a href="{{ path('app_admin_wiki_compare', {'key': page.title}) }}"
|
||||||
|
class="btn btn-sm btn-outline-secondary"
|
||||||
|
title="Comparer les versions">
|
||||||
|
<i class="bi bi-arrows-angle-expand"></i> Comparer
|
||||||
|
</a>
|
||||||
|
{% else %}
|
||||||
|
<button class="btn btn-sm btn-outline-secondary" disabled>
|
||||||
|
<i class="bi bi-arrows-angle-expand"></i> Comparer
|
||||||
|
</button>
|
||||||
|
{% endif %}
|
||||||
{% else %}
|
{% else %}
|
||||||
<a href="{{ path('app_admin_wiki_create_french', {'key': page.key}) }}"
|
{% if page.key is defined %}
|
||||||
class="btn btn-sm btn-success"
|
<a href="{{ path('app_admin_wiki_create_french', {'key': page.key}) }}"
|
||||||
title="Créer une traduction française">
|
class="btn btn-sm btn-success"
|
||||||
<i class="bi bi-plus-circle"></i> Traduire
|
title="Créer une traduction française">
|
||||||
</a>
|
<i class="bi bi-plus-circle"></i> Traduire
|
||||||
|
</a>
|
||||||
|
{% elseif page.title is defined %}
|
||||||
|
<a href="{{ path('app_admin_wiki_create_french', {'key': page.title}) }}"
|
||||||
|
class="btn btn-sm btn-success"
|
||||||
|
title="Créer une traduction française">
|
||||||
|
<i class="bi bi-plus-circle"></i> Traduire
|
||||||
|
</a>
|
||||||
|
{% else %}
|
||||||
|
<button class="btn btn-sm btn-outline-secondary" disabled>
|
||||||
|
<i class="bi bi-plus-circle"></i> Clé manquante
|
||||||
|
</button>
|
||||||
|
{% endif %}
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
|
|
|
@ -79,8 +79,10 @@ python3 wiki_compare.py</code></pre>
|
||||||
<tbody>
|
<tbody>
|
||||||
{% for page in regular_pages|slice(0, 20) %}
|
{% for page in regular_pages|slice(0, 20) %}
|
||||||
<tr>
|
<tr>
|
||||||
<td><strong>{{ page.key }}</strong></td>
|
<td><strong>{{ page.title }}</strong></td>
|
||||||
<td>{{ page.reason }}</td>
|
<td>
|
||||||
|
{# {{ page.reason }}#}
|
||||||
|
</td>
|
||||||
<td class="text-center">
|
<td class="text-center">
|
||||||
{% if page.word_diff > 0 %}
|
{% if page.word_diff > 0 %}
|
||||||
<span class="badge bg-danger">{{ page.word_diff }}</span>
|
<span class="badge bg-danger">{{ page.word_diff }}</span>
|
||||||
|
@ -126,22 +128,52 @@ python3 wiki_compare.py</code></pre>
|
||||||
class="btn btn-sm btn-outline-primary" title="Version anglaise">
|
class="btn btn-sm btn-outline-primary" title="Version anglaise">
|
||||||
<i class="bi bi-translate"></i> EN
|
<i class="bi bi-translate"></i> EN
|
||||||
</a>
|
</a>
|
||||||
{% if page.fr_page %}
|
{% if page.fr_page is defined and page.fr_page %}
|
||||||
<a href="{{ page.fr_page.url }}" target="_blank"
|
{% if page.fr_page.url is defined %}
|
||||||
class="btn btn-sm btn-outline-info" title="Version française">
|
<a href="{{ page.fr_page.url }}" target="_blank"
|
||||||
<i class="bi bi-translate"></i> FR
|
class="btn btn-sm btn-outline-info" title="Version française">
|
||||||
</a>
|
<i class="bi bi-translate"></i> FR
|
||||||
<a href="{{ path('app_admin_wiki_compare', {'key': page.key}) }}"
|
</a>
|
||||||
class="btn btn-sm btn-outline-secondary"
|
{% else %}
|
||||||
title="Comparer les versions">
|
<button class="btn btn-sm btn-outline-secondary" disabled>
|
||||||
<i class="bi bi-arrows-angle-expand"></i> Comparer
|
<i class="bi bi-translate"></i> FR (URL manquante)
|
||||||
</a>
|
</button>
|
||||||
|
{% endif %}
|
||||||
|
{% if page.key is defined %}
|
||||||
|
<a href="{{ path('app_admin_wiki_compare', {'key': page.key}) }}"
|
||||||
|
class="btn btn-sm btn-outline-secondary"
|
||||||
|
title="Comparer les versions">
|
||||||
|
<i class="bi bi-arrows-angle-expand"></i> Comparer
|
||||||
|
</a>
|
||||||
|
{% elseif page.title is defined %}
|
||||||
|
<a href="{{ path('app_admin_wiki_compare', {'key': page.title}) }}"
|
||||||
|
class="btn btn-sm btn-outline-secondary"
|
||||||
|
title="Comparer les versions">
|
||||||
|
<i class="bi bi-arrows-angle-expand"></i> Comparer
|
||||||
|
</a>
|
||||||
|
{% else %}
|
||||||
|
<button class="btn btn-sm btn-outline-secondary" disabled>
|
||||||
|
<i class="bi bi-arrows-angle-expand"></i> Comparer
|
||||||
|
</button>
|
||||||
|
{% endif %}
|
||||||
{% else %}
|
{% else %}
|
||||||
<a href="{{ path('app_admin_wiki_create_french', {'key': page.key}) }}"
|
{% if page.key is defined %}
|
||||||
class="btn btn-sm btn-success"
|
<a href="{{ path('app_admin_wiki_create_french', {'key': page.key}) }}"
|
||||||
title="Créer une traduction française">
|
class="btn btn-sm btn-success"
|
||||||
<i class="bi bi-plus-circle"></i> Traduire
|
title="Créer une traduction française">
|
||||||
</a>
|
<i class="bi bi-plus-circle"></i> Traduire
|
||||||
|
</a>
|
||||||
|
{% elseif page.title is defined %}
|
||||||
|
<a href="{{ path('app_admin_wiki_create_french', {'key': page.title}) }}"
|
||||||
|
class="btn btn-sm btn-success"
|
||||||
|
title="Créer une traduction française">
|
||||||
|
<i class="bi bi-plus-circle"></i> Traduire
|
||||||
|
</a>
|
||||||
|
{% else %}
|
||||||
|
<button class="btn btn-sm btn-outline-secondary" disabled>
|
||||||
|
<i class="bi bi-plus-circle"></i> Clé manquante
|
||||||
|
</button>
|
||||||
|
{% endif %}
|
||||||
{% endif %}
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
</td>
|
</td>
|
||||||
|
@ -178,17 +210,17 @@ python3 wiki_compare.py</code></pre>
|
||||||
{% if page.en_page.description_img_url is defined and page.en_page.description_img_url %}
|
{% if page.en_page.description_img_url is defined and page.en_page.description_img_url %}
|
||||||
<div class="me-3">
|
<div class="me-3">
|
||||||
<img src="{{ page.en_page.description_img_url }}"
|
<img src="{{ page.en_page.description_img_url }}"
|
||||||
alt="{{ page.key }}"
|
alt="{% if page.key is defined %}{{ page.key }}{% elseif page.title is defined %}{{ page.title }}{% else %}Image{% endif %}"
|
||||||
style="max-width: 80px; max-height: 60px; object-fit: contain;">
|
style="max-width: 80px; max-height: 60px; object-fit: contain;">
|
||||||
</div>
|
</div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
<div>
|
<div>
|
||||||
<strong>{{ page.key }}</strong>
|
<strong>{{ page.title }}</strong>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</td>
|
</td>
|
||||||
<td>
|
<td>
|
||||||
{{ page.reason }}
|
{# {{ page.reason }}#}
|
||||||
</td>
|
</td>
|
||||||
<td>
|
<td>
|
||||||
<div class="progress" style="height: 20px;">
|
<div class="progress" style="height: 20px;">
|
||||||
|
@ -208,22 +240,52 @@ python3 wiki_compare.py</code></pre>
|
||||||
class="btn btn-sm btn-outline-primary" title="Version anglaise">
|
class="btn btn-sm btn-outline-primary" title="Version anglaise">
|
||||||
<i class="bi bi-translate"></i> EN
|
<i class="bi bi-translate"></i> EN
|
||||||
</a>
|
</a>
|
||||||
{% if page.fr_page %}
|
{% if page.fr_page is defined and page.fr_page %}
|
||||||
<a href="{{ page.fr_page.url }}" target="_blank"
|
{% if page.fr_page.url is defined %}
|
||||||
class="btn btn-sm btn-outline-info" title="Version française">
|
<a href="{{ page.fr_page.url }}" target="_blank"
|
||||||
<i class="bi bi-translate"></i> FR
|
class="btn btn-sm btn-outline-info" title="Version française">
|
||||||
</a>
|
<i class="bi bi-translate"></i> FR
|
||||||
<a href="{{ path('app_admin_wiki_compare', {'key': page.key}) }}"
|
</a>
|
||||||
class="btn btn-sm btn-outline-secondary"
|
{% else %}
|
||||||
title="Comparer les versions">
|
<button class="btn btn-sm btn-outline-secondary" disabled>
|
||||||
<i class="bi bi-arrows-angle-expand"></i> Comparer
|
<i class="bi bi-translate"></i> FR (URL manquante)
|
||||||
</a>
|
</button>
|
||||||
|
{% endif %}
|
||||||
|
{% if page.key is defined %}
|
||||||
|
<a href="{{ path('app_admin_wiki_compare', {'key': page.key}) }}"
|
||||||
|
class="btn btn-sm btn-outline-secondary"
|
||||||
|
title="Comparer les versions">
|
||||||
|
<i class="bi bi-arrows-angle-expand"></i> Comparer
|
||||||
|
</a>
|
||||||
|
{% elseif page.title is defined %}
|
||||||
|
<a href="{{ path('app_admin_wiki_compare', {'key': page.title}) }}"
|
||||||
|
class="btn btn-sm btn-outline-secondary"
|
||||||
|
title="Comparer les versions">
|
||||||
|
<i class="bi bi-arrows-angle-expand"></i> Comparer
|
||||||
|
</a>
|
||||||
|
{% else %}
|
||||||
|
<button class="btn btn-sm btn-outline-secondary" disabled>
|
||||||
|
<i class="bi bi-arrows-angle-expand"></i> Comparer
|
||||||
|
</button>
|
||||||
|
{% endif %}
|
||||||
{% else %}
|
{% else %}
|
||||||
<a href="{{ path('app_admin_wiki_create_french', {'key': page.key}) }}"
|
{% if page.key is defined %}
|
||||||
class="btn btn-sm btn-success"
|
<a href="{{ path('app_admin_wiki_create_french', {'key': page.key}) }}"
|
||||||
title="Créer une traduction française">
|
class="btn btn-sm btn-success"
|
||||||
<i class="bi bi-plus-circle"></i> Traduire
|
title="Créer une traduction française">
|
||||||
</a>
|
<i class="bi bi-plus-circle"></i> Traduire
|
||||||
|
</a>
|
||||||
|
{% elseif page.title is defined %}
|
||||||
|
<a href="{{ path('app_admin_wiki_create_french', {'key': page.title}) }}"
|
||||||
|
class="btn btn-sm btn-success"
|
||||||
|
title="Créer une traduction française">
|
||||||
|
<i class="bi bi-plus-circle"></i> Traduire
|
||||||
|
</a>
|
||||||
|
{% else %}
|
||||||
|
<button class="btn btn-sm btn-outline-secondary" disabled>
|
||||||
|
<i class="bi bi-plus-circle"></i> Clé manquante
|
||||||
|
</button>
|
||||||
|
{% endif %}
|
||||||
{% endif %}
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
</td>
|
</td>
|
||||||
|
@ -251,7 +313,13 @@ python3 wiki_compare.py</code></pre>
|
||||||
const colors = [];
|
const colors = [];
|
||||||
|
|
||||||
{% for page in regular_pages|slice(0, 20) %}
|
{% for page in regular_pages|slice(0, 20) %}
|
||||||
labels.push("{{ page.key }}");
|
{% if page.key is defined %}
|
||||||
|
labels.push("{{ page.key }}");
|
||||||
|
{% elseif page.title is defined %}
|
||||||
|
labels.push("{{ page.title }}");
|
||||||
|
{% else %}
|
||||||
|
labels.push("Page sans clé");
|
||||||
|
{% endif %}
|
||||||
scores.push({{ page.staleness_score }});
|
scores.push({{ page.staleness_score }});
|
||||||
|
|
||||||
// Set color based on score
|
// Set color based on score
|
||||||
|
|
|
@ -11,12 +11,12 @@
|
||||||
|
|
||||||
<div class="card mb-4">
|
<div class="card mb-4">
|
||||||
<div class="card-header bg-primary text-white">
|
<div class="card-header bg-primary text-white">
|
||||||
<h2>{{ page.key }}</h2>
|
<h2>{% if page.key is defined %}{{ page.key }}{% elseif page.title is defined %}{{ page.title }}{% else %}Page sans clé{% endif %}</h2>
|
||||||
</div>
|
</div>
|
||||||
<div class="card-body">
|
<div class="card-body">
|
||||||
<div class="alert alert-info">
|
<div class="alert alert-info">
|
||||||
<h3>Raisons d'amélioration</h3>
|
<h3>Raisons d'amélioration</h3>
|
||||||
<p>{{ page.reason }}</p>
|
{# <p>{{ page.reason }}</p>#}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="row">
|
<div class="row">
|
||||||
|
@ -55,9 +55,9 @@
|
||||||
<div class="card h-100">
|
<div class="card h-100">
|
||||||
<div class="card-header bg-info text-white">
|
<div class="card-header bg-info text-white">
|
||||||
<h3>Version française</h3>
|
<h3>Version française</h3>
|
||||||
{% if page.fr_page %}
|
{% if page.fr_page is defined and page.fr_page %}
|
||||||
<p class="mb-0">
|
<p class="mb-0">
|
||||||
<small>Dernière modification: {{ page.fr_page.last_modified }}</small>
|
<small>Dernière modification: {{ page.fr_page.last_modified is defined ? page.fr_page.last_modified : 'Non disponible' }}</small>
|
||||||
</p>
|
</p>
|
||||||
{% else %}
|
{% else %}
|
||||||
<p class="mb-0">
|
<p class="mb-0">
|
||||||
|
@ -66,11 +66,11 @@
|
||||||
{% endif %}
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
<div class="card-body">
|
<div class="card-body">
|
||||||
{% if page.fr_page %}
|
{% if page.fr_page is defined and page.fr_page %}
|
||||||
<ul class="list-group mb-3">
|
<ul class="list-group mb-3">
|
||||||
<li class="list-group-item d-flex justify-content-between align-items-center">
|
<li class="list-group-item d-flex justify-content-between align-items-center">
|
||||||
Sections
|
Sections
|
||||||
<span class="badge bg-info rounded-pill">{{ page.fr_page.sections }}</span>
|
<span class="badge bg-info rounded-pill">{{ page.fr_page.sections is defined ? page.fr_page.sections : 0 }}</span>
|
||||||
</li>
|
</li>
|
||||||
<li class="list-group-item d-flex justify-content-between align-items-center">
|
<li class="list-group-item d-flex justify-content-between align-items-center">
|
||||||
Mots
|
Mots
|
||||||
|
@ -82,21 +82,38 @@
|
||||||
</li>
|
</li>
|
||||||
</ul>
|
</ul>
|
||||||
<div class="d-grid gap-2">
|
<div class="d-grid gap-2">
|
||||||
<a href="{{ page.fr_page.url }}" target="_blank" class="btn btn-outline-info">
|
{% if page.fr_page.url is defined %}
|
||||||
<i class="bi bi-box-arrow-up-right"></i> Voir la page française
|
<a href="{{ page.fr_page.url }}" target="_blank" class="btn btn-outline-info">
|
||||||
</a>
|
<i class="bi bi-box-arrow-up-right"></i> Voir la page française
|
||||||
|
</a>
|
||||||
|
{% else %}
|
||||||
|
<button class="btn btn-outline-secondary" disabled>
|
||||||
|
<i class="bi bi-box-arrow-up-right"></i> URL non disponible
|
||||||
|
</button>
|
||||||
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
{% else %}
|
{% else %}
|
||||||
<div class="alert alert-warning">
|
<div class="alert alert-warning">
|
||||||
<p><i class="bi bi-exclamation-triangle"></i> <strong>La page wiki pour la clé
|
<p><i class="bi bi-exclamation-triangle"></i> <strong>La page wiki pour la clé
|
||||||
"{{ page.key }}" n'existe pas en français.</strong></p>
|
"{% if page.key is defined %}{{ page.key }}{% elseif page.title is defined %}{{ page.title }}{% else %}Page sans clé{% endif %}" n'existe pas en français.</strong></p>
|
||||||
<p>Vous pouvez contribuer en créant cette page sur le wiki OpenStreetMap.</p>
|
<p>Vous pouvez contribuer en créant cette page sur le wiki OpenStreetMap.</p>
|
||||||
</div>
|
</div>
|
||||||
<div class="d-grid gap-2">
|
<div class="d-grid gap-2">
|
||||||
<a href="https://wiki.openstreetmap.org/w/index.php?title=FR:{{ page.key }}&action=edit"
|
{% if page.key is defined %}
|
||||||
target="_blank" class="btn btn-success">
|
<a href="https://wiki.openstreetmap.org/w/index.php?title=FR:{{ page.key }}&action=edit"
|
||||||
<i class="bi bi-plus-circle"></i> Créer la page française
|
target="_blank" class="btn btn-success">
|
||||||
</a>
|
<i class="bi bi-plus-circle"></i> Créer la page française
|
||||||
|
</a>
|
||||||
|
{% elseif page.title is defined %}
|
||||||
|
<a href="https://wiki.openstreetmap.org/w/index.php?title=FR:{{ page.title }}&action=edit"
|
||||||
|
target="_blank" class="btn btn-success">
|
||||||
|
<i class="bi bi-plus-circle"></i> Créer la page française
|
||||||
|
</a>
|
||||||
|
{% else %}
|
||||||
|
<button class="btn btn-outline-secondary" disabled>
|
||||||
|
<i class="bi bi-plus-circle"></i> Clé manquante
|
||||||
|
</button>
|
||||||
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
|
@ -105,9 +122,19 @@
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="mt-4 d-grid gap-2">
|
<div class="mt-4 d-grid gap-2">
|
||||||
<a href="{{ path('app_admin_wiki_compare', {'key': page.key}) }}" class="btn btn-primary">
|
{% if page.key is defined %}
|
||||||
<i class="bi bi-arrows-angle-expand"></i> Voir la comparaison détaillée
|
<a href="{{ path('app_admin_wiki_compare', {'key': page.key}) }}" class="btn btn-primary">
|
||||||
</a>
|
<i class="bi bi-arrows-angle-expand"></i> Voir la comparaison détaillée
|
||||||
|
</a>
|
||||||
|
{% elseif page.title is defined %}
|
||||||
|
<a href="{{ path('app_admin_wiki_compare', {'key': page.title}) }}" class="btn btn-primary">
|
||||||
|
<i class="bi bi-arrows-angle-expand"></i> Voir la comparaison détaillée
|
||||||
|
</a>
|
||||||
|
{% else %}
|
||||||
|
<button class="btn btn-outline-secondary" disabled>
|
||||||
|
<i class="bi bi-arrows-angle-expand"></i> Comparaison non disponible
|
||||||
|
</button>
|
||||||
|
{% endif %}
|
||||||
<a href="{{ path('app_admin_wiki_random_suggestion') }}" class="btn btn-secondary">
|
<a href="{{ path('app_admin_wiki_random_suggestion') }}" class="btn btn-secondary">
|
||||||
<i class="bi bi-shuffle"></i> Autre suggestion aléatoire
|
<i class="bi bi-shuffle"></i> Autre suggestion aléatoire
|
||||||
</a>
|
</a>
|
||||||
|
|
|
@ -206,18 +206,18 @@
|
||||||
{% if page.en_page.description_img_url is defined and page.en_page.description_img_url %}
|
{% if page.en_page.description_img_url is defined and page.en_page.description_img_url %}
|
||||||
<div class="me-3">
|
<div class="me-3">
|
||||||
<img src="{{ page.en_page.description_img_url }}"
|
<img src="{{ page.en_page.description_img_url }}"
|
||||||
alt="{{ page.key }}"
|
alt="{% if page.key is defined %}{{ page.key }}{% elseif page.title is defined %}{{ page.title }}{% else %}Image{% endif %}"
|
||||||
style="max-width: 80px; max-height: 60px; object-fit: contain;">
|
style="max-width: 80px; max-height: 60px; object-fit: contain;">
|
||||||
</div>
|
</div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
<div>
|
<div>
|
||||||
<strong>{{ page.key }}</strong>
|
<strong>{{ page.title }}</strong>
|
||||||
<span class="badge bg-primary">Spécifique</span>
|
<span class="badge bg-primary">Spécifique</span>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</td>
|
</td>
|
||||||
<td>
|
<td>
|
||||||
{{ page.reason }}
|
{# {{ page.reason }}#}
|
||||||
</td>
|
</td>
|
||||||
<td>
|
<td>
|
||||||
{% if page.staleness_score is defined %}
|
{% if page.staleness_score is defined %}
|
||||||
|
@ -241,22 +241,52 @@
|
||||||
class="btn btn-sm btn-outline-primary" title="Version anglaise">
|
class="btn btn-sm btn-outline-primary" title="Version anglaise">
|
||||||
<i class="bi bi-translate"></i> EN
|
<i class="bi bi-translate"></i> EN
|
||||||
</a>
|
</a>
|
||||||
{% if page.fr_page %}
|
{% if page.fr_page is defined and page.fr_page %}
|
||||||
<a href="{{ page.fr_page.url }}" target="_blank"
|
{% if page.fr_page.url is defined %}
|
||||||
class="btn btn-sm btn-outline-info" title="Version française">
|
<a href="{{ page.fr_page.url }}" target="_blank"
|
||||||
<i class="bi bi-translate"></i> FR
|
class="btn btn-sm btn-outline-info" title="Version française">
|
||||||
</a>
|
<i class="bi bi-translate"></i> FR
|
||||||
<a href="{{ path('app_admin_wiki_compare', {'key': page.key}) }}"
|
</a>
|
||||||
class="btn btn-sm btn-outline-secondary"
|
{% else %}
|
||||||
title="Comparer les versions">
|
<button class="btn btn-sm btn-outline-secondary" disabled>
|
||||||
<i class="bi bi-arrows-angle-expand"></i> Comparer
|
<i class="bi bi-translate"></i> FR (URL manquante)
|
||||||
</a>
|
</button>
|
||||||
|
{% endif %}
|
||||||
|
{% if page.key is defined %}
|
||||||
|
<a href="{{ path('app_admin_wiki_compare', {'key': page.key}) }}"
|
||||||
|
class="btn btn-sm btn-outline-secondary"
|
||||||
|
title="Comparer les versions">
|
||||||
|
<i class="bi bi-arrows-angle-expand"></i> Comparer
|
||||||
|
</a>
|
||||||
|
{% elseif page.title is defined %}
|
||||||
|
<a href="{{ path('app_admin_wiki_compare', {'key': page.title}) }}"
|
||||||
|
class="btn btn-sm btn-outline-secondary"
|
||||||
|
title="Comparer les versions">
|
||||||
|
<i class="bi bi-arrows-angle-expand"></i> Comparer
|
||||||
|
</a>
|
||||||
|
{% else %}
|
||||||
|
<button class="btn btn-sm btn-outline-secondary" disabled>
|
||||||
|
<i class="bi bi-arrows-angle-expand"></i> Comparer
|
||||||
|
</button>
|
||||||
|
{% endif %}
|
||||||
{% else %}
|
{% else %}
|
||||||
<a href="{{ path('app_admin_wiki_create_french', {'key': page.key}) }}"
|
{% if page.key is defined %}
|
||||||
class="btn btn-sm btn-success"
|
<a href="{{ path('app_admin_wiki_create_french', {'key': page.key}) }}"
|
||||||
title="Créer une traduction française">
|
class="btn btn-sm btn-success"
|
||||||
<i class="bi bi-plus-circle"></i> Traduire
|
title="Créer une traduction française">
|
||||||
</a>
|
<i class="bi bi-plus-circle"></i> Traduire
|
||||||
|
</a>
|
||||||
|
{% elseif page.title is defined %}
|
||||||
|
<a href="{{ path('app_admin_wiki_create_french', {'key': page.title}) }}"
|
||||||
|
class="btn btn-sm btn-success"
|
||||||
|
title="Créer une traduction française">
|
||||||
|
<i class="bi bi-plus-circle"></i> Traduire
|
||||||
|
</a>
|
||||||
|
{% else %}
|
||||||
|
<button class="btn btn-sm btn-outline-secondary" disabled>
|
||||||
|
<i class="bi bi-plus-circle"></i> Clé manquante
|
||||||
|
</button>
|
||||||
|
{% endif %}
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
|
@ -325,11 +355,19 @@
|
||||||
<i class="bi bi-flag-fill"></i> FR
|
<i class="bi bi-flag-fill"></i> FR
|
||||||
</a>
|
</a>
|
||||||
{% set en_url = page.url|replace({'FR:': ''}) %}
|
{% set en_url = page.url|replace({'FR:': ''}) %}
|
||||||
<a href="https://wiki.openstreetmap.org/w/index.php?title={{ page.key }}&action=edit" target="_blank" class="btn btn-sm btn-outline-primary" title="Créer la version anglaise">
|
{% if page.key is defined %}
|
||||||
|
<a href="https://wiki.openstreetmap.org/w/index.php?title={{ page.key }}&action=edit" target="_blank" class="btn btn-sm btn-outline-primary" title="Créer une traduction anglaise">
|
||||||
title="Créer une traduction anglaise">
|
<i class="bi bi-translate"></i> créer EN
|
||||||
<i class="bi bi-translate"></i> créer EN
|
</a>
|
||||||
</a>
|
{% elseif page.title is defined %}
|
||||||
|
<a href="https://wiki.openstreetmap.org/w/index.php?title={{ page.title }}&action=edit" target="_blank" class="btn btn-sm btn-outline-primary" title="Créer une traduction anglaise">
|
||||||
|
<i class="bi bi-translate"></i> créer EN
|
||||||
|
</a>
|
||||||
|
{% else %}
|
||||||
|
<button class="btn btn-sm btn-outline-secondary" disabled>
|
||||||
|
<i class="bi bi-translate"></i> Clé manquante
|
||||||
|
</button>
|
||||||
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
|
|
|
@ -16,7 +16,7 @@
|
||||||
<div class="card-body">
|
<div class="card-body">
|
||||||
<div class="alert alert-info">
|
<div class="alert alert-info">
|
||||||
<h3>Raisons d'amélioration</h3>
|
<h3>Raisons d'amélioration</h3>
|
||||||
<p>{{ page.reason }}</p>
|
{# <p>{{ page.reason }}</p>#}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="row">
|
<div class="row">
|
||||||
|
@ -55,9 +55,9 @@
|
||||||
<div class="card h-100">
|
<div class="card h-100">
|
||||||
<div class="card-header bg-info text-white">
|
<div class="card-header bg-info text-white">
|
||||||
<h3>Version française</h3>
|
<h3>Version française</h3>
|
||||||
{% if page.fr_page %}
|
{% if page.fr_page is defined and page.fr_page %}
|
||||||
<p class="mb-0">
|
<p class="mb-0">
|
||||||
<small>Dernière modification: {{ page.fr_page.last_modified }}</small>
|
<small>Dernière modification: {{ page.fr_page.last_modified is defined ? page.fr_page.last_modified : 'Non disponible' }}</small>
|
||||||
</p>
|
</p>
|
||||||
{% else %}
|
{% else %}
|
||||||
<p class="mb-0">
|
<p class="mb-0">
|
||||||
|
@ -66,11 +66,11 @@
|
||||||
{% endif %}
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
<div class="card-body">
|
<div class="card-body">
|
||||||
{% if page.fr_page %}
|
{% if page.fr_page is defined and page.fr_page %}
|
||||||
<ul class="list-group mb-3">
|
<ul class="list-group mb-3">
|
||||||
<li class="list-group-item d-flex justify-content-between align-items-center">
|
<li class="list-group-item d-flex justify-content-between align-items-center">
|
||||||
Sections
|
Sections
|
||||||
<span class="badge bg-info rounded-pill">{{ page.fr_page.sections }}</span>
|
<span class="badge bg-info rounded-pill">{{ page.fr_page.sections is defined ? page.fr_page.sections : 0 }}</span>
|
||||||
</li>
|
</li>
|
||||||
<li class="list-group-item d-flex justify-content-between align-items-center">
|
<li class="list-group-item d-flex justify-content-between align-items-center">
|
||||||
Mots
|
Mots
|
||||||
|
@ -82,9 +82,15 @@
|
||||||
</li>
|
</li>
|
||||||
</ul>
|
</ul>
|
||||||
<div class="d-grid gap-2">
|
<div class="d-grid gap-2">
|
||||||
<a href="{{ page.fr_page.url }}" target="_blank" class="btn btn-outline-info">
|
{% if page.fr_page.url is defined %}
|
||||||
<i class="bi bi-box-arrow-up-right"></i> Voir la page française
|
<a href="{{ page.fr_page.url }}" target="_blank" class="btn btn-outline-info">
|
||||||
</a>
|
<i class="bi bi-box-arrow-up-right"></i> Voir la page française
|
||||||
|
</a>
|
||||||
|
{% else %}
|
||||||
|
<button class="btn btn-outline-secondary" disabled>
|
||||||
|
<i class="bi bi-box-arrow-up-right"></i> URL non disponible
|
||||||
|
</button>
|
||||||
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
{% else %}
|
{% else %}
|
||||||
<div class="alert alert-warning">
|
<div class="alert alert-warning">
|
||||||
|
|
435
test_compare_route.php
Normal file
435
test_compare_route.php
Normal file
|
@ -0,0 +1,435 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
// Set memory limit to match the error condition (128MB)
|
||||||
|
ini_set('memory_limit', '128M');
|
||||||
|
|
||||||
|
// Include the WikiController class
|
||||||
|
require_once __DIR__ . '/vendor/autoload.php';
|
||||||
|
|
||||||
|
// Mock the necessary dependencies
|
||||||
|
class MockController {
|
||||||
|
private $projectDir;
|
||||||
|
|
||||||
|
/**
 * Initialise the mock with the directory containing this script as the
 * project directory (the value later served for 'kernel.project_dir').
 */
public function __construct()
{
    $this->projectDir = __DIR__;
}
|
||||||
|
|
||||||
|
/**
 * Minimal stand-in for a container parameter lookup.
 *
 * Only the name 'kernel.project_dir' is recognised; it resolves to the
 * directory stored in $this->projectDir. Any other name yields null.
 *
 * @param mixed $name Name of the parameter to look up.
 * @return mixed The stored project directory for 'kernel.project_dir', null otherwise.
 */
public function getParameter($name)
{
    return $name === 'kernel.project_dir' ? $this->projectDir : null;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extracts an array from a large JSON file by key without loading the entire file into memory
|
||||||
|
*
|
||||||
|
* @param string $filePath Path to the JSON file
|
||||||
|
* @param string $key The key of the array to extract
|
||||||
|
* @param int $maxItems Maximum number of items to extract (to prevent memory exhaustion)
|
||||||
|
* @return array The extracted array
|
||||||
|
*/
|
||||||
|
public function extractJsonArrayByKey(string $filePath, string $key, int $maxItems = 100): array
|
||||||
|
{
|
||||||
|
$result = [];
|
||||||
|
|
||||||
|
// First, check if the file exists and is readable
|
||||||
|
if (!is_readable($filePath)) {
|
||||||
|
echo "File is not readable: $filePath\n";
|
||||||
|
return $result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the file size
|
||||||
|
$fileSize = filesize($filePath);
|
||||||
|
if ($fileSize === false || $fileSize === 0) {
|
||||||
|
echo "File is empty or size could not be determined: $filePath\n";
|
||||||
|
return $result;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
// For very large files, we'll use a more efficient approach
|
||||||
|
// We'll search for the specified key directly
|
||||||
|
$handle = fopen($filePath, 'r');
|
||||||
|
if (!$handle) {
|
||||||
|
echo "Could not open file: $filePath\n";
|
||||||
|
return $result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Variables to track parsing state
|
||||||
|
$bracketCount = 0;
|
||||||
|
$buffer = '';
|
||||||
|
$itemCount = 0;
|
||||||
|
$inArray = false;
|
||||||
|
$arrayStarted = false;
|
||||||
|
|
||||||
|
// Skip ahead to find the specified key more quickly
|
||||||
|
$found = false;
|
||||||
|
$searchKey = '"' . $key . '"';
|
||||||
|
|
||||||
|
while (!$found && ($line = fgets($handle)) !== false) {
|
||||||
|
if (strpos($line, $searchKey) !== false) {
|
||||||
|
$found = true;
|
||||||
|
|
||||||
|
// Extract everything after the key
|
||||||
|
$keyPos = strpos($line, $searchKey);
|
||||||
|
$afterKey = substr($line, $keyPos + strlen($searchKey));
|
||||||
|
|
||||||
|
// Find the colon and then the opening bracket
|
||||||
|
if (strpos($afterKey, ':') !== false && strpos($afterKey, '[') !== false) {
|
||||||
|
$inArray = true;
|
||||||
|
$arrayStarted = true;
|
||||||
|
$bracketPos = strpos($afterKey, '[');
|
||||||
|
$buffer = '['; // Start the buffer with an opening bracket
|
||||||
|
$bracketCount = 1;
|
||||||
|
|
||||||
|
// Add everything after the opening bracket to the buffer
|
||||||
|
$buffer .= substr($afterKey, $bracketPos + 1);
|
||||||
|
} else if (strpos($afterKey, ':') !== false) {
|
||||||
|
// The opening bracket might be on the next line
|
||||||
|
$inArray = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we didn't find the key, return empty array
|
||||||
|
if (!$found) {
|
||||||
|
fclose($handle);
|
||||||
|
echo "Key '$key' not found in file: $filePath\n";
|
||||||
|
return $result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we found the key but not the opening bracket yet, look for it
|
||||||
|
if ($inArray && !$arrayStarted) {
|
||||||
|
while (($line = fgets($handle)) !== false) {
|
||||||
|
if (strpos($line, '[') !== false) {
|
||||||
|
$bracketPos = strpos($line, '[');
|
||||||
|
$buffer = '['; // Start the buffer with an opening bracket
|
||||||
|
$bracketCount = 1;
|
||||||
|
$arrayStarted = true;
|
||||||
|
|
||||||
|
// Add everything after the opening bracket to the buffer
|
||||||
|
$buffer .= substr($line, $bracketPos + 1);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we still haven't found the opening bracket, something is wrong
|
||||||
|
if (!$arrayStarted) {
|
||||||
|
fclose($handle);
|
||||||
|
echo "Could not find opening bracket for array '$key' in file: $filePath\n";
|
||||||
|
return $result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now process the array
|
||||||
|
$collectingItems = true;
|
||||||
|
while ($collectingItems && ($line = fgets($handle)) !== false) {
|
||||||
|
// Count opening and closing brackets to track array nesting
|
||||||
|
$openBrackets = substr_count($line, '[') + substr_count($line, '{');
|
||||||
|
$closeBrackets = substr_count($line, ']') + substr_count($line, '}');
|
||||||
|
$bracketCount += $openBrackets - $closeBrackets;
|
||||||
|
|
||||||
|
// Add the line to our buffer
|
||||||
|
$buffer .= $line;
|
||||||
|
|
||||||
|
// If we've reached the end of the array (bracketCount = 0)
|
||||||
|
if ($bracketCount === 0) {
|
||||||
|
$collectingItems = false;
|
||||||
|
|
||||||
|
// Try to parse the buffer as JSON
|
||||||
|
try {
|
||||||
|
$parsedData = json_decode($buffer, true);
|
||||||
|
if (json_last_error() !== JSON_ERROR_NONE) {
|
||||||
|
echo "JSON parse error: " . json_last_error_msg() . " for key '$key'\n";
|
||||||
|
|
||||||
|
// Try a different approach - manually construct a valid JSON array
|
||||||
|
// Split the buffer by objects (each starting with { and ending with })
|
||||||
|
preg_match_all('/\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}/s', $buffer, $matches);
|
||||||
|
|
||||||
|
if (!empty($matches[0])) {
|
||||||
|
// Take the first $maxItems objects
|
||||||
|
$objects = array_slice($matches[0], 0, $maxItems);
|
||||||
|
|
||||||
|
// Construct a valid JSON array
|
||||||
|
$validJson = '[' . implode(',', $objects) . ']';
|
||||||
|
|
||||||
|
// Try to parse the valid JSON
|
||||||
|
$parsedData = json_decode($validJson, true);
|
||||||
|
if (json_last_error() === JSON_ERROR_NONE && is_array($parsedData)) {
|
||||||
|
$result = $parsedData;
|
||||||
|
} else {
|
||||||
|
echo "Alternative JSON parsing approach also failed: " . json_last_error_msg() . " for key '$key'\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (is_array($parsedData)) {
|
||||||
|
// Limit the number of items to prevent memory exhaustion
|
||||||
|
$result = array_slice($parsedData, 0, $maxItems);
|
||||||
|
}
|
||||||
|
} catch (Exception $e) {
|
||||||
|
echo "Exception parsing JSON for key '$key': " . $e->getMessage() . "\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if we've found a complete item (when we see a closing brace followed by a comma)
|
||||||
|
// This is used to count items and limit the number of items processed
|
||||||
|
if (preg_match('/\}\s*,\s*$/m', $line)) {
|
||||||
|
$itemCount++;
|
||||||
|
|
||||||
|
// If we've reached the maximum number of items, stop processing
|
||||||
|
if ($itemCount >= $maxItems) {
|
||||||
|
$collectingItems = false;
|
||||||
|
|
||||||
|
// Create a valid JSON array with the items we've collected so far
|
||||||
|
// We need to ensure the buffer ends with a complete JSON object and a closing bracket
|
||||||
|
|
||||||
|
// First, find the last complete object (ending with })
|
||||||
|
$lastObjectEnd = strrpos($buffer, '}');
|
||||||
|
if ($lastObjectEnd !== false) {
|
||||||
|
// Truncate the buffer at the end of the last complete object
|
||||||
|
$buffer = substr($buffer, 0, $lastObjectEnd + 1);
|
||||||
|
// Add the closing bracket for the array
|
||||||
|
$buffer .= ']';
|
||||||
|
|
||||||
|
// Try to parse the buffer as JSON
|
||||||
|
try {
|
||||||
|
$parsedData = json_decode($buffer, true);
|
||||||
|
if (json_last_error() !== JSON_ERROR_NONE) {
|
||||||
|
echo "JSON parse error after max items: " . json_last_error_msg() . " for key '$key'\n";
|
||||||
|
|
||||||
|
// Try a different approach - manually construct a valid JSON array
|
||||||
|
// Split the buffer by objects (each starting with { and ending with })
|
||||||
|
preg_match_all('/\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}/s', $buffer, $matches);
|
||||||
|
|
||||||
|
if (!empty($matches[0])) {
|
||||||
|
// Take the first $maxItems objects
|
||||||
|
$objects = array_slice($matches[0], 0, $maxItems);
|
||||||
|
|
||||||
|
// Construct a valid JSON array
|
||||||
|
$validJson = '[' . implode(',', $objects) . ']';
|
||||||
|
|
||||||
|
// Try to parse the valid JSON
|
||||||
|
$parsedData = json_decode($validJson, true);
|
||||||
|
if (json_last_error() === JSON_ERROR_NONE && is_array($parsedData)) {
|
||||||
|
$result = $parsedData;
|
||||||
|
} else {
|
||||||
|
echo "Alternative JSON parsing approach also failed: " . json_last_error_msg() . " for key '$key'\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (is_array($parsedData)) {
|
||||||
|
$result = $parsedData;
|
||||||
|
}
|
||||||
|
} catch (Exception $e) {
|
||||||
|
echo "Exception parsing JSON after max items for key '$key': " . $e->getMessage() . "\n";
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
echo "Could not find the end of the last complete object for key '$key'\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close the file
|
||||||
|
fclose($handle);
|
||||||
|
|
||||||
|
} catch (Exception $e) {
|
||||||
|
echo "Exception in extractJsonArrayByKey for key '$key': " . $e->getMessage() . "\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
return $result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Extracts a scalar value from a large JSON file by key without loading the entire file into memory.
 *
 * The file is read line by line with a one-line lookahead, so a value (or the colon and
 * the value) that sits on the line following the key is still found. An occurrence of
 * "key" that is not followed by a colon (for example the same text used as a string
 * value) is ignored and the scan continues.
 *
 * The value token is decoded with json_decode(), so escaped characters inside strings,
 * negative numbers and floating point numbers are returned faithfully.
 *
 * @param string $filePath Path to the JSON file
 * @param string $key The key of the scalar value to extract
 * @return mixed The decoded string, int, float or bool; null when the file cannot be read,
 *               the key is not found, the value is JSON null, or the value is not a scalar
 */
public function extractJsonScalarByKey(string $filePath, string $key): mixed
{
    if (!is_readable($filePath)) {
        echo "File is not readable: $filePath\n";
        return null;
    }

    $handle = fopen($filePath, 'r');
    if (!$handle) {
        echo "Could not open file: $filePath\n";
        return null;
    }

    $searchKey = '"' . $key . '"';
    // The quoted key, then a colon; surrounding whitespace may include a line break.
    $keyPattern = '/' . preg_quote($searchKey, '/') . '\s*:\s*/';
    // One JSON scalar token anchored (\G) at the offset handed to preg_match():
    // a string with backslash escapes, a number, true, false or null.
    $scalarPattern = '/\G(?:"(?:[^"\\\\]|\\\\.)*"|-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?|true|false|null)/';

    try {
        $current = fgets($handle);
        while ($current !== false) {
            $lookahead = fgets($handle);

            // Cheap substring test first; the two-line window is only built for candidates.
            if (str_contains($current, $searchKey)) {
                $window = $lookahead === false ? $current : $current . $lookahead;

                // Only accept a key that starts on the current line; one that starts on
                // the lookahead line is handled by the next iteration with its own lookahead.
                $isKey = preg_match($keyPattern, $window, $keyMatch, PREG_OFFSET_CAPTURE) === 1
                    && $keyMatch[0][1] < strlen($current);

                if ($isKey) {
                    $valueOffset = $keyMatch[0][1] + strlen($keyMatch[0][0]);

                    if (preg_match($scalarPattern, $window, $valueMatch, 0, $valueOffset) === 1) {
                        $value = json_decode($valueMatch[0], true);
                        if ($value === null) {
                            echo "Value for key '$key' is null or could not be extracted\n";
                        }
                        return $value;
                    }

                    // The key exists but its value is an array/object or malformed.
                    echo "Could not extract value for key '$key' from line: " . trim($current) . "\n";
                    echo "Value for key '$key' is null or could not be extracted\n";
                    return null;
                }
            }

            $current = $lookahead;
        }
    } finally {
        // Runs on every exit path above, including the early returns.
        fclose($handle);
    }

    echo "Key '$key' not found in file: $filePath\n";
    return null;
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Driver: exercises the MockController extraction helpers against the real
// outdated_pages.json file and reports memory and time consumption.
$mock = new MockController();

echo "Testing memory-efficient approach for /wiki/compare/Key:harassment_prevention route\n";
echo "Memory limit: " . ini_get('memory_limit') . "\n\n";

// Input file and the wiki page key we want to locate in it
$jsonFile = __DIR__ . '/wiki_compare/outdated_pages.json';
$key = 'Key:harassment_prevention';

// Nothing to measure without the data file
if (!file_exists($jsonFile)) {
    echo "Error: File $jsonFile does not exist\n";
    exit(1);
}

$bytesPerMb = 1024 * 1024;

echo "File size: " . round(filesize($jsonFile) / $bytesPerMb, 2) . " MB\n\n";

// Baseline memory reading
$baselineMemory = memory_get_usage();
echo "Memory usage before: " . round($baselineMemory / $bytesPerMb, 2) . " MB\n";

$startedAt = microtime(true);

// Pull at most $limit entries from each of the three arrays
$limit = 100;
$regular = $mock->extractJsonArrayByKey($jsonFile, 'regular_pages', $limit);
$specific = $mock->extractJsonArrayByKey($jsonFile, 'specific_pages', $limit);
$history = $mock->extractJsonArrayByKey($jsonFile, 'history', $limit);

// Search regular pages first, then specific pages, for the wanted key
$match = null;
foreach (array_merge($regular, $specific) as $candidate) {
    if (($candidate['key'] ?? null) === $key) {
        $match = $candidate;
        break;
    }
}

$finishedAt = microtime(true);

// Memory reading after extraction and lookup
$finalMemory = memory_get_usage();
echo "Memory usage after: " . round($finalMemory / $bytesPerMb, 2) . " MB\n";
echo "Memory used: " . round(($finalMemory - $baselineMemory) / $bytesPerMb, 2) . " MB\n";
echo "Time taken: " . round($finishedAt - $startedAt, 2) . " seconds\n\n";

// Report the outcome of the lookup
if (!$match) {
    echo "Page with key '$key' not found\n";
} else {
    echo "Successfully found page with key '$key'\n";
    echo "Page details:\n";
    echo "- Staleness score: " . ($match['staleness_score'] ?? 'N/A') . "\n";
    echo "- Date diff: " . ($match['date_diff'] ?? 'N/A') . "\n";
    echo "- Word diff: " . ($match['word_diff'] ?? 'N/A') . "\n";
}

echo "\nTest completed successfully without memory exhaustion!\n";
|
464
test_decrepitude.php
Normal file
464
test_decrepitude.php
Normal file
|
@ -0,0 +1,464 @@
|
||||||
|
<?php

// Set memory limit to match the error condition (128MB)
ini_set('memory_limit', '128M');

// Paths to the large JSON file and to the histogram image
$outdatedPagesFile = __DIR__ . '/wiki_compare/outdated_pages.json';
$histogramFile = __DIR__ . '/wiki_compare/staleness_histogram.png';

echo "Testing memory usage for decrepitudeScores() method\n";

// filesize() on a missing file emits a warning and reports a misleading size,
// so stop early with an explicit error and a non-zero exit status instead.
if (!file_exists($outdatedPagesFile)) {
    echo "Error: File $outdatedPagesFile does not exist\n";
    exit(1);
}

echo "File size: " . round(filesize($outdatedPagesFile) / (1024 * 1024), 2) . " MB\n";
echo "Memory limit: " . ini_get('memory_limit') . "\n\n";
|
||||||
|
|
||||||
|
// Test the original approach (loading entire file)
|
||||||
|
/**
 * Loads the whole JSON file with file_get_contents() + json_decode() and prints
 * the number of regular/specific pages, the last update value, and the memory
 * and time the load took.
 *
 * @param string $filePath Path to the JSON file to load
 */
function testOriginalApproach($filePath)
{
    $bytesPerMb = 1024 * 1024;

    echo "Testing original approach (loading entire file)...\n";
    $baseline = memory_get_usage();
    echo "Memory usage before: " . round($baseline / $bytesPerMb, 2) . " MB\n";

    try {
        $startedAt = microtime(true);

        // Defaults used when the file is missing or a section is absent
        $regular = [];
        $specific = [];
        $updatedAt = null;

        if (file_exists($filePath)) {
            $decoded = json_decode(file_get_contents($filePath), true);

            // Only accept the page sections when they really are arrays
            $candidate = $decoded['regular_pages'] ?? null;
            if (is_array($candidate)) {
                $regular = $candidate;
            }

            $candidate = $decoded['specific_pages'] ?? null;
            if (is_array($candidate)) {
                $specific = $candidate;
            }

            $updatedAt = $decoded['last_updated'] ?? null;
        }

        $finishedAt = microtime(true);

        echo "Successfully loaded data:\n";
        echo "- Regular pages: " . count($regular) . "\n";
        echo "- Specific pages: " . count($specific) . "\n";
        echo "- Last updated: " . ($updatedAt ?? 'null') . "\n";

        $final = memory_get_usage();
        echo "Memory usage after: " . round($final / $bytesPerMb, 2) . " MB\n";
        echo "Memory used: " . round(($final - $baseline) / $bytesPerMb, 2) . " MB\n";
        echo "Time taken: " . round($finishedAt - $startedAt, 2) . " seconds\n";
    } catch (Exception $e) {
        echo "Error: " . $e->getMessage() . "\n";
    }

    echo "\n";
}
|
||||||
|
|
||||||
|
// Test the new approach (streaming)
|
||||||
|
/**
 * Reads the page arrays and the last update value through the streaming helpers
 * extractJsonArrayByKey() / extractJsonScalarByKey() (at most 100 entries per
 * array) and prints the item counts together with memory and time used.
 *
 * @param string $filePath Path to the JSON file to read
 */
function testNewApproach($filePath)
{
    $bytesPerMb = 1024 * 1024;

    echo "Testing new approach (streaming)...\n";
    $baseline = memory_get_usage();
    echo "Memory usage before: " . round($baseline / $bytesPerMb, 2) . " MB\n";

    try {
        $startedAt = microtime(true);

        $regular = extractJsonArrayByKey($filePath, 'regular_pages', 100);
        $specific = extractJsonArrayByKey($filePath, 'specific_pages', 100);
        $updatedAt = extractJsonScalarByKey($filePath, 'last_updated');

        $finishedAt = microtime(true);

        echo "Successfully loaded data:\n";
        echo "- Regular pages: " . count($regular) . "\n";
        echo "- Specific pages: " . count($specific) . "\n";
        echo "- Last updated: " . ($updatedAt ?? 'null') . "\n";

        $final = memory_get_usage();
        echo "Memory usage after: " . round($final / $bytesPerMb, 2) . " MB\n";
        echo "Memory used: " . round(($final - $baseline) / $bytesPerMb, 2) . " MB\n";
        echo "Time taken: " . round($finishedAt - $startedAt, 2) . " seconds\n";
    } catch (Exception $e) {
        echo "Error: " . $e->getMessage() . "\n";
    }

    echo "\n";
}
|
||||||
|
|
||||||
|
// Implementation of extractJsonArrayByKey
|
||||||
|
/**
 * Extracts up to $maxItems elements of the JSON array stored under $key, reading
 * the file line by line instead of decoding the whole document.
 *
 * How it works:
 *  1. Lines are scanned until one contains "key" followed by a colon.
 *  2. The first non-whitespace character after that colon (possibly on a later
 *     line) must be '['; otherwise the value is not an array and [] is returned.
 *  3. The array body is walked character by character. String literals (with
 *     backslash escapes) and nesting depth are tracked, so brackets or commas
 *     inside strings or nested objects never split an element. Each complete
 *     element is decoded on its own with json_decode().
 *
 * Reading stops as soon as $maxItems elements were decoded or the closing ']' of
 * the array is reached, so the remainder of the file is never read.
 *
 * Progress and error diagnostics are written with echo.
 *
 * @param string $filePath Path to the JSON file
 * @param string $key      Key whose array value should be extracted
 * @param int    $maxItems Maximum number of elements to return
 * @return array Decoded elements (associative arrays for JSON objects); empty when the
 *               file is unreadable or empty, the key is missing, the value is not an
 *               array, or $maxItems is not positive. If an element fails to decode or
 *               the file ends early, the elements decoded so far are returned.
 */
function extractJsonArrayByKey(string $filePath, string $key, int $maxItems = 100): array
{
    $result = [];

    if (!is_readable($filePath)) {
        echo "File is not readable: $filePath\n";
        return $result;
    }

    $fileSize = filesize($filePath);
    if ($fileSize === false || $fileSize === 0) {
        echo "File is empty or size could not be determined: $filePath\n";
        return $result;
    }

    $handle = fopen($filePath, 'r');
    if (!$handle) {
        echo "Could not open file: $filePath\n";
        return $result;
    }

    try {
        // --- Phase 1: locate `"key" :` -------------------------------------
        $keyPattern = '/"' . preg_quote($key, '/') . '"\s*:/';
        $pending = null; // text that follows the colon on the key line
        $lineCount = 0;

        while (($line = fgets($handle)) !== false) {
            $lineCount++;
            if ($lineCount % 1000 === 0) {
                echo "Processed $lineCount lines searching for $key...\r";
            }

            if (preg_match($keyPattern, $line, $match, PREG_OFFSET_CAPTURE) === 1) {
                echo "\nFound $key key at line $lineCount\n";
                $pending = substr($line, $match[0][1] + strlen($match[0][0]));
                break;
            }
        }

        if ($pending === null) {
            echo "$key key not found in the file\n";
            return $result;
        }

        // --- Phase 2: the value must start with '[' ------------------------
        $pending = ltrim($pending);
        while ($pending === '') {
            $line = fgets($handle);
            if ($line === false) {
                break;
            }
            $pending = ltrim($line);
        }

        if ($pending === '' || $pending[0] !== '[') {
            echo "Could not find opening bracket for array '$key' in file: $filePath\n";
            return $result;
        }

        if ($maxItems <= 0) {
            return $result;
        }

        echo "Processing $key array...\n";

        // --- Phase 3: split the array body into elements -------------------
        $chunk = substr($pending, 1); // rest of the line after '['
        $element = '';                // raw JSON text of the element being collected
        $depth = 0;                   // nesting depth inside the current element
        $inString = false;            // currently inside a JSON string literal
        $escaped = false;             // previous string character was a backslash

        while ($chunk !== false) {
            $length = strlen($chunk);

            for ($i = 0; $i < $length; $i++) {
                $char = $chunk[$i];

                if ($inString) {
                    // Inside a string nothing is structural; only look for its end.
                    $element .= $char;
                    if ($escaped) {
                        $escaped = false;
                    } elseif ($char === '\\') {
                        $escaped = true;
                    } elseif ($char === '"') {
                        $inString = false;
                    }
                    continue;
                }

                if ($depth === 0 && ($char === ',' || $char === ']')) {
                    // Element boundary (or end of the array) at the array's own level.
                    if (trim($element) !== '') {
                        $decoded = json_decode($element, true);
                        if (json_last_error() !== JSON_ERROR_NONE) {
                            echo "JSON parse error: " . json_last_error_msg() . " for key '$key'\n";
                            return $result;
                        }
                        $result[] = $decoded;
                        $element = '';
                    }

                    if ($char === ']') {
                        echo "Reached end of $key array\n";
                        echo "Parsed " . count($result) . " items from the $key array\n";
                        return $result;
                    }

                    if (count($result) >= $maxItems) {
                        echo "\nReached maximum number of items ($maxItems) for $key\n";
                        echo "Parsed " . count($result) . " items from the $key array\n";
                        return $result;
                    }
                    continue;
                }

                if ($char === '"') {
                    $inString = true;
                } elseif ($char === '[' || $char === '{') {
                    $depth++;
                } elseif ($char === ']' || $char === '}') {
                    $depth--;
                }
                $element .= $char;
            }

            $chunk = fgets($handle);
        }

        echo "Unexpected end of file while reading array '$key' in file: $filePath\n";
        return $result;
    } finally {
        // Runs on every exit path above, including the early returns.
        fclose($handle);
    }
}
|
||||||
|
|
||||||
|
// Implementation of extractJsonScalarByKey
|
||||||
|
/**
 * Extracts the scalar value stored under $key from a JSON file, reading the file
 * line by line instead of decoding the whole document.
 *
 * A one-line lookahead is kept while scanning, so the value is still found when
 * it (or the colon and the value) sits on the line after the key. Occurrences of
 * "key" that are not followed by a colon, such as the same text appearing as a
 * string value, are skipped.
 *
 * The matched value token is decoded with json_decode(); escaped characters in
 * strings, negative numbers and floats are therefore returned faithfully.
 *
 * Progress and error diagnostics are written with echo.
 *
 * @param string $filePath Path to the JSON file
 * @param string $key      Key whose scalar value should be extracted
 * @return mixed Decoded string, int, float or bool; null when the file cannot be read,
 *               the key is missing, the value is JSON null, or the value is not a scalar
 */
function extractJsonScalarByKey(string $filePath, string $key): mixed
{
    if (!is_readable($filePath)) {
        echo "File is not readable: $filePath\n";
        return null;
    }

    $handle = fopen($filePath, 'r');
    if (!$handle) {
        echo "Could not open file: $filePath\n";
        return null;
    }

    $searchKey = '"' . $key . '"';
    // The quoted key, then a colon; surrounding whitespace may include a line break.
    $keyPattern = '/' . preg_quote($searchKey, '/') . '\s*:\s*/';
    // One JSON scalar token anchored (\G) at the offset handed to preg_match():
    // a string with backslash escapes, a number, true, false or null.
    $scalarPattern = '/\G(?:"(?:[^"\\\\]|\\\\.)*"|-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?|true|false|null)/';

    $lineCount = 0;

    try {
        $current = fgets($handle);
        while ($current !== false) {
            $lineCount++;
            if ($lineCount % 1000 === 0) {
                echo "Processed $lineCount lines searching for $key...\r";
            }

            $lookahead = fgets($handle);

            // Cheap substring test first; the two-line window is only built for candidates.
            if (str_contains($current, $searchKey)) {
                $window = $lookahead === false ? $current : $current . $lookahead;

                // Only accept a key that starts on the current line; one that starts on
                // the lookahead line is handled by the next iteration with its own lookahead.
                $isKey = preg_match($keyPattern, $window, $keyMatch, PREG_OFFSET_CAPTURE) === 1
                    && $keyMatch[0][1] < strlen($current);

                if ($isKey) {
                    echo "\nFound $key key at line $lineCount\n";
                    $valueOffset = $keyMatch[0][1] + strlen($keyMatch[0][0]);

                    if (preg_match($scalarPattern, $window, $valueMatch, 0, $valueOffset) === 1) {
                        $value = json_decode($valueMatch[0], true);
                        if ($value === null) {
                            echo "Value for key '$key' is null or could not be extracted\n";
                        } else {
                            echo "Extracted value: " . $valueMatch[0] . "\n";
                        }
                        return $value;
                    }

                    // The key exists but its value is an array/object or malformed.
                    echo "Could not extract value for key '$key' from line: " . trim($current) . "\n";
                    echo "Value for key '$key' is null or could not be extracted\n";
                    return null;
                }
            }

            $current = $lookahead;
        }
    } finally {
        // Runs on every exit path above, including the early returns.
        fclose($handle);
    }

    echo "Key '$key' not found in file: $filePath\n";
    return null;
}
|
||||||
|
|
||||||
|
// The original (load-everything) approach is only announced, not executed:
// it is known to fail with memory exhaustion.
echo "=== ORIGINAL APPROACH ===\n",
    "Skipping original approach - known to fail with memory exhaustion\n\n";

// Run the streaming implementation against the large file
echo "=== NEW STREAMING APPROACH ===\n";
testNewApproach($outdatedPagesFile);

echo "Done testing!\n";
|
|
@ -98,6 +98,7 @@ SPECIFIC_PAGES = [
|
||||||
"Tag:harassment_prevention=ask_angela",
|
"Tag:harassment_prevention=ask_angela",
|
||||||
"Key:harassment_prevention",
|
"Key:harassment_prevention",
|
||||||
"Proposal process",
|
"Proposal process",
|
||||||
|
"Outil de Manipulation et d'Organisation",
|
||||||
"Automated_Edits_code_of_conduct",
|
"Automated_Edits_code_of_conduct",
|
||||||
"Key:cuisine",
|
"Key:cuisine",
|
||||||
"Libre_Charge_Map",
|
"Libre_Charge_Map",
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue