projectDir = __DIR__;
    }

    /**
     * Looks up a named parameter.
     *
     * Only 'kernel.project_dir' is recognised; it resolves to the directory
     * stored on this object. Every other name yields null.
     *
     * @param mixed $name Parameter name (compared strictly against 'kernel.project_dir')
     * @return mixed The stored project directory, or null for unknown names
     */
    public function getParameter($name)
    {
        return $name === 'kernel.project_dir' ? $this->projectDir : null;
    }

    /**
     * Extracts an array from a large JSON file by key without decoding the whole file.
     *
     * The file is read line by line. Once `"<key>":` is found, the array that follows is
     * scanned with a string-aware depth counter, so brackets and commas inside string
     * values are ignored and nested objects are never mistaken for top-level items.
     * Scanning stops as soon as the array closes or $maxItems items have been collected;
     * each collected item is then decoded on its own, so one malformed item does not
     * discard the others.
     *
     * Limitations: the first textual occurrence of `"<key>":` wins regardless of nesting
     * depth, and lines are read whole, so a minified single-line file is still loaded
     * into memory by fgets().
     *
     * Problems are reported with echo and result in an empty (or shorter) array; no
     * exception is propagated to the caller.
     *
     * @param string $filePath Path to the JSON file
     * @param string $key The key of the array to extract
     * @param int $maxItems Maximum number of items to extract (to prevent memory exhaustion);
     *                      values below 1 yield an empty array
     * @return array The extracted items, at most $maxItems of them
     */
    public function extractJsonArrayByKey(string $filePath, string $key, int $maxItems = 100): array
    {
        if (!is_readable($filePath)) {
            echo "File is not readable: $filePath\n";
            return [];
        }

        $fileSize = filesize($filePath);
        if ($fileSize === false || $fileSize === 0) {
            echo "File is empty or size could not be determined: $filePath\n";
            return [];
        }

        // Nothing to collect; avoids the inconsistent results a non-positive limit used to give.
        if ($maxItems < 1) {
            return [];
        }

        $handle = fopen($filePath, 'r');
        if (!$handle) {
            echo "Could not open file: $filePath\n";
            return [];
        }

        try {
            $afterKey = $this->readPastJsonKey($handle, $key);
            if ($afterKey === null) {
                echo "Key '$key' not found in file: $filePath\n";
                return [];
            }

            // The value must start with '[' (possibly on a later line); anything else is not an array.
            $valueStart = $this->skipJsonWhitespace($handle, $afterKey);
            if ($valueStart === '' || $valueStart[0] !== '[') {
                echo "Could not find opening bracket for array '$key' in file: $filePath\n";
                return [];
            }

            $rawItems = $this->collectRawArrayItems($handle, substr($valueStart, 1), $maxItems);

            return $this->decodeRawItems($rawItems, $key);
        } catch (Exception $e) {
            echo "Exception in extractJsonArrayByKey for key '$key': " . $e->getMessage() . "\n";
            return [];
        } finally {
            // Runs on every exit path above, so the handle can no longer leak.
            fclose($handle);
        }
    }

    /**
     * Extracts a scalar value from a large JSON file by key without decoding the whole file.
     *
     * The first line containing `"<key>"` is inspected. If nothing but an optional colon
     * follows the key on that line, the next line is appended before matching. The value
     * token (string, number, true, false or null) must directly follow `"<key>":` and is
     * decoded with json_decode(), so negative numbers, floats, exponents and string escape
     * sequences are returned correctly.
     *
     * Problems are reported with echo; no exception is propagated to the caller.
     *
     * @param string $filePath Path to the JSON file
     * @param string $key The key of the scalar value to extract
     * @return mixed The extracted scalar value or null if not found
     */
    public function extractJsonScalarByKey(string $filePath, string $key): mixed
    {
        if (!is_readable($filePath)) {
            echo "File is not readable: $filePath\n";
            return null;
        }

        $handle = fopen($filePath, 'r');
        if (!$handle) {
            echo "Could not open file: $filePath\n";
            return null;
        }

        $searchKey = '"' . $key . '"';
        // Key, colon, then exactly one JSON scalar token (possessive groups keep long strings cheap).
        $valuePattern = '/' . preg_quote($searchKey, '/')
            . '\s*:\s*("(?:[^"\\\\]++|\\\\.)*+"|-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?|true\b|false\b|null\b)/';

        $found = false;
        $value = null;

        try {
            while (($line = fgets($handle)) !== false) {
                $keyPos = strpos($line, $searchKey);
                if ($keyPos === false) {
                    continue;
                }

                $found = true;
                $candidate = $line;
                $combined = false;

                // Only an optional colon after the key: the value has to be on the next line.
                $afterKey = substr($line, $keyPos + strlen($searchKey));
                if (preg_match('/^\s*(?::\s*)?$/', $afterKey) === 1) {
                    echo "Value for key '$key' might be on the next line, using fallback method\n";
                    $nextLine = fgets($handle);
                    if ($nextLine !== false) {
                        $candidate .= $nextLine;
                        $combined = true;
                    }
                }

                $decodedOk = false;
                if (preg_match($valuePattern, $candidate, $matches) === 1) {
                    $decoded = json_decode($matches[1], true);
                    if (json_last_error() === JSON_ERROR_NONE) {
                        $value = $decoded;
                        $decodedOk = true;
                    }
                }

                if (!$decodedOk) {
                    if ($combined) {
                        echo "Could not extract value for key '$key' from combined lines\n";
                    } else {
                        echo "Could not extract value for key '$key' from line: " . trim($line) . "\n";
                    }
                }

                // Only the first line mentioning the key is considered.
                break;
            }
        } catch (Exception $e) {
            echo "Exception in extractJsonScalarByKey for key '$key': " . $e->getMessage() . "\n";
            return null;
        } finally {
            fclose($handle);
        }

        if (!$found) {
            echo "Key '$key' not found in file: $filePath\n";
        } elseif ($value === null) {
            echo "Value for key '$key' is null or could not be extracted\n";
        }

        return $value;
    }

    /**
     * Advances the stream to the first line containing `"<key>"` followed by a colon.
     *
     * @param resource $handle Open, readable file handle
     * @param string $key JSON key to look for (matched literally, without JSON escaping)
     * @return string|null Remainder of the matching line after the colon, or null if no line matches
     */
    private function readPastJsonKey($handle, string $key): ?string
    {
        $pattern = '/' . preg_quote('"' . $key . '"', '/') . '\s*:/';

        while (($line = fgets($handle)) !== false) {
            if (preg_match($pattern, $line, $match, PREG_OFFSET_CAPTURE) === 1) {
                return substr($line, $match[0][1] + strlen($match[0][0]));
            }
        }

        return null;
    }

    /**
     * Drops leading whitespace from $text, pulling further lines from the stream while
     * only whitespace has been seen.
     *
     * @param resource $handle Open, readable file handle
     * @param string $text Text already read from the stream
     * @return string Text starting at the first non-whitespace byte, or '' at end of file
     */
    private function skipJsonWhitespace($handle, string $text): string
    {
        $text = ltrim($text);
        while ($text === '' && ($line = fgets($handle)) !== false) {
            $text = ltrim($line);
        }

        return $text;
    }

    /**
     * Splits the body of a JSON array into the raw text of its top-level items.
     *
     * $text must begin immediately after the array's opening '['. Depth starts at 1 and
     * string literals (including backslash escapes) are skipped, so only structural
     * brackets and commas influence the scan. If the stream ends before the array is
     * closed, the items completed so far are returned and the partial one is dropped.
     *
     * @param resource $handle Open, readable file handle positioned after $text
     * @param string $text Text following the opening bracket on the current line
     * @param int $maxItems Stop after this many items (must be >= 1)
     * @return array Raw JSON text of each collected item, in file order
     */
    private function collectRawArrayItems($handle, string $text, int $maxItems): array
    {
        $items = [];
        $current = '';
        $depth = 1;
        $inString = false;

        do {
            $length = strlen($text);
            $pos = 0;

            while ($pos < $length) {
                if ($inString) {
                    // Jump to the next quote or backslash; everything before it is plain content.
                    $span = strcspn($text, '"\\', $pos);
                    $current .= substr($text, $pos, $span);
                    $pos += $span;
                    if ($pos >= $length) {
                        break;
                    }

                    if ($text[$pos] === '\\') {
                        // Copy the backslash together with the character it escapes.
                        $current .= substr($text, $pos, 2);
                        $pos += 2;
                    } else {
                        $current .= '"';
                        $inString = false;
                        $pos++;
                    }
                    continue;
                }

                // Jump to the next structural character outside of a string.
                $span = strcspn($text, '"[]{},', $pos);
                $current .= substr($text, $pos, $span);
                $pos += $span;
                if ($pos >= $length) {
                    break;
                }

                $char = $text[$pos];
                $pos++;

                if ($char === ',' && $depth === 1) {
                    // Separator between two top-level items.
                    $items[] = trim($current);
                    $current = '';
                    if (count($items) >= $maxItems) {
                        return $items;
                    }
                    continue;
                }

                if ($char === ']' || $char === '}') {
                    $depth--;
                    if ($depth === 0) {
                        // Closing bracket of the target array; flush the last item, if any.
                        $last = trim($current);
                        if ($last !== '') {
                            $items[] = $last;
                        }
                        return $items;
                    }
                } elseif ($char === '[' || $char === '{') {
                    $depth++;
                } elseif ($char === '"') {
                    $inString = true;
                }

                $current .= $char;
            }
        } while (($text = fgets($handle)) !== false);

        return $items;
    }

    /**
     * Decodes raw JSON item texts one by one, skipping (and reporting) items that fail to parse.
     *
     * @param array $rawItems Raw JSON text per item
     * @param string $key Key the items belong to; used in error messages only
     * @return array Decoded items (objects become associative arrays), in the original order
     */
    private function decodeRawItems(array $rawItems, string $key): array
    {
        $decoded = [];

        foreach ($rawItems as $raw) {
            $value = json_decode($raw, true);
            // A literal JSON null also decodes to null, so consult the error state as well.
            if ($value === null && json_last_error() !== JSON_ERROR_NONE) {
                echo "JSON parse error: " . json_last_error_msg() . " for key '$key'\n";
                continue;
            }
            $decoded[] = $value;
        }

        return $decoded;
    }
}

// Create a mock controller
$controller = new MockController();

// Test the memory-efficient approach
echo "Testing memory-efficient approach for /wiki/compare/Key:harassment_prevention route\n";
echo "Memory limit: " . ini_get('memory_limit') . "\n\n";

// Input file and the page key to look for
$jsonFile = __DIR__ . '/wiki_compare/outdated_pages.json';
$key = 'Key:harassment_prevention';

// Abort early when the fixture is missing
if (!file_exists($jsonFile)) {
    echo "Error: File $jsonFile does not exist\n";
    exit(1);
}

echo "File size: " . round(filesize($jsonFile) / (1024 * 1024), 2) . " MB\n\n";

// Measure memory usage before
$memBefore = memory_get_usage();
echo "Memory usage before: " . round($memBefore / (1024 * 1024), 2) . " MB\n";

// Start timer
$startTime = microtime(true);

// Extract data using memory-efficient approach
$maxItems = 100;
$regularPages = $controller->extractJsonArrayByKey($jsonFile, 'regular_pages', $maxItems);
$specificPages = $controller->extractJsonArrayByKey($jsonFile, 'specific_pages', $maxItems);
// Extracted to exercise the code path; the result is not inspected below.
$historyEntries = $controller->extractJsonArrayByKey($jsonFile, 'history', $maxItems);

// Combine regular_pages and specific_pages
$allPages = array_merge($regularPages, $specificPages);

// Find the page with the matching key (only the first $maxItems of each list are searched)
$targetPage = null;
foreach ($allPages as $page) {
    if (isset($page['key']) && $page['key'] === $key) {
        $targetPage = $page;
        break;
    }
}

// End timer
$endTime = microtime(true);

// Measure memory usage after
$memAfter = memory_get_usage();
echo "Memory usage after: " . round($memAfter / (1024 * 1024), 2) . " MB\n";
echo "Memory used: " . round(($memAfter - $memBefore) / (1024 * 1024), 2) . " MB\n";
echo "Time taken: " . round($endTime - $startTime, 2) . " seconds\n\n";

// Check if we found the page
if ($targetPage) {
    echo "Successfully found page with key '$key'\n";
    echo "Page details:\n";
    echo "- Staleness score: " . ($targetPage['staleness_score'] ?? 'N/A') . "\n";
    echo "- Date diff: " . ($targetPage['date_diff'] ?? 'N/A') . "\n";
    echo "- Word diff: " . ($targetPage['word_diff'] ?? 'N/A') . "\n";
} else {
    echo "Page with key '$key' not found\n";
}

echo "\nTest completed successfully without memory exhaustion!\n";