getMessage() . "\n"; } echo "\n"; }
// NOTE(review): the line above is the tail of a function that starts before this chunk; it is kept verbatim.

/**
 * Test the new approach (streaming).
 *
 * Extracts the 'regular_pages' and 'specific_pages' arrays (at most 100 items
 * each) and the 'last_updated' scalar from the given file, then prints item
 * counts, memory usage before/after, and the elapsed time.
 *
 * @param mixed $filePath Path of the JSON file; it is forwarded to the
 *                        string-typed extractor functions.
 */
function testNewApproach($filePath): void
{
    $bytesPerMb = 1024 * 1024;

    echo "Testing new approach (streaming)...\n";

    $memBefore = memory_get_usage();
    echo "Memory usage before: " . round($memBefore / $bytesPerMb, 2) . " MB\n";

    try {
        $startTime = microtime(true);

        $regularPages = extractJsonArrayByKey($filePath, 'regular_pages', 100);
        $specificPages = extractJsonArrayByKey($filePath, 'specific_pages', 100);
        $lastUpdated = extractJsonScalarByKey($filePath, 'last_updated');

        $endTime = microtime(true);

        // Booleans are spelled out so that `false` does not print as an empty string.
        $lastUpdatedText = is_bool($lastUpdated)
            ? ($lastUpdated ? 'true' : 'false')
            : ($lastUpdated ?? 'null');

        echo "Successfully loaded data:\n";
        echo "- Regular pages: " . count($regularPages) . "\n";
        echo "- Specific pages: " . count($specificPages) . "\n";
        echo "- Last updated: " . $lastUpdatedText . "\n";

        $memAfter = memory_get_usage();
        echo "Memory usage after: " . round($memAfter / $bytesPerMb, 2) . " MB\n";
        echo "Memory used: " . round(($memAfter - $memBefore) / $bytesPerMb, 2) . " MB\n";
        echo "Time taken: " . round($endTime - $startTime, 2) . " seconds\n";
    } catch (Exception $e) {
        echo "Error: " . $e->getMessage() . "\n";
    }

    echo "\n";
}

/**
 * Reads lines from $handle until one contains `"<key>"` used as an object key.
 *
 * An occurrence counts as a key when it is followed (after optional
 * whitespace) by a colon or by the end of the line; occurrences of the same
 * text used as a string value are skipped. Progress is echoed every 1000
 * lines and the line number of the match is echoed when found.
 *
 * @param resource $handle Open, readable file handle.
 * @param string   $key    Key name, without the surrounding quotes.
 *
 * @return array{line: string, afterKey: string}|null The matching line and the
 *         text following the quoted key on that line, or null at end of file.
 */
function locateJsonKey($handle, string $key): ?array
{
    $searchKey = '"' . $key . '"';
    $searchLength = strlen($searchKey);
    $lineCount = 0;

    while (($line = fgets($handle)) !== false) {
        $lineCount++;
        if ($lineCount % 1000 === 0) {
            echo "Processed $lineCount lines searching for $key...\r";
        }

        $offset = 0;
        while (($keyPos = strpos($line, $searchKey, $offset)) !== false) {
            $afterKey = substr($line, $keyPos + $searchLength);
            $trimmed = ltrim($afterKey);

            if ($trimmed === '' || $trimmed[0] === ':') {
                echo "\nFound $key key at line $lineCount\n";

                return ['line' => $line, 'afterKey' => $afterKey];
            }

            $offset = $keyPos + 1;
        }
    }

    return null;
}

/**
 * Finds the text that starts at the opening `[` of the array value.
 *
 * The colon must be on the key's own line. The bracket may follow on the same
 * line or be the first non-blank content of a later line.
 *
 * @param resource $handle   Open file handle positioned just after the key line.
 * @param string   $afterKey Text following the quoted key on its line.
 *
 * @return string|null Text beginning with `[` (running to the end of its
 *                     line), or null when the value is not an array.
 */
function locateJsonArrayStart($handle, string $afterKey): ?string
{
    $rest = ltrim($afterKey);
    if ($rest === '' || $rest[0] !== ':') {
        return null;
    }

    $rest = ltrim(substr($rest, 1));
    if ($rest !== '') {
        if ($rest[0] !== '[') {
            return null;
        }
        echo "Opening bracket found on the same line\n";

        return $rest;
    }

    echo "Colon found, but opening bracket might be on the next line\n";
    echo "Looking for opening bracket...\n";

    while (($line = fgets($handle)) !== false) {
        $rest = ltrim($line);
        if ($rest === '') {
            continue;
        }
        if ($rest[0] !== '[') {
            return null;
        }
        echo "Opening bracket found on the next line\n";

        return $rest;
    }

    return null;
}

/**
 * Scans one chunk of JSON text, tracking bracket depth outside string literals.
 *
 * Depth 1 means "directly inside the target array". Every time a nested
 * object/array closes back to depth 1, one top-level element is counted.
 * Scanning stops early when the target array closes (depth 0) or when
 * $itemsWanted elements have been completed.
 *
 * The chunk is assumed to start outside a string literal, which holds for
 * whole lines because JSON strings cannot contain raw line breaks.
 *
 * @param string $chunk       Text to scan.
 * @param int    $depth       Bracket depth before the chunk.
 * @param int    $itemsWanted Number of completed elements at which to stop.
 *
 * @return array{depth: int, items: int, cut: int|null, closed: bool}
 *         `cut` is the offset just past the character that ended the scan
 *         (null when the whole chunk was consumed); `closed` is true when that
 *         character closed the target array.
 */
function scanJsonArrayChunk(string $chunk, int $depth, int $itemsWanted): array
{
    $length = strlen($chunk);
    $items = 0;
    $inString = false;
    $pos = 0;

    while ($pos < $length) {
        if ($inString) {
            // Jump to the next quote or backslash; everything else is string content.
            $pos += strcspn($chunk, '"\\', $pos);
            if ($pos >= $length) {
                break;
            }
            if ($chunk[$pos] === '\\') {
                $pos += 2; // Skip the escaped character as well.
                continue;
            }
            $inString = false;
            $pos++;
            continue;
        }

        $pos += strcspn($chunk, '"[]{}', $pos);
        if ($pos >= $length) {
            break;
        }

        $char = $chunk[$pos];
        $pos++;

        if ($char === '"') {
            $inString = true;
            continue;
        }
        if ($char === '[' || $char === '{') {
            $depth++;
            continue;
        }

        // Closing bracket or brace.
        $depth--;
        if ($depth === 0) {
            return ['depth' => 0, 'items' => $items, 'cut' => $pos, 'closed' => true];
        }
        if ($depth === 1) {
            $items++;
            if ($items >= $itemsWanted) {
                return ['depth' => 1, 'items' => $items, 'cut' => $pos, 'closed' => false];
            }
        }
    }

    return ['depth' => $depth, 'items' => $items, 'cut' => null, 'closed' => false];
}

/**
 * Implementation of extractJsonArrayByKey.
 *
 * Streams the file line by line, finds the first `"<key>":` whose value is an
 * array, and decodes at most $maxItems of its elements without decoding the
 * rest of the document. Progress and diagnostics are echoed; every failure
 * (unreadable or empty file, missing key, non-array value, unterminated
 * array, invalid JSON) yields an empty array.
 *
 * Reading stops early once $maxItems object/array elements have been
 * collected. Scalar elements do not trigger the early stop, but the returned
 * list is always capped at $maxItems.
 *
 * Note: lines are read with fgets(), so a file stored as a single line is
 * still loaded into memory as one string.
 *
 * @param string $filePath Path of the JSON file.
 * @param string $key      Name of the key holding the array.
 * @param int    $maxItems Maximum number of elements to return; values <= 0
 *                         return an empty array.
 *
 * @return array Decoded elements (associative arrays for JSON objects).
 */
function extractJsonArrayByKey(string $filePath, string $key, int $maxItems = 100): array
{
    if (!is_readable($filePath)) {
        echo "File is not readable: $filePath\n";
        return [];
    }

    $fileSize = filesize($filePath);
    if ($fileSize === false || $fileSize === 0) {
        echo "File is empty or size could not be determined: $filePath\n";
        return [];
    }

    if ($maxItems <= 0) {
        return [];
    }

    $handle = fopen($filePath, 'r');
    if ($handle === false) {
        echo "Could not open file: $filePath\n";
        return [];
    }

    try {
        $match = locateJsonKey($handle, $key);
        if ($match === null) {
            echo "$key key not found in the file\n";
            return [];
        }

        $chunk = locateJsonArrayStart($handle, $match['afterKey']);
        if ($chunk === null) {
            echo "Could not find opening bracket for array '$key' in file: $filePath\n";
            return [];
        }

        echo "Processing $key array...\n";

        $buffer = '';
        $depth = 0; // The first chunk starts with the array's own '['.
        $itemCount = 0;
        $outcome = 'eof';

        do {
            $scan = scanJsonArrayChunk($chunk, $depth, $maxItems - $itemCount);
            $depth = $scan['depth'];
            $itemCount += $scan['items'];

            if ($scan['cut'] !== null) {
                // Keep only the text up to the end of the array / last wanted element.
                $buffer .= substr($chunk, 0, $scan['cut']);
                $outcome = $scan['closed'] ? 'closed' : 'limit';
                break;
            }

            $buffer .= $chunk;

            // Report whenever the running count crosses a multiple of ten.
            if ($scan['items'] > 0 && intdiv($itemCount, 10) > intdiv($itemCount - $scan['items'], 10)) {
                echo "Found $itemCount items in $key array...\r";
            }
        } while (($chunk = fgets($handle)) !== false);

        if ($outcome === 'eof') {
            echo "Reached end of file before the $key array was closed\n";
            return [];
        }

        if ($outcome === 'closed') {
            echo "Reached end of $key array\n";
            $errorLabel = 'JSON parse error';
        } else {
            echo "\nReached maximum number of items ($maxItems) for $key\n";
            // The buffer ends right after a complete top-level element, so
            // closing the array here yields valid JSON.
            $buffer .= ']';
            echo "Truncated buffer and added closing bracket\n";
            $errorLabel = 'JSON parse error after max items';
        }

        try {
            $parsedData = json_decode($buffer, true, 512, JSON_THROW_ON_ERROR);
        } catch (JsonException $e) {
            echo "{$errorLabel}: " . $e->getMessage() . " for key '$key'\n";
            // Debug: output a small part of the buffer
            echo "Buffer preview (first 100 chars): " . substr($buffer, 0, 100) . "...\n";
            echo "Buffer preview (last 100 chars): ..." . substr($buffer, -100) . "\n";
            return [];
        }

        if (!is_array($parsedData)) {
            return [];
        }

        // Limit the number of items to prevent memory exhaustion
        $result = array_slice($parsedData, 0, $maxItems);
        echo "Parsed " . count($result) . " items from the $key array\n";

        return $result;
    } catch (Exception $e) {
        echo "Exception in extractJsonArrayByKey for key '$key': " . $e->getMessage() . "\n";
        return [];
    } finally {
        fclose($handle);
    }
}

/**
 * Decodes a single JSON scalar token (string, number, true, false or null).
 *
 * When the token is not valid JSON (for example a number with leading zeros
 * or a string with an invalid escape), a best-effort value is returned: the
 * raw text between the quotes for strings, the numeric value for numeric
 * text, otherwise null.
 *
 * @param string $token Non-empty token text exactly as it appears in the file.
 */
function decodeJsonScalarToken(string $token): mixed
{
    try {
        return json_decode($token, true, 512, JSON_THROW_ON_ERROR);
    } catch (JsonException) {
        if ($token[0] === '"') {
            return substr($token, 1, -1);
        }

        return is_numeric($token) ? $token + 0 : null;
    }
}

/**
 * Implementation of extractJsonScalarByKey.
 *
 * Streams the file line by line, finds the first `"<key>"` used as a key and
 * returns its scalar value: string (JSON escapes decoded), int, float, bool
 * or null. The colon and/or the value may sit on the lines following the key.
 * Progress and diagnostics are echoed. Null is returned when the file cannot
 * be read, the key is missing, the value is JSON null, or the value is not a
 * scalar.
 *
 * @param string $filePath Path of the JSON file.
 * @param string $key      Name of the key to look up.
 */
function extractJsonScalarByKey(string $filePath, string $key): mixed
{
    if (!is_readable($filePath)) {
        echo "File is not readable: $filePath\n";
        return null;
    }

    $handle = fopen($filePath, 'r');
    if ($handle === false) {
        echo "Could not open file: $filePath\n";
        return null;
    }

    try {
        $match = locateJsonKey($handle, $key);
        if ($match === null) {
            echo "Key '$key' not found in file: $filePath\n";
            return null;
        }

        // Pull in following lines while nothing but an optional colon has been seen.
        $text = $match['afterKey'];
        $announced = false;
        while (preg_match('/^\s*:?\s*$/', $text) === 1 && ($nextLine = fgets($handle)) !== false) {
            if (!$announced) {
                echo "Value for key '$key' might be on the next line, using fallback method\n";
                $announced = true;
            }
            $text .= $nextLine;
        }

        // One scalar token directly after the colon: string, number, true, false or null.
        $tokenPattern = '/^\s*:\s*("(?:[^"\\\\]++|\\\\.)*+"|-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?|true|false|null)/';

        $value = null;
        if (preg_match($tokenPattern, $text, $matches) === 1) {
            $value = decodeJsonScalarToken($matches[1]);

            $description = match (true) {
                is_string($value) => "Extracted string value: $value",
                is_bool($value) => 'Extracted boolean value: ' . ($value ? 'true' : 'false'),
                $value === null => 'Extracted null value',
                default => "Extracted numeric value: $value",
            };
            echo $description . "\n";
        } else {
            echo "Could not extract value for key '$key' from line: " . trim($match['line']) . "\n";
        }

        if ($value === null) {
            echo "Value for key '$key' is null or could not be extracted\n";
        }

        return $value;
    } catch (Exception $e) {
        echo "Exception in extractJsonScalarByKey for key '$key': " . $e->getMessage() . "\n";
        return null;
    } finally {
        fclose($handle);
    }
}

// Skip the original approach since we know it fails with memory exhaustion
echo "=== ORIGINAL APPROACH ===\n";
echo "Skipping original approach - known to fail with memory exhaustion\n\n";

// Then try our new streaming approach
echo "=== NEW STREAMING APPROACH ===\n";
testNewApproach($outdatedPagesFile);

echo "Done testing!\n";