<?php

require_once __DIR__ . '/CompressionStream.php';

/**
 * Read-only access to objects stored in the pack files found under
 * "<objectsPath>/pack".
 *
 * Objects are located through the version-2 ".idx" files and can be fetched
 * whole (read), as a short prefix (peek) or as a stream of chunks
 * (stream / streamGenerator). Delta entries (pack entry types 6 and 7) are
 * resolved against their base object.
 *
 * File handles are cached per path and shared by every method, so any helper
 * that needs the current position of a handle must read or record it before
 * calling something that may seek the same handle.
 *
 * NOTE(review): the read()/peek() path has no depth limit for ref-delta
 * chains (only the streaming path enforces MAX_DEPTH).
 */
class GitPacks
{
    /** Upper bound on the number of pack bytes read to inflate one delta payload in read()/peek(). */
    private const MAX_READ = 1040576;

    /** read() returns '' for objects whose unpacked size exceeds this many bytes. */
    private const MAX_RAM = 1048576;

    /** Streaming path: delta bases larger than this are spooled to a temp file instead of a string. */
    private const MAX_BASE_RAM = 2097152;

    /** Streaming path: maximum delta chain depth before giving up. */
    private const MAX_DEPTH = 200;

    private string $objectsPath;

    /** @var list<string> paths of the "*.idx" files found at construction time */
    private array $packFiles;

    /** Index file in which the most recent lookup succeeded; tried first on the next lookup. */
    private string $lastPack = '';

    /** @var array<string, resource|false> open handles (or false for failed opens) keyed by path */
    private array $fileHandles;

    /** @var array<string, list<int>> 256-entry fanout table per index file */
    private array $fanoutCache;

    /** @var array<string, string> raw SHA bucket per "<indexFile>:<start>" */
    private array $shaBucketCache;

    /** @var array<string, string> raw 32-bit offset bucket per "<indexFile>:<start>" */
    private array $offsetBucketCache;

    /**
     * @param string $objectsPath directory that contains the "pack" sub-directory
     */
    public function __construct( string $objectsPath )
    {
        $this->objectsPath = $objectsPath;
        $this->packFiles = glob( "{$this->objectsPath}/pack/*.idx" ) ?: [];
        $this->fileHandles = [];
        $this->fanoutCache = [];
        $this->shaBucketCache = [];
        $this->offsetBucketCache = [];
    }

    /**
     * Closes every cached file handle that was opened successfully.
     */
    public function __destruct()
    {
        foreach( $this->fileHandles as $handle ) {
            if( is_resource( $handle ) ) {
                fclose( $handle );
            }
        }
    }

    /**
     * Returns the first bytes of an object's content.
     *
     * @param string $sha 40-character hexadecimal object id
     * @param int    $len maximum number of bytes wanted; a non-positive value reads the whole object
     *
     * @return string the prefix, or '' when the object is not found or cannot be resolved
     */
    public function peek( string $sha, int $len = 12 ): string
    {
        $info = $this->findPackInfo( $sha );
        $result = '';

        if( $info['offset'] !== 0 ) {
            $handle = $this->getHandle( $info['file'] );

            if( $handle ) {
                $result = $this->readPackEntry( $handle, $info['offset'], $len, $len );
            }
        }

        return $result;
    }

    /**
     * Returns the complete content of an object as a string.
     *
     * @param string $sha 40-character hexadecimal object id
     *
     * @return string the content, or '' when the object is not found, is larger
     *                than MAX_RAM, or cannot be resolved
     */
    public function read( string $sha ): string
    {
        $info = $this->findPackInfo( $sha );
        $result = '';

        if( $info['offset'] !== 0 ) {
            $size = $this->extractPackedSize( $info['file'], $info['offset'] );

            if( $size <= self::MAX_RAM ) {
                $handle = $this->getHandle( $info['file'] );

                if( $handle ) {
                    $result = $this->readPackEntry( $handle, $info['offset'], $size );
                }
            }
        }

        return $result;
    }

    /**
     * Streams an object's content to a callback, one chunk per call.
     *
     * @param string   $sha      40-character hexadecimal object id
     * @param callable $callback invoked with each chunk (string)
     *
     * @return bool true when at least one chunk was delivered
     */
    public function stream( string $sha, callable $callback ): bool
    {
        $result = false;

        foreach( $this->streamGenerator( $sha ) as $chunk ) {
            $callback( $chunk );
            $result = true;
        }

        return $result;
    }

    /**
     * Yields an object's content chunk by chunk; yields nothing when the
     * object is not found.
     *
     * @param string $sha 40-character hexadecimal object id
     */
    public function streamGenerator( string $sha ): Generator
    {
        yield from $this->streamShaGenerator( $sha, 0 );
    }

    /**
     * Streaming lookup by object id at a given delta depth.
     */
    private function streamShaGenerator( string $sha, int $depth ): Generator
    {
        $info = $this->findPackInfo( $sha );

        if( $info['offset'] !== 0 ) {
            $handle = $this->getHandle( $info['file'] );

            if( $handle ) {
                yield from $this->streamPackEntryGenerator( $handle, $info['offset'], $depth );
            }
        }
    }

    /**
     * Returns the unpacked size of an object (for deltas: the size of the
     * resulting object), or 0 when the object is not found.
     *
     * @param string $sha 40-character hexadecimal object id
     */
    public function getSize( string $sha ): int
    {
        $info = $this->findPackInfo( $sha );
        $result = 0;

        if( $info['offset'] !== 0 ) {
            $result = $this->extractPackedSize( $info['file'], $info['offset'] );
        }

        return $result;
    }

    /**
     * Locates an object in the known packs.
     *
     * The index that satisfied the previous lookup is tried first, then all
     * other indexes in glob order.
     *
     * @return array{offset:int,file:string} offset 0 / file '' when not found
     */
    private function findPackInfo( string $sha ): array
    {
        $result = [ 'offset' => 0, 'file' => '' ];

        if( strlen( $sha ) !== 40 || !ctype_xdigit( $sha ) ) {
            return $result;
        }

        $binarySha = hex2bin( $sha );

        if( $this->lastPack !== '' ) {
            $offset = $this->findInIdx( $this->lastPack, $binarySha );

            if( $offset !== 0 ) {
                return [ 'file' => $this->packPathFor( $this->lastPack ), 'offset' => $offset ];
            }
        }

        foreach( $this->packFiles as $indexFile ) {
            if( $indexFile === $this->lastPack ) {
                continue; // already searched above
            }

            $offset = $this->findInIdx( $indexFile, $binarySha );

            if( $offset !== 0 ) {
                $this->lastPack = $indexFile;
                $result = [ 'file' => $this->packPathFor( $indexFile ), 'offset' => $offset ];
                break;
            }
        }

        return $result;
    }

    /**
     * Derives the ".pack" path from an ".idx" path by swapping only the
     * trailing extension, so a ".idx" occurring in a directory name is left
     * untouched.
     */
    private function packPathFor( string $indexFile ): string
    {
        return substr( $indexFile, 0, -4 ) . '.pack';
    }

    /**
     * Looks up a binary object id in one index file.
     *
     * @return int pack offset of the object, 0 when absent or the index is unusable
     */
    private function findInIdx( string $indexFile, string $binarySha ): int
    {
        $handle = $this->getHandle( $indexFile );
        $fanout = $handle ? $this->loadFanout( $indexFile, $handle ) : null;
        $result = 0;

        if( $fanout !== null ) {
            $byte = ord( $binarySha[0] );
            $start = $byte === 0 ? 0 : $fanout[$byte - 1];
            $end = $fanout[$byte];

            if( $end > $start ) {
                $result = $this->binarySearchIdx( $indexFile, $handle, $start, $end, $binarySha, $fanout[255] );
            }
        }

        return $result;
    }

    /**
     * Returns the cached fanout table of an index file, loading it on first
     * use. Only files starting with the "\377tOc" magic followed by version 2
     * and a complete 1024-byte table are accepted.
     *
     * @param resource $handle open handle of $indexFile
     *
     * @return list<int>|null null when the file is not a usable v2 index
     */
    private function loadFanout( string $indexFile, $handle ): ?array
    {
        if( !isset( $this->fanoutCache[$indexFile] ) ) {
            fseek( $handle, 0 );
            $head = fread( $handle, 8 );

            if( $head === "\377tOc\0\0\0\2" ) {
                $raw = fread( $handle, 1024 );

                // A short read would leave fewer than 256 entries and break the lookups below.
                if( is_string( $raw ) && strlen( $raw ) === 1024 ) {
                    $this->fanoutCache[$indexFile] = array_values( unpack( 'N*', $raw ) );
                }
            }
        }

        return $this->fanoutCache[$indexFile] ?? null;
    }

    /**
     * Binary-searches one fanout bucket of an index for an object id.
     *
     * Layout used (after the 8-byte header and the 1024-byte fanout table,
     * i.e. from byte 1032): $total 20-byte ids, $total 4-byte entries that are
     * skipped, $total 4-byte offsets, then 8-byte offsets for entries whose
     * 4-byte offset has the top bit set.
     *
     * @param resource $handle open handle of $indexFile
     * @param int      $start  first entry of the bucket
     * @param int      $end    one past the last entry of the bucket
     * @param int      $total  number of objects in the index
     *
     * @return int pack offset, 0 when the id is not in the bucket or the index is truncated
     */
    private function binarySearchIdx( string $indexFile, $handle, int $start, int $end, string $binarySha, int $total ): int
    {
        $key = "$indexFile:$start";
        $count = $end - $start;

        if( !isset( $this->shaBucketCache[$key] ) ) {
            fseek( $handle, 1032 + ($start * 20) );
            $shas = fread( $handle, $count * 20 );
            fseek( $handle, 1032 + ($total * 24) + ($start * 4) );
            $offsets = fread( $handle, $count * 4 );

            if( !is_string( $shas ) || strlen( $shas ) !== $count * 20
                || !is_string( $offsets ) || strlen( $offsets ) !== $count * 4 ) {
                return 0; // truncated index: do not cache a partial bucket
            }

            $this->shaBucketCache[$key] = $shas;
            $this->offsetBucketCache[$key] = $offsets;
        }

        $shaBlock = $this->shaBucketCache[$key];
        $low = 0;
        $high = $count - 1;
        $found = -1;

        while( $found === -1 && $low <= $high ) {
            $mid = ($low + $high) >> 1;
            // strcmp() keeps the comparison byte-wise; "<" / ">" would compare
            // numeric-looking strings as numbers.
            $order = strcmp( substr( $shaBlock, $mid * 20, 20 ), $binarySha );

            if( $order < 0 ) {
                $low = $mid + 1;
            } elseif( $order > 0 ) {
                $high = $mid - 1;
            } else {
                $found = $mid;
            }
        }

        $result = 0;

        if( $found !== -1 ) {
            $offset = unpack( 'N', substr( $this->offsetBucketCache[$key], $found * 4, 4 ) )[1];

            if( $offset & 0x80000000 ) {
                // Top bit set: the remaining bits index the 64-bit offset table.
                fseek( $handle, 1032 + ($total * 28) + (($offset & 0x7FFFFFFF) * 8) );
                $wide = fread( $handle, 8 );
                $offset = ( is_string( $wide ) && strlen( $wide ) === 8 ) ? unpack( 'J', $wide )[1] : 0;
            }

            $result = (int)$offset;
        }

        return $result;
    }

    /**
     * Reads one pack entry into a string, resolving deltas.
     *
     * @param resource $handle open pack handle
     * @param int      $offset offset of the entry in the pack
     * @param int      $size   size hint, only used to bound the delta payload read
     * @param int      $cap    maximum number of bytes to return; <= 0 means no limit
     */
    private function readPackEntry( $handle, int $offset, int $size, int $cap = 0 ): string
    {
        fseek( $handle, $offset );
        $header = $this->readVarInt( $handle );
        $type = ($header['byte'] >> 4) & 7;

        if( $type === 6 ) {
            return $this->handleOfsDelta( $handle, $offset, $size, $cap );
        }

        if( $type === 7 ) {
            return $this->handleRefDelta( $handle, $size, $cap );
        }

        return $this->decompressToString( $handle, $cap );
    }

    /**
     * Streams one pack entry, resolving deltas.
     *
     * @param resource $handle open pack handle
     */
    private function streamPackEntryGenerator( $handle, int $offset, int $depth ): Generator
    {
        fseek( $handle, $offset );
        $header = $this->readVarInt( $handle );
        $type = ($header['byte'] >> 4) & 7;

        if( $type === 6 || $type === 7 ) {
            yield from $this->streamDeltaObjectGenerator( $handle, $offset, $type, $depth );
        } else {
            yield from $this->streamDecompressionGenerator( $handle );
        }
    }

    /**
     * Spools the entry at $baseOffset into a temp file and rewinds it.
     *
     * @param resource $packHandle open pack handle
     *
     * @return resource|false the temp file handle, false when tmpfile() failed
     */
    private function resolveBaseToTempFile( $packHandle, int $baseOffset, int $depth )
    {
        $tmpHandle = tmpfile();

        if( $tmpHandle !== false ) {
            foreach( $this->streamPackEntryGenerator( $packHandle, $baseOffset, $depth + 1 ) as $chunk ) {
                fwrite( $tmpHandle, $chunk );
            }

            rewind( $tmpHandle );
        } else {
            error_log( "[GitPacks] tmpfile failed for ofs-delta base at $baseOffset" );
        }

        return $tmpHandle;
    }

    /**
     * Streams a delta entry (type 6: base given as a relative pack offset,
     * type 7: base given as a 20-byte object id).
     *
     * The base is materialised first (in memory, or in a temp file when it is
     * larger than MAX_BASE_RAM). Resolving it moves the shared pack handle, so
     * the position of the delta payload is recorded beforehand and restored
     * before the delta is applied.
     *
     * @param resource $handle open pack handle
     */
    private function streamDeltaObjectGenerator( $handle, int $offset, int $type, int $depth ): Generator
    {
        if( $depth >= self::MAX_DEPTH ) {
            error_log( "[GitPacks] delta depth limit exceeded at offset $offset" );

            return;
        }

        fseek( $handle, $offset );
        $this->readVarInt( $handle );

        if( $type === 6 ) {
            $neg = $this->readOffsetDelta( $handle );
            $deltaPos = ftell( $handle );
            $baseOffset = $offset - $neg;
            $baseSize = $this->extractPackedSize( $handle, $baseOffset );

            if( $baseSize > self::MAX_BASE_RAM ) {
                $tmpHandle = $this->resolveBaseToTempFile( $handle, $baseOffset, $depth );

                if( $tmpHandle !== false ) {
                    try {
                        fseek( $handle, $deltaPos );
                        yield from $this->applyDeltaStreamGenerator( $handle, $tmpHandle );
                    } finally {
                        // Also runs when the consumer abandons the generator early.
                        fclose( $tmpHandle );
                    }
                }
            } else {
                $base = '';

                foreach( $this->streamPackEntryGenerator( $handle, $baseOffset, $depth + 1 ) as $chunk ) {
                    $base .= $chunk;
                }

                fseek( $handle, $deltaPos );
                yield from $this->applyDeltaStreamGenerator( $handle, $base );
            }

            return;
        }

        $baseSha = bin2hex( (string)fread( $handle, 20 ) );
        // The base lookup below seeks this same handle when the base lives in
        // the same pack, so remember where the delta payload starts.
        $deltaPos = ftell( $handle );
        $baseSize = $this->getSize( $baseSha );

        if( $baseSize > self::MAX_BASE_RAM ) {
            $tmpHandle = tmpfile();

            if( $tmpHandle === false ) {
                error_log( "[GitPacks] tmpfile() failed for ref-delta (sha=$baseSha)" );

                return;
            }

            try {
                $written = false;

                foreach( $this->streamShaGenerator( $baseSha, $depth + 1 ) as $chunk ) {
                    fwrite( $tmpHandle, $chunk );
                    $written = true;
                }

                if( $written ) {
                    rewind( $tmpHandle );
                    fseek( $handle, $deltaPos );
                    yield from $this->applyDeltaStreamGenerator( $handle, $tmpHandle );
                }
            } finally {
                fclose( $tmpHandle );
            }
        } else {
            $base = '';
            $written = false;

            foreach( $this->streamShaGenerator( $baseSha, $depth + 1 ) as $chunk ) {
                $base .= $chunk;
                $written = true;
            }

            if( $written ) {
                fseek( $handle, $deltaPos );
                yield from $this->applyDeltaStreamGenerator( $handle, $base );
            }
        }
    }

    /**
     * Inflates the delta at the current position of $handle and yields the
     * reconstructed object incrementally.
     *
     * The inflated delta is parsed as it arrives: two variable-length size
     * headers are skipped, then each instruction is either a copy from the
     * base (top bit set) or an insert of literal bytes. Parsing pauses when an
     * instruction is not yet fully buffered.
     *
     * All chunks are yielded from this one generator so that keys stay unique.
     *
     * @param resource        $handle pack handle positioned at the compressed delta
     * @param string|resource $base   base content, or a seekable handle holding it
     */
    private function applyDeltaStreamGenerator( $handle, $base ): Generator
    {
        $stream = CompressionStream::createInflater();
        $isFile = is_resource( $base );
        $headersSeen = 0;
        $buffer = '';
        $done = false;

        while( !$done && !feof( $handle ) ) {
            $chunk = fread( $handle, 8192 );

            if( $chunk === false || $chunk === '' ) {
                break;
            }

            $data = $stream->pump( $chunk );

            if( $data !== '' ) {
                $buffer .= $data;
                $starved = false;

                while( !$starved && $buffer !== '' ) {
                    $len = strlen( $buffer );

                    if( $headersSeen < 2 ) {
                        // Skip one size header: bytes with the top bit set continue the number.
                        $pos = 0;

                        while( $pos < $len && (ord( $buffer[$pos] ) & 128) ) {
                            $pos++;
                        }

                        if( $pos === $len ) {
                            $starved = true; // terminating byte not buffered yet
                        } else {
                            $buffer = (string)substr( $buffer, $pos + 1 );
                            $headersSeen++;
                        }

                        continue;
                    }

                    $op = ord( $buffer[0] );

                    if( $op & 128 ) {
                        $need = $this->getCopyInstructionSize( $op );

                        if( $len < 1 + $need ) {
                            $starved = true;
                            continue;
                        }

                        $info = $this->parseCopyInstruction( $op, $buffer, 1 );

                        if( $isFile ) {
                            fseek( $base, $info['off'] );
                            $rem = $info['len'];

                            while( $rem > 0 ) {
                                $slice = fread( $base, min( 65536, $rem ) );

                                if( $slice === false || $slice === '' ) {
                                    $rem = 0;
                                } else {
                                    yield $slice;
                                    $rem -= strlen( $slice );
                                }
                            }
                        } else {
                            yield (string)substr( $base, $info['off'], $info['len'] );
                        }

                        $buffer = (string)substr( $buffer, 1 + $need );
                        continue;
                    }

                    $ln = $op & 127;

                    if( $len < 1 + $ln ) {
                        $starved = true;
                        continue;
                    }

                    yield (string)substr( $buffer, 1, $ln );
                    $buffer = (string)substr( $buffer, 1 + $ln );
                }
            }

            $done = $stream->finished();
        }
    }

    /**
     * Inflates the compressed data at the current position of $handle and
     * yields every non-empty piece of output until the inflater reports it is
     * finished or the file ends.
     *
     * @param resource $handle
     */
    private function streamDecompressionGenerator( $handle ): Generator
    {
        $stream = CompressionStream::createInflater();
        $done = false;

        while( !$done && !feof( $handle ) ) {
            $chunk = fread( $handle, 8192 );
            $done = $chunk === false || $chunk === '';

            if( !$done ) {
                $data = $stream->pump( $chunk );

                if( $data !== '' ) {
                    yield $data;
                }

                $done = $stream->finished();
            }
        }
    }

    /**
     * Inflates the compressed data at the current position of $handle into a
     * string.
     *
     * @param resource $handle
     * @param int      $cap maximum number of bytes to return; <= 0 means no limit
     */
    private function decompressToString( $handle, int $cap = 0 ): string
    {
        $res = '';

        foreach( $this->streamDecompressionGenerator( $handle ) as $data ) {
            $res .= $data;

            if( $cap > 0 && strlen( $res ) >= $cap ) {
                $res = substr( $res, 0, $cap );
                break;
            }
        }

        return $res;
    }

    /**
     * Returns the unpacked size recorded for the entry at $offset. For delta
     * entries the size of the reconstructed object is taken from the delta
     * itself.
     *
     * @param string|resource $packPathOrHandle pack path or an open pack handle
     *
     * @return int 0 when the pack cannot be opened
     */
    private function extractPackedSize( $packPathOrHandle, int $offset ): int
    {
        $handle = is_resource( $packPathOrHandle ) ? $packPathOrHandle : $this->getHandle( $packPathOrHandle );
        $size = 0;

        if( $handle ) {
            fseek( $handle, $offset );
            $header = $this->readVarInt( $handle );
            $size = $header['value'];
            $type = ($header['byte'] >> 4) & 7;

            if( $type === 6 || $type === 7 ) {
                $size = $this->readDeltaTargetSize( $handle, $type );
            }
        }

        return $size;
    }

    /**
     * Resolves a type-6 delta whose header has just been read from $handle.
     *
     * The delta is inflated first; when a cap is in effect only the prefix of
     * the base that the delta actually needs is read.
     *
     * @param resource $handle open pack handle positioned after the entry header
     * @param int      $offset offset of this delta entry in the pack
     * @param int      $size   size hint for the delta payload read
     * @param int      $cap    maximum number of bytes to return; <= 0 means no limit
     *
     * @return string '' when the delta or its base cannot be resolved
     */
    private function handleOfsDelta( $handle, int $offset, int $size, int $cap ): string
    {
        $neg = $this->readOffsetDelta( $handle );
        $baseOffset = $offset - $neg;
        $delta = $this->readDeltaPayload( $handle, $size );

        // A base must lie strictly before the delta; anything else would recurse forever.
        if( $delta === '' || $neg <= 0 || $baseOffset <= 0 ) {
            return '';
        }

        $limited = $cap > 0;
        $need = $limited ? $this->requiredBaseLength( $delta, $cap ) : 0;
        $base = '';

        if( !$limited || $need > 0 ) {
            fseek( $handle, $baseOffset );
            $baseHeader = $this->readVarInt( $handle );
            $base = $this->readPackEntry( $handle, $baseOffset, $baseHeader['value'], $need );
        }

        return $this->applyDelta( $base, $delta, $cap );
    }

    /**
     * Resolves a type-7 delta whose header has just been read from $handle.
     *
     * The delta payload is read before the base is looked up, because the
     * lookup goes through peek()/read(), which seek the shared pack handle.
     *
     * @param resource $handle open pack handle positioned after the entry header
     * @param int      $size   size hint for the delta payload read
     * @param int      $cap    maximum number of bytes to return; <= 0 means no limit
     *
     * @return string '' when the delta or its base cannot be resolved
     */
    private function handleRefDelta( $handle, int $size, int $cap ): string
    {
        $rawSha = fread( $handle, 20 );

        if( !is_string( $rawSha ) || strlen( $rawSha ) !== 20 ) {
            return '';
        }

        $sha = bin2hex( $rawSha );
        $delta = $this->readDeltaPayload( $handle, $size );

        if( $delta === '' ) {
            return '';
        }

        if( $cap > 0 ) {
            $need = $this->requiredBaseLength( $delta, $cap );
            $base = $need > 0 ? $this->peek( $sha, $need ) : '';
        } else {
            $base = $this->read( $sha );
        }

        return $this->applyDelta( $base, $delta, $cap );
    }

    /**
     * Reads and inflates the delta payload at the current position of $handle.
     *
     * The read is bounded by MAX_READ and normally extends into the entries
     * that follow; this relies on gzuncompress() stopping at the end of the
     * first zlib stream. Failures are reported as ''.
     *
     * @param resource $handle
     * @param int      $size   size hint (with the current constants the bound is always MAX_READ)
     */
    private function readDeltaPayload( $handle, int $size ): string
    {
        $limit = min( self::MAX_READ, max( $size * 2, 1048576 ) );
        $compressed = fread( $handle, $limit );
        $delta = '';

        if( is_string( $compressed ) && $compressed !== '' ) {
            $inflated = @gzuncompress( $compressed );

            if( is_string( $inflated ) ) {
                $delta = $inflated;
            }
        }

        return $delta;
    }

    /**
     * Computes how many leading bytes of the base are needed to produce the
     * first $cap bytes of the delta's output.
     *
     * @param string $delta inflated delta
     * @param int    $cap   number of output bytes wanted (> 0)
     *
     * @return int 0 when no copy instruction contributes to that prefix
     */
    private function requiredBaseLength( string $delta, int $cap ): int
    {
        $pos = 0;
        $pos += $this->readDeltaSize( $delta, $pos )['used'];
        $pos += $this->readDeltaSize( $delta, $pos )['used'];
        $len = strlen( $delta );
        $produced = 0;
        $needed = 0;

        while( $pos < $len && $produced < $cap ) {
            $op = ord( $delta[$pos++] );

            if( $op & 128 ) {
                if( $pos + $this->getCopyInstructionSize( $op ) > $len ) {
                    break; // truncated instruction; applyDelta() rejects it
                }

                $info = $this->parseCopyInstruction( $op, $delta, $pos );
                $take = min( $info['len'], $cap - $produced );
                $needed = max( $needed, $info['off'] + $take );
                $produced += $info['len'];
                $pos += $info['used'];
            } else {
                $ln = $op & 127;
                $produced += $ln;
                $pos += $ln;
            }
        }

        return $needed;
    }

    /**
     * Applies an inflated delta to a base.
     *
     * @param string $base  base content (may be only a prefix when $cap > 0)
     * @param string $delta inflated delta: two size headers followed by instructions
     * @param int    $cap   maximum number of bytes to return; <= 0 means no limit
     *
     * @return string the reconstructed content (at most $cap bytes when capped),
     *                or '' when the delta is truncated or the base is too short
     *                for a copy instruction
     */
    private function applyDelta( string $base, string $delta, int $cap ): string
    {
        $pos = 0;
        $pos += $this->readDeltaSize( $delta, $pos )['used'];
        $pos += $this->readDeltaSize( $delta, $pos )['used'];
        $limited = $cap > 0;
        $len = strlen( $delta );
        $out = '';

        while( $pos < $len && !( $limited && strlen( $out ) >= $cap ) ) {
            $op = ord( $delta[$pos++] );

            if( $op & 128 ) {
                if( $pos + $this->getCopyInstructionSize( $op ) > $len ) {
                    return ''; // instruction operands missing
                }

                $info = $this->parseCopyInstruction( $op, $delta, $pos );
                $piece = (string)substr( $base, $info['off'], $info['len'] );
                $wanted = $limited ? min( $info['len'], $cap - strlen( $out ) ) : $info['len'];

                if( strlen( $piece ) < $wanted ) {
                    return ''; // base missing or shorter than the delta requires
                }

                $out .= $piece;
                $pos += $info['used'];
            } else {
                $ln = $op & 127;
                $piece = (string)substr( $delta, $pos, $ln );

                if( strlen( $piece ) < $ln ) {
                    return ''; // literal data cut short
                }

                $out .= $piece;
                $pos += $ln;
            }
        }

        return $limited ? substr( $out, 0, $cap ) : $out;
    }

    /**
     * Decodes the operands of a copy instruction.
     *
     * Bits 0-3 of $op select which offset bytes follow (least significant
     * first), bits 4-6 select which length bytes follow. A decoded length of
     * 0 stands for 0x10000.
     *
     * @param int    $op   instruction byte (top bit set)
     * @param string $data buffer holding the operands
     * @param int    $pos  index of the first operand byte in $data
     *
     * @return array{off:int,len:int,used:int} 'used' is the number of operand bytes consumed
     */
    private function parseCopyInstruction( int $op, string $data, int $pos ): array
    {
        $off = 0;
        $len = 0;
        $ptr = $pos;

        foreach( [ 0x01 => 0, 0x02 => 8, 0x04 => 16, 0x08 => 24 ] as $flag => $shift ) {
            if( $op & $flag ) {
                $off |= ord( $data[$ptr++] ) << $shift;
            }
        }

        foreach( [ 0x10 => 0, 0x20 => 8, 0x40 => 16 ] as $flag => $shift ) {
            if( $op & $flag ) {
                $len |= ord( $data[$ptr++] ) << $shift;
            }
        }

        return [ 'off' => $off, 'len' => $len === 0 ? 0x10000 : $len, 'used' => $ptr - $pos ];
    }

    /**
     * Number of operand bytes that follow a copy instruction byte, i.e. the
     * number of set bits among its low seven bits.
     */
    private function getCopyInstructionSize( int $op ): int
    {
        $c = $op & 0x7F;
        $c = $c - (($c >> 1) & 0x55);
        $c = (($c >> 2) & 0x33) + ($c & 0x33);
        $c = (($c >> 4) + $c) & 0x0F;

        return $c;
    }

    /**
     * Reads a single byte from $handle as an integer; 0 at end of file or on
     * a failed read.
     *
     * @param resource $handle
     */
    private function readByte( $handle ): int
    {
        $raw = fread( $handle, 1 );

        return ( is_string( $raw ) && $raw !== '' ) ? ord( $raw ) : 0;
    }

    /**
     * Reads a pack entry header: the first byte carries the type in bits 4-6
     * and the low four size bits; following bytes (while the top bit is set)
     * each add seven more size bits.
     *
     * @param resource $handle
     *
     * @return array{value:int,byte:int} decoded size and the first header byte
     */
    private function readVarInt( $handle ): array
    {
        $first = $this->readByte( $handle );
        $byte = $first;
        $value = $byte & 15;
        $shift = 4;

        while( $byte & 128 ) {
            $byte = $this->readByte( $handle );
            $value |= (($byte & 127) << $shift);
            $shift += 7;
        }

        return [ 'value' => $value, 'byte' => $first ];
    }

    /**
     * Reads the relative base offset of a type-6 delta (seven bits per byte,
     * most significant group first, with +1 added before each shift).
     *
     * @param resource $handle
     */
    private function readOffsetDelta( $handle ): int
    {
        $byte = $this->readByte( $handle );
        $neg = $byte & 127;

        while( $byte & 128 ) {
            $byte = $this->readByte( $handle );
            $neg = (($neg + 1) << 7) | ($byte & 127);
        }

        return $neg;
    }

    /**
     * Reads the result size stored in a delta: skips the base reference,
     * inflates just enough of the delta to cover its two size headers and
     * returns the second one.
     *
     * @param resource $handle pack handle positioned after the entry header
     * @param int      $type   6 (offset base reference) or 7 (20-byte id base reference)
     *
     * @return int 0 when nothing could be inflated
     */
    private function readDeltaTargetSize( $handle, int $type ): int
    {
        if( $type === 6 ) {
            do {
                $b = $this->readByte( $handle );
            } while( $b & 128 );
        } else {
            fseek( $handle, 20, SEEK_CUR );
        }

        $stream = CompressionStream::createInflater();
        $head = '';
        $try = 0;
        $done = false;

        while( !$done && !feof( $handle ) && strlen( $head ) < 32 && $try < 64 ) {
            $chunk = fread( $handle, 512 );
            $done = $chunk === false || $chunk === '';

            if( !$done ) {
                $out = $stream->pump( $chunk );

                if( $out !== '' ) {
                    $head .= $out;
                }

                $done = $stream->finished();
                $try++;
            }
        }

        $result = 0;

        if( $head !== '' ) {
            $skip = $this->readDeltaSize( $head, 0 )['used'];
            $result = $this->readDeltaSize( $head, $skip )['val'];
        }

        return $result;
    }

    /**
     * Decodes one little-endian base-128 number (seven bits per byte, top bit
     * set on all but the last byte) from $data starting at $pos.
     *
     * @return array{val:int,used:int} decoded value and number of bytes consumed
     */
    private function readDeltaSize( string $data, int $pos ): array
    {
        $len = strlen( $data );
        $val = 0;
        $shift = 0;
        $start = $pos;
        $done = false;

        while( !$done && $pos < $len ) {
            $byte = ord( $data[$pos++] );
            $val |= ($byte & 0x7F) << $shift;
            $done = !($byte & 0x80);
            $shift += 7;
        }

        return [ 'val' => $val, 'used' => $pos - $start ];
    }

    /**
     * Returns the cached read handle for $path, opening it on first use.
     * A failed open is cached as false and not retried.
     *
     * @return resource|false
     */
    private function getHandle( string $path )
    {
        if( !isset( $this->fileHandles[$path] ) ) {
            $this->fileHandles[$path] = @fopen( $path, 'rb' );
        }

        return $this->fileHandles[$path];
    }
}