<?php
/**
 * Read-only access to objects stored in Git packfiles.
 *
 * Objects are located through version-2 pack indexes (`*.idx`) found in
 * `<objectsPath>/pack/`. Plain objects as well as OFS_DELTA / REF_DELTA
 * entries are supported. Object hashes are not verified.
 *
 * Failure handling: lookups that miss, unreadable files, and corrupt or
 * truncated pack data all surface as `null` (or `false` for stream()).
 */
class GitPacks {
  /** Largest object (uncompressed bytes) that read() will load into memory. */
  private const MAX_RAM = 1048576;

  /** Magic number plus version field of a version-2 pack index. */
  private const IDX_V2_MAGIC = "\377tOc\0\0\0\2";

  /** Byte position where the index tables start: 8-byte header + 256 * 4-byte fan-out. */
  private const IDX_TABLES_START = 1032;

  /** Length in bytes of a binary SHA-1. */
  private const SHA_BYTES = 20;

  /** Pack entry type: delta whose base is given as a backwards offset in the same pack. */
  private const TYPE_OFS_DELTA = 6;

  /** Pack entry type: delta whose base is given by its 20-byte object id. */
  private const TYPE_REF_DELTA = 7;

  /** Number of compressed bytes read from disk per inflate step. */
  private const CHUNK = 8192;

  private string $objectsPath;

  /** @var string[] Paths of all `*.idx` files discovered at construction time. */
  private array $packFiles;

  /** Index file that satisfied the most recent lookup; it is tried first next time. */
  private ?string $lastPack = null;

  /** @var array<string, resource|false> Open handles (or false for failed opens) keyed by path. */
  private array $fileHandles = [];

  /** @var array<string, int[]> Decoded fan-out table per index file. */
  private array $fanoutCache = [];

  /** @var array<string, string> Raw sorted SHA bytes per "indexFile:firstByte" bucket. */
  private array $shaBucketCache = [];

  /** @var array<string, string> Raw 32-bit offset entries per "indexFile:firstByte" bucket. */
  private array $offsetBucketCache = [];

  /**
   * @param string $objectsPath Path of the repository's `objects` directory.
   */
  public function __construct( string $objectsPath ) {
    $this->objectsPath = $objectsPath;
    $this->packFiles = glob( "{$this->objectsPath}/pack/*.idx" ) ?: [];
  }

  /** Closes every file handle opened while serving lookups. */
  public function __destruct() {
    foreach( $this->fileHandles as $handle ) {
      if( is_resource( $handle ) ) {
        fclose( $handle );
      }
    }
  }

  /**
   * Returns the first bytes of an object's content.
   *
   * @param string $sha 40-character hexadecimal object id.
   * @param int $len Maximum number of bytes to return; 0 returns the whole object.
   * @return string|null At most $len bytes, or null when the object is not
   *                     in any pack or cannot be decoded.
   */
  public function peek( string $sha, int $len = 12 ): ?string {
    $info = $this->findPackInfo( $sha );
    if( $info['offset'] === -1 ) {
      return null;
    }

    $handle = $this->getHandle( $info['file'] );
    if( !$handle ) {
      return null;
    }

    return $this->readPackEntry( $handle, $info['offset'], $len );
  }

  /**
   * Returns the complete content of an object.
   *
   * @param string $sha 40-character hexadecimal object id.
   * @return string|null The content, or null when the object is not found,
   *                     is larger than MAX_RAM, or cannot be decoded.
   */
  public function read( string $sha ): ?string {
    $info = $this->findPackInfo( $sha );
    if( $info['offset'] === -1 ) {
      return null;
    }

    $size = $this->extractPackedSize( $info['file'], $info['offset'] );
    if( $size > self::MAX_RAM ) {
      return null;
    }

    $handle = $this->getHandle( $info['file'] );

    return $handle
      ? $this->readPackEntry( $handle, $info['offset'] )
      : null;
  }

  /**
   * Passes an object's content to $callback in consecutive chunks.
   *
   * For delta objects the base is held in memory while the delta
   * instructions are applied incrementally.
   *
   * @param string $sha 40-character hexadecimal object id.
   * @param callable $callback Invoked as $callback( string $chunk ).
   * @return bool True when the whole object was emitted; false when it was
   *              not found or decoding failed (chunks may already have been
   *              delivered in the latter case).
   */
  public function stream( string $sha, callable $callback ): bool {
    $info = $this->findPackInfo( $sha );
    if( $info['offset'] === -1 ) {
      return false;
    }

    $handle = $this->getHandle( $info['file'] );
    if( !$handle ) {
      return false;
    }

    return $this->streamPackEntry( $handle, $info['offset'], $callback );
  }

  /**
   * Returns the uncompressed size of an object without decoding it fully.
   *
   * @param string $sha 40-character hexadecimal object id.
   * @return int|null Size in bytes, or null when the object is not found.
   */
  public function getSize( string $sha ): ?int {
    $info = $this->findPackInfo( $sha );
    if( $info['offset'] === -1 ) {
      return null;
    }

    return $this->extractPackedSize( $info['file'], $info['offset'] );
  }

  /**
   * Locates an object in the known packs.
   *
   * @return array ['file' => pack path, 'offset' => int] on success,
   *               ['offset' => -1] when the id is malformed or not found.
   */
  private function findPackInfo( string $sha ): array {
    if( !ctype_xdigit( $sha ) || strlen( $sha ) !== 2 * self::SHA_BYTES ) {
      return ['offset' => -1];
    }

    $binarySha = hex2bin( $sha );

    // Consecutive lookups tend to hit the same pack, so try it first.
    if( $this->lastPack !== null ) {
      $offset = $this->findInIdx( $this->lastPack, $binarySha );
      if( $offset !== -1 ) {
        return $this->makeResult( $this->lastPack, $offset );
      }
    }

    foreach( $this->packFiles as $indexFile ) {
      if( $indexFile === $this->lastPack ) {
        continue;
      }

      $offset = $this->findInIdx( $indexFile, $binarySha );
      if( $offset !== -1 ) {
        $this->lastPack = $indexFile;

        return $this->makeResult( $indexFile, $offset );
      }
    }

    return ['offset' => -1];
  }

  /**
   * Builds a lookup result, mapping the index path to its pack path.
   *
   * Only the trailing `.idx` extension is swapped, so a `.idx` occurring
   * elsewhere in the path (e.g. in a directory name) is left alone.
   */
  private function makeResult( string $indexPath, int $offset ): array {
    $packPath = substr( $indexPath, -4 ) === '.idx'
      ? substr( $indexPath, 0, -4 ) . '.pack'
      : $indexPath;

    return [
      'file' => $packPath,
      'offset' => $offset
    ];
  }

  /**
   * Looks up a binary object id in one version-2 index file.
   *
   * @return int Offset of the entry inside the pack, or -1 when the id is
   *             absent or the index is unreadable / not version 2.
   */
  private function findInIdx( string $indexFile, string $binarySha ): int {
    $fileHandle = $this->getHandle( $indexFile );
    if( !$fileHandle ) {
      return -1;
    }

    $fanout = $this->loadFanout( $indexFile, $fileHandle );
    if( $fanout === null ) {
      return -1;
    }

    // fanout[b] is the number of objects whose first id byte is <= b.
    $firstByte = ord( $binarySha[0] );
    $start = $firstByte === 0 ? 0 : $fanout[$firstByte - 1];
    $end = $fanout[$firstByte];
    if( $end <= $start ) {
      return -1;
    }

    $total = $fanout[255];
    $cacheKey = "$indexFile:$firstByte";

    if( !isset( $this->shaBucketCache[$cacheKey] ) ) {
      $count = $end - $start;

      fseek( $fileHandle, self::IDX_TABLES_START + ($start * self::SHA_BYTES) );
      $shas = fread( $fileHandle, $count * self::SHA_BYTES );

      // The offset table follows the id table (20 bytes each) and the
      // CRC table (4 bytes each).
      fseek( $fileHandle, self::IDX_TABLES_START + ($total * 24) + ($start * 4) );
      $offsets = fread( $fileHandle, $count * 4 );

      if(
        !is_string( $shas ) || strlen( $shas ) !== $count * self::SHA_BYTES ||
        !is_string( $offsets ) || strlen( $offsets ) !== $count * 4
      ) {
        return -1;
      }

      $this->shaBucketCache[$cacheKey] = $shas;
      $this->offsetBucketCache[$cacheKey] = $offsets;
    }

    $shaBlock = $this->shaBucketCache[$cacheKey];
    $low = 0;
    $high = intdiv( strlen( $shaBlock ), self::SHA_BYTES ) - 1;
    $foundIdx = -1;

    while( $low <= $high ) {
      $mid = ($low + $high) >> 1;
      // strcmp() is byte-wise; the < and > operators would compare
      // numeric-looking byte strings as numbers.
      $order = strcmp(
        substr( $shaBlock, $mid * self::SHA_BYTES, self::SHA_BYTES ),
        $binarySha
      );

      if( $order < 0 ) {
        $low = $mid + 1;
      } elseif( $order > 0 ) {
        $high = $mid - 1;
      } else {
        $foundIdx = $mid;
        break;
      }
    }

    if( $foundIdx === -1 ) {
      return -1;
    }

    $offset = unpack(
      'N',
      substr( $this->offsetBucketCache[$cacheKey], $foundIdx * 4, 4 )
    )[1];

    // A set top bit means the low 31 bits index the 64-bit offset table,
    // which follows the id, CRC and 32-bit offset tables (28 bytes/object).
    if( $offset & 0x80000000 ) {
      $pos64 = self::IDX_TABLES_START + ($total * 28) +
        (($offset & 0x7FFFFFFF) * 8);
      fseek( $fileHandle, $pos64 );
      $raw = fread( $fileHandle, 8 );
      if( !is_string( $raw ) || strlen( $raw ) !== 8 ) {
        return -1;
      }
      $offset = unpack( 'J', $raw )[1];
    }

    return (int)$offset;
  }

  /**
   * Returns the 256-entry fan-out table of an index, reading it on first use.
   *
   * @return int[]|null Null when the file is not a complete version-2 index.
   */
  private function loadFanout( string $indexFile, $fileHandle ): ?array {
    if( !isset( $this->fanoutCache[$indexFile] ) ) {
      fseek( $fileHandle, 0 );
      if( fread( $fileHandle, 8 ) !== self::IDX_V2_MAGIC ) {
        return null;
      }

      $raw = fread( $fileHandle, 1024 );
      if( !is_string( $raw ) || strlen( $raw ) !== 1024 ) {
        return null;
      }

      $this->fanoutCache[$indexFile] = array_values( unpack( 'N*', $raw ) );
    }

    return $this->fanoutCache[$indexFile];
  }

  /**
   * Decodes the pack entry starting at $offset.
   *
   * @param resource $fileHandle Open pack file.
   * @param int $offset Position of the entry header inside the pack.
   * @param int $cap When > 0, stop once this many bytes are available and
   *                 return at most that many.
   * @return string|null Decoded content, or null on corrupt/truncated data
   *                     or an unresolvable delta base.
   */
  private function readPackEntry( $fileHandle, int $offset, int $cap = 0 ): ?string {
    if( fseek( $fileHandle, $offset ) !== 0 ) {
      return null;
    }

    $header = $this->readVarInt( $fileHandle );
    $type = ($header['byte'] >> 4) & 7;

    if( $type !== self::TYPE_OFS_DELTA && $type !== self::TYPE_REF_DELTA ) {
      return $this->inflateToString( $fileHandle, $cap );
    }

    $base = $this->loadDeltaBase( $fileHandle, $offset, $type );
    if( $base === null ) {
      return null;
    }

    $delta = $this->inflateToString( $fileHandle );

    return $delta === null ? null : $this->applyDelta( $base, $delta, $cap );
  }

  /**
   * Streams the pack entry starting at $offset to $callback.
   *
   * @return bool True when the entry was decoded completely.
   */
  private function streamPackEntry(
    $fileHandle,
    int $offset,
    callable $callback
  ): bool {
    if( fseek( $fileHandle, $offset ) !== 0 ) {
      return false;
    }

    $header = $this->readVarInt( $fileHandle );
    $type = ($header['byte'] >> 4) & 7;

    if( $type !== self::TYPE_OFS_DELTA && $type !== self::TYPE_REF_DELTA ) {
      return $this->streamInflate( $fileHandle, $callback );
    }

    $base = $this->loadDeltaBase( $fileHandle, $offset, $type );
    if( $base === null ) {
      return false;
    }

    return $this->streamDelta( $fileHandle, $base, $callback );
  }

  /**
   * Resolves and loads the base object of a delta entry.
   *
   * Must be called with the handle positioned directly after the entry
   * header. The base is always loaded in full because copy instructions may
   * reference any part of it. On success the handle is positioned at the
   * start of the compressed delta data again, even when the base was read
   * through the same handle.
   *
   * @param int $offset Position of the delta entry itself.
   * @param int $type TYPE_OFS_DELTA or TYPE_REF_DELTA.
   * @return string|null Base content, or null when it cannot be resolved.
   */
  private function loadDeltaBase( $fileHandle, int $offset, int $type ): ?string {
    if( $type === self::TYPE_OFS_DELTA ) {
      $byte = $this->readByte( $fileHandle );
      $distance = $byte & 127;
      while( $byte & 128 ) {
        $byte = $this->readByte( $fileHandle );
        $distance = (($distance + 1) << 7) | ($byte & 127);
      }

      $deltaPos = ftell( $fileHandle );
      $baseOffset = $offset - $distance;

      // A base must lie strictly before its delta; anything else is corrupt
      // and a distance of zero would recurse forever.
      if( $distance <= 0 || $baseOffset < 0 ) {
        return null;
      }

      $base = $this->readPackEntry( $fileHandle, $baseOffset );
    } else {
      $rawSha = fread( $fileHandle, self::SHA_BYTES );
      $deltaPos = ftell( $fileHandle );

      if( !is_string( $rawSha ) || strlen( $rawSha ) !== self::SHA_BYTES ) {
        return null;
      }

      $info = $this->findPackInfo( bin2hex( $rawSha ) );
      if( $info['offset'] === -1 ) {
        return null;
      }

      $baseHandle = $this->getHandle( $info['file'] );
      $base = $baseHandle
        ? $this->readPackEntry( $baseHandle, $info['offset'] )
        : null;
    }

    if(
      $base === null ||
      $deltaPos === false ||
      fseek( $fileHandle, $deltaPos ) !== 0
    ) {
      return null;
    }

    return $base;
  }

  /**
   * Inflates the zlib stream at the current file position into a string.
   *
   * Bytes following the end of the stream in the last chunk read belong to
   * the next pack entry and are ignored.
   *
   * @param int $cap When > 0, return as soon as this many bytes are
   *                 available, truncated to exactly $cap bytes.
   * @return string|null Null on inflate errors or when the file ends before
   *                     the stream does.
   */
  private function inflateToString( $fileHandle, int $cap = 0 ): ?string {
    $inflator = inflate_init( ZLIB_ENCODING_DEFLATE );
    if( $inflator === false ) {
      return null;
    }

    $result = '';

    while( !feof( $fileHandle ) ) {
      $chunk = fread( $fileHandle, self::CHUNK );
      if( $chunk === false || $chunk === '' ) {
        break;
      }

      $data = @inflate_add( $inflator, $chunk );
      if( $data === false ) {
        return null;
      }

      $result .= $data;

      if( $cap > 0 && strlen( $result ) >= $cap ) {
        return substr( $result, 0, $cap );
      }

      if( inflate_get_status( $inflator ) === ZLIB_STREAM_END ) {
        return $result;
      }
    }

    return null;
  }

  /**
   * Inflates the zlib stream at the current file position, passing each
   * non-empty piece of output to $callback.
   *
   * @return bool True when the end of the stream was reached.
   */
  private function streamInflate( $fileHandle, callable $callback ): bool {
    $inflator = inflate_init( ZLIB_ENCODING_DEFLATE );
    if( $inflator === false ) {
      return false;
    }

    while( !feof( $fileHandle ) ) {
      $chunk = fread( $fileHandle, self::CHUNK );
      if( $chunk === false || $chunk === '' ) {
        break;
      }

      $data = @inflate_add( $inflator, $chunk );
      if( $data === false ) {
        return false;
      }

      if( $data !== '' ) {
        $callback( $data );
      }

      if( inflate_get_status( $inflator ) === ZLIB_STREAM_END ) {
        return true;
      }
    }

    return false;
  }

  /**
   * Inflates the delta stream at the current file position and applies its
   * instructions against $base as they become available.
   *
   * @return bool True when the stream ended and every instruction was
   *              complete.
   */
  private function streamDelta(
    $fileHandle,
    string $base,
    callable $callback
  ): bool {
    $inflator = inflate_init( ZLIB_ENCODING_DEFLATE );
    if( $inflator === false ) {
      return false;
    }

    $sizesSkipped = 0;
    $buffer = '';
    $finished = false;

    while( !$finished && !feof( $fileHandle ) ) {
      $chunk = fread( $fileHandle, self::CHUNK );
      if( $chunk === false || $chunk === '' ) {
        break;
      }

      $data = @inflate_add( $inflator, $chunk );
      if( $data === false ) {
        return false;
      }

      $finished = inflate_get_status( $inflator ) === ZLIB_STREAM_END;
      $buffer = $this->drainDeltaBuffer(
        $buffer . $data,
        $base,
        $sizesSkipped,
        $callback
      );
    }

    // Leftover bytes after the end of the stream are a cut-off instruction.
    return $finished && $buffer === '';
  }

  /**
   * Executes every complete delta item at the front of $buffer.
   *
   * @param int $sizesSkipped Number of leading size varints (base size,
   *                          target size) already consumed; updated in place.
   * @return string The unconsumed tail: an item that continues in data not
   *                yet inflated.
   */
  private function drainDeltaBuffer(
    string $buffer,
    string $base,
    int &$sizesSkipped,
    callable $callback
  ): string {
    $length = strlen( $buffer );
    $pos = 0;

    while( $pos < $length ) {
      if( $sizesSkipped < 2 ) {
        $end = $pos;
        while( $end < $length && (ord( $buffer[$end] ) & 128) ) {
          $end++;
        }
        if( $end === $length ) {
          break;
        }
        $pos = $end + 1;
        $sizesSkipped++;
        continue;
      }

      $opcode = ord( $buffer[$pos] );

      if( $opcode & 128 ) {
        $argCount = $this->copyArgCount( $opcode );
        if( $pos + 1 + $argCount > $length ) {
          break;
        }
        [$copyOffset, $copyLength] = $this->decodeCopy( $buffer, $pos + 1, $opcode );
        $piece = substr( $base, $copyOffset, $copyLength );
        $pos += 1 + $argCount;
      } else {
        if( $pos + 1 + $opcode > $length ) {
          break;
        }
        $piece = substr( $buffer, $pos + 1, $opcode );
        $pos += 1 + $opcode;
      }

      if( is_string( $piece ) && $piece !== '' ) {
        $callback( $piece );
      }
    }

    return $pos === 0 ? $buffer : (string)substr( $buffer, $pos );
  }

  /**
   * Reads the uncompressed size of the entry at $offset.
   *
   * For delta entries this is the size of the reconstructed object, taken
   * from the delta header, not the size of the delta itself.
   *
   * @return int Size in bytes; 0 when the pack cannot be opened.
   */
  private function extractPackedSize( string $packPath, int $offset ): int {
    $fileHandle = $this->getHandle( $packPath );
    if( !$fileHandle ) {
      return 0;
    }

    fseek( $fileHandle, $offset );
    $header = $this->readVarInt( $fileHandle );
    $type = ($header['byte'] >> 4) & 7;

    if( $type === self::TYPE_OFS_DELTA || $type === self::TYPE_REF_DELTA ) {
      return $this->readDeltaTargetSize( $fileHandle, $type );
    }

    return $header['value'];
  }

  /**
   * Applies a complete, inflated delta to its base.
   *
   * @param int $cap When > 0, stop once this many bytes were produced and
   *                 return at most that many.
   * @return string|null Reconstructed content, or null when an instruction
   *                     runs past the end of the delta.
   */
  private function applyDelta( string $base, string $delta, int $cap = 0 ): ?string {
    $position = 0;
    // The delta starts with two varints (base size, target size).
    $this->skipSize( $delta, $position );
    $this->skipSize( $delta, $position );

    $output = '';
    $deltaLength = strlen( $delta );

    while( $position < $deltaLength ) {
      if( $cap > 0 && strlen( $output ) >= $cap ) {
        break;
      }

      $opcode = ord( $delta[$position++] );

      if( $opcode & 128 ) {
        $argCount = $this->copyArgCount( $opcode );
        if( $position + $argCount > $deltaLength ) {
          return null;
        }
        [$copyOffset, $copyLength] = $this->decodeCopy( $delta, $position, $opcode );
        $position += $argCount;
        $output .= substr( $base, $copyOffset, $copyLength );
      } else {
        if( $position + $opcode > $deltaLength ) {
          return null;
        }
        $output .= substr( $delta, $position, $opcode );
        $position += $opcode;
      }
    }

    return $cap > 0 ? substr( $output, 0, $cap ) : $output;
  }

  /** Counts the argument bytes that follow a copy opcode (one per set bit 0-6). */
  private function copyArgCount( int $opcode ): int {
    $count = 0;
    for( $bit = 0x01; $bit <= 0x40; $bit <<= 1 ) {
      if( $opcode & $bit ) {
        $count++;
      }
    }

    return $count;
  }

  /**
   * Decodes the arguments of a copy instruction.
   *
   * The caller must ensure that all argument bytes announced by $opcode are
   * present in $data starting at $position.
   *
   * @return int[] [offset into base, number of bytes to copy]
   */
  private function decodeCopy( string $data, int $position, int $opcode ): array {
    $offset = 0;
    for( $i = 0; $i < 4; $i++ ) {
      if( $opcode & (0x01 << $i) ) {
        $offset |= ord( $data[$position++] ) << (8 * $i);
      }
    }

    $length = 0;
    for( $i = 0; $i < 3; $i++ ) {
      if( $opcode & (0x10 << $i) ) {
        $length |= ord( $data[$position++] ) << (8 * $i);
      }
    }

    // An encoded length of zero stands for 0x10000 bytes.
    if( $length === 0 ) {
      $length = 0x10000;
    }

    return [$offset, $length];
  }

  /**
   * Reads a pack entry header: 3 type bits and a variable-length size.
   *
   * @return array ['value' => size, 'byte' => first header byte (holds the type)]
   */
  private function readVarInt( $fileHandle ): array {
    $first = $this->readByte( $fileHandle );
    $byte = $first;
    $value = $byte & 15;
    $shift = 4;

    while( $byte & 128 ) {
      $byte = $this->readByte( $fileHandle );
      $value |= (($byte & 127) << $shift);
      $shift += 7;
    }

    return ['value' => $value, 'byte' => $first];
  }

  /**
   * Reads the target size from the header of a delta entry.
   *
   * Must be called with the handle positioned directly after the entry
   * header. Only as much of the delta as needed is inflated.
   *
   * @return int Target size, or 0 when it cannot be determined.
   */
  private function readDeltaTargetSize( $fileHandle, int $type ): int {
    if( $type === self::TYPE_OFS_DELTA ) {
      // Skip the variable-length base offset.
      do {
        $byte = $this->readByte( $fileHandle );
      } while( $byte & 128 );
    } else {
      fseek( $fileHandle, self::SHA_BYTES, SEEK_CUR );
    }

    $inflator = inflate_init( ZLIB_ENCODING_DEFLATE );
    if( $inflator === false ) {
      return 0;
    }

    $header = '';
    $attempts = 0;
    $maxAttempts = 64;

    while(
      !feof( $fileHandle ) &&
      strlen( $header ) < 32 &&
      $attempts < $maxAttempts
    ) {
      $chunk = fread( $fileHandle, 512 );
      if( $chunk === false || $chunk === '' ) {
        break;
      }

      $output = @inflate_add( $inflator, $chunk, ZLIB_NO_FLUSH );
      if( $output === false ) {
        break;
      }
      $header .= $output;

      if( inflate_get_status( $inflator ) === ZLIB_STREAM_END ) {
        break;
      }
      $attempts++;
    }

    $position = 0;
    $this->skipSize( $header, $position );

    return $this->readSize( $header, $position );
  }

  /** Advances $position past one little-endian base-128 varint. */
  private function skipSize( string $data, int &$position ): void {
    $length = strlen( $data );
    while( $position < $length && (ord( $data[$position++] ) & 128) ) {
    }
  }

  /**
   * Decodes one little-endian base-128 varint and advances $position.
   *
   * @return int The decoded value; decoding stops at the end of $data, so an
   *             absent value yields 0.
   */
  private function readSize( string $data, int &$position ): int {
    $length = strlen( $data );
    $value = 0;
    $shift = 0;

    while( $position < $length ) {
      $byte = ord( $data[$position++] );
      $value |= (($byte & 127) << $shift);
      $shift += 7;
      if( !($byte & 128) ) {
        break;
      }
    }

    return $value;
  }

  /** Reads one byte as an integer; yields 0 at end of file or on error. */
  private function readByte( $fileHandle ): int {
    $char = fread( $fileHandle, 1 );

    return ($char === false || $char === '') ? 0 : ord( $char );
  }

  /**
   * Returns a cached read handle for $path, opening it on first use.
   *
   * @return resource|false False when the file cannot be opened; the failure
   *                        is cached as well.
   */
  private function getHandle( string $path ) {
    if( !isset( $this->fileHandles[$path] ) ) {
      $this->fileHandles[$path] = @fopen( $path, 'rb' );
    }

    return $this->fileHandles[$path];
  }
}