<?php
/**
 * Read-only access to objects stored in Git packfiles.
 *
 * Objects are located through the version-2 `.idx` files found in
 * `<objectsPath>/pack/` and then decoded from the matching `.pack` file.
 * Plain entries are inflated directly; OFS_DELTA (type 6) and REF_DELTA
 * (type 7) entries are resolved recursively against their base object.
 *
 * File handles, fanout tables and per-first-byte hash/offset buckets are
 * cached for the lifetime of the instance.
 */
class GitPacks {
    /** Magic number plus version of a v2 pack index. */
    private const IDX_V2_HEADER = "\377tOc\0\0\0\2";

    /** Byte position of the hash table: 8-byte header + 256 * 4-byte fanout. */
    private const SHA_TABLE_START = 1032;

    /** Size in bytes of the fanout table. */
    private const FANOUT_BYTES = 1024;

    private string $objectsPath;

    /** @var list<string> Paths of every `.idx` file found at construction. */
    private array $packFiles;

    /** Index file that satisfied the most recent lookup; tried first next time. */
    private ?string $lastPack = null;

    /** @var array<string, resource|false> Open handles keyed by path. */
    private array $fileHandles = [];

    /** @var array<string, list<int>> Fanout tables keyed by index path ([] = unusable index). */
    private array $fanoutCache = [];

    /** @var array<string, string> Raw hash blocks keyed by "indexPath:firstByte". */
    private array $shaBucketCache = [];

    /** @var array<string, string> Raw 32-bit offset blocks keyed like $shaBucketCache. */
    private array $offsetBucketCache = [];

    /**
     * @param string $objectsPath Path of the repository's `objects` directory.
     */
    public function __construct( string $objectsPath ) {
        $this->objectsPath = $objectsPath;
        $this->packFiles = glob( "{$this->objectsPath}/pack/*.idx" ) ?: [];
    }

    /** Closes every file handle opened by this instance. */
    public function __destruct() {
        foreach( $this->fileHandles as $handle ) {
            if( is_resource( $handle ) ) {
                fclose( $handle );
            }
        }
    }

    /**
     * Returns the fully resolved content of a packed object.
     *
     * @param string $sha 40-character hexadecimal object id.
     * @return string|null Object content, or null when the id is malformed,
     *                     not present in any index, or the pack cannot be opened.
     */
    public function read( string $sha ): ?string {
        $info = $this->findPackInfo( $sha );

        if( $info['offset'] === -1 ) {
            return null;
        }

        $handle = $this->getHandle( $info['file'] );

        return $handle
            ? $this->readPackEntry( $handle, $info['offset'] )
            : null;
    }

    /**
     * Returns the size of a packed object without materialising it.
     *
     * For delta entries the size is the target size recorded at the start of
     * the delta stream.
     *
     * @param string $sha 40-character hexadecimal object id.
     * @return int|null Size in bytes, or null when the object is not found.
     */
    public function getSize( string $sha ): ?int {
        $info = $this->findPackInfo( $sha );

        if( $info['offset'] === -1 ) {
            return null;
        }

        return $this->extractPackedSize( $info['file'], $info['offset'] );
    }

    /**
     * Searches the known indexes for an object id.
     *
     * The index that answered the previous lookup is tried first.
     *
     * @return array{offset: int, file?: string} `offset` is -1 when not found.
     */
    private function findPackInfo( string $sha ): array {
        if( !ctype_xdigit( $sha ) || strlen( $sha ) !== 40 ) {
            return ['offset' => -1];
        }

        $binarySha = hex2bin( $sha );

        if( $this->lastPack ) {
            $offset = $this->findInIdx( $this->lastPack, $binarySha );

            if( $offset !== -1 ) {
                return $this->makeResult( $this->lastPack, $offset );
            }
        }

        foreach( $this->packFiles as $indexFile ) {
            if( $indexFile === $this->lastPack ) {
                continue;
            }

            $offset = $this->findInIdx( $indexFile, $binarySha );

            if( $offset !== -1 ) {
                $this->lastPack = $indexFile;

                return $this->makeResult( $indexFile, $offset );
            }
        }

        return ['offset' => -1];
    }

    /**
     * Builds a lookup result, mapping the index path to its pack path.
     *
     * Only the trailing `.idx` extension is swapped so that a `.idx`
     * occurring elsewhere in the path (e.g. a directory name) is untouched.
     *
     * @return array{file: string, offset: int}
     */
    private function makeResult( string $indexPath, int $offset ): array {
        $packPath = substr( $indexPath, -4 ) === '.idx'
            ? substr( $indexPath, 0, -4 ) . '.pack'
            : $indexPath . '.pack';

        return [
            'file' => $packPath,
            'offset' => $offset
        ];
    }

    /**
     * Looks up a binary object id in one v2 index file.
     *
     * @param string $indexFile Path of the `.idx` file.
     * @param string $binarySha 20-byte raw object id.
     * @return int Byte offset of the entry in the pack, or -1 when the id is
     *             absent or the index is unreadable/truncated.
     */
    private function findInIdx( string $indexFile, string $binarySha ): int {
        $fileHandle = $this->getHandle( $indexFile );

        if( !$fileHandle ) {
            return -1;
        }

        $fanout = $this->loadFanout( $indexFile, $fileHandle );

        if( $fanout === [] ) {
            return -1;
        }

        // fanout[b] is the number of objects whose first byte is <= b.
        $firstByte = ord( $binarySha[0] );
        $start = $firstByte === 0 ? 0 : $fanout[$firstByte - 1];
        $end = $fanout[$firstByte];

        if( $end <= $start ) {
            return -1;
        }

        $total = $fanout[255];
        $cacheKey = "$indexFile:$firstByte";

        if( !isset( $this->shaBucketCache[$cacheKey] ) ) {
            $count = $end - $start;

            fseek( $fileHandle, self::SHA_TABLE_START + ($start * 20) );
            $this->shaBucketCache[$cacheKey] =
                (string)fread( $fileHandle, $count * 20 );

            // The 32-bit offset table follows the hash table (20 bytes per
            // object) and the CRC table (4 bytes per object).
            fseek(
                $fileHandle,
                self::SHA_TABLE_START + ($total * 24) + ($start * 4)
            );
            $this->offsetBucketCache[$cacheKey] =
                (string)fread( $fileHandle, $count * 4 );
        }

        $shaBlock = $this->shaBucketCache[$cacheKey];
        $low = 0;
        // intdiv keeps the bound an integer even if the block is truncated.
        $high = intdiv( strlen( $shaBlock ), 20 ) - 1;
        $foundIdx = -1;

        while( $low <= $high ) {
            $mid = ($low + $high) >> 1;
            // strcmp compares bytes; `<`/`>` would compare numeric-looking
            // strings as numbers.
            $order = strcmp( substr( $shaBlock, $mid * 20, 20 ), $binarySha );

            if( $order < 0 ) {
                $low = $mid + 1;
            } elseif( $order > 0 ) {
                $high = $mid - 1;
            } else {
                $foundIdx = $mid;
                break;
            }
        }

        if( $foundIdx === -1 ) {
            return -1;
        }

        $offsetData = substr(
            $this->offsetBucketCache[$cacheKey],
            $foundIdx * 4,
            4
        );

        if( strlen( $offsetData ) !== 4 ) {
            return -1;
        }

        $offset = unpack( 'N', $offsetData )[1];

        if( $offset & 0x80000000 ) {
            // High bit set: the low 31 bits index the 64-bit offset table,
            // which follows the 32-bit offset table.
            $pos64 = self::SHA_TABLE_START + ($total * 28) +
                (($offset & 0x7FFFFFFF) * 8);
            fseek( $fileHandle, $pos64 );
            $wide = fread( $fileHandle, 8 );

            if( !is_string( $wide ) || strlen( $wide ) !== 8 ) {
                return -1;
            }

            $offset = unpack( 'J', $wide )[1];
        }

        return (int)$offset;
    }

    /**
     * Returns the 256-entry fanout table of an index, reading it on first use.
     *
     * An index with an unexpected header or a short fanout table is cached
     * as an empty array so it is not re-read on every lookup.
     *
     * @param resource $fileHandle Open handle of the index file.
     * @return list<int> 256 cumulative counts, or [] when unusable.
     */
    private function loadFanout( string $indexFile, $fileHandle ): array {
        if( isset( $this->fanoutCache[$indexFile] ) ) {
            return $this->fanoutCache[$indexFile];
        }

        $fanout = [];
        fseek( $fileHandle, 0 );

        if( fread( $fileHandle, 8 ) === self::IDX_V2_HEADER ) {
            $raw = fread( $fileHandle, self::FANOUT_BYTES );

            if( is_string( $raw ) && strlen( $raw ) === self::FANOUT_BYTES ) {
                $fanout = array_values( unpack( 'N*', $raw ) );
            }
        }

        $this->fanoutCache[$indexFile] = $fanout;

        return $fanout;
    }

    /**
     * Decodes the pack entry starting at $offset, resolving deltas.
     *
     * @param resource $fileHandle Open handle of the pack file.
     * @return string Object content; may be partial or empty on corrupt data.
     */
    private function readPackEntry( $fileHandle, int $offset ): string {
        fseek( $fileHandle, $offset );

        $header = $this->readVarInt( $fileHandle );
        $type = ($header['byte'] >> 4) & 7;

        if( $type === 6 ) {
            return $this->handleOfsDelta( $fileHandle, $offset );
        }

        if( $type === 7 ) {
            return $this->handleRefDelta( $fileHandle );
        }

        return $this->inflateFromHandle( $fileHandle )[0];
    }

    /**
     * Reads the size of the entry at $offset in the given pack.
     *
     * @return int Entry size (target size for deltas); 0 when the pack
     *             cannot be opened.
     */
    private function extractPackedSize( string $packPath, int $offset ): int {
        $fileHandle = $this->getHandle( $packPath );

        if( !$fileHandle ) {
            return 0;
        }

        fseek( $fileHandle, $offset );

        $header = $this->readVarInt( $fileHandle );
        $type = ($header['byte'] >> 4) & 7;

        if( $type === 6 || $type === 7 ) {
            return $this->readDeltaTargetSize( $fileHandle, $type );
        }

        return $header['value'];
    }

    /**
     * Resolves an OFS_DELTA entry whose entry header has just been consumed.
     *
     * @param resource $fileHandle Pack handle positioned at the base-distance varint.
     * @param int      $offset     Offset of this delta entry within the pack.
     */
    private function handleOfsDelta( $fileHandle, int $offset ): string {
        $byte = $this->readByte( $fileHandle );
        $negative = $byte & 127;

        while( $byte & 128 ) {
            $byte = $this->readByte( $fileHandle );
            $negative = (($negative + 1) << 7) | ($byte & 127);
        }

        // A base must lie strictly before this entry; anything else is
        // corrupt and a distance of 0 would recurse forever.
        if( $negative <= 0 || $negative > $offset ) {
            return '';
        }

        $resumeAt = ftell( $fileHandle );
        $base = $this->readPackEntry( $fileHandle, $offset - $negative );

        fseek( $fileHandle, $resumeAt );

        return $this->applyDelta( $base, $this->readDeltaStream( $fileHandle ) );
    }

    /**
     * Resolves a REF_DELTA entry whose entry header has just been consumed.
     *
     * @param resource $fileHandle Pack handle positioned at the 20-byte base id.
     */
    private function handleRefDelta( $fileHandle ): string {
        $baseSha = bin2hex( (string)fread( $fileHandle, 20 ) );

        // Reading the base may seek this very handle (same pack), so the
        // position of the delta stream has to be remembered and restored.
        $resumeAt = ftell( $fileHandle );
        $base = $this->read( $baseSha ) ?? '';

        fseek( $fileHandle, $resumeAt );

        return $this->applyDelta( $base, $this->readDeltaStream( $fileHandle ) );
    }

    /**
     * Inflates the delta stream at the current position.
     *
     * @param resource $fileHandle
     * @return string The delta instructions, or '' when the stream is
     *                incomplete or corrupt.
     */
    private function readDeltaStream( $fileHandle ): string {
        [$delta, $complete] = $this->inflateFromHandle( $fileHandle );

        return $complete ? $delta : '';
    }

    /**
     * Inflates one zlib stream starting at the handle's current position.
     *
     * Bytes following the end of the stream are ignored.
     *
     * @param resource $fileHandle
     * @param int      $chunkSize Number of compressed bytes read per step.
     * @param int|null $enough    Stop early once this many bytes were
     *                            produced; null inflates the whole stream.
     * @return array{0: string, 1: bool} Inflated bytes and whether the end
     *                                   of the stream was reached.
     */
    private function inflateFromHandle(
        $fileHandle,
        int $chunkSize = 8192,
        ?int $enough = null
    ): array {
        $inflator = inflate_init( ZLIB_ENCODING_DEFLATE );
        $result = '';

        if( $inflator === false ) {
            return [$result, false];
        }

        while( !feof( $fileHandle ) ) {
            $chunk = fread( $fileHandle, $chunkSize );

            if( $chunk === false || $chunk === '' ) {
                break;
            }

            $data = @inflate_add( $inflator, $chunk );

            if( $data === false ) {
                // Corrupt stream: keep what was decoded so far.
                return [$result, false];
            }

            $result .= $data;

            if( inflate_get_status( $inflator ) === ZLIB_STREAM_END ) {
                return [$result, true];
            }

            if( $enough !== null && strlen( $result ) >= $enough ) {
                break;
            }
        }

        return [$result, false];
    }

    /**
     * Applies Git delta instructions to a base object.
     *
     * The two leading size varints are skipped. Bytes missing from a
     * truncated delta are treated as 0.
     *
     * @return string The reconstructed object ('' for an empty delta).
     */
    private function applyDelta( string $base, string $delta ): string {
        if( $delta === '' ) {
            return '';
        }

        $position = 0;

        $this->skipSize( $delta, $position );
        $this->skipSize( $delta, $position );

        $output = '';
        $deltaLength = strlen( $delta );

        while( $position < $deltaLength ) {
            $opcode = ord( $delta[$position++] );

            if( $opcode & 128 ) {
                // Copy from base: the low 7 bits flag which offset/length
                // bytes are present, least significant byte first.
                $offset = 0;
                $length = 0;

                for( $bit = 0; $bit < 4; $bit++ ) {
                    if( $opcode & (1 << $bit) ) {
                        $offset |=
                            ord( $delta[$position++] ?? "\0" ) << ($bit * 8);
                    }
                }

                for( $bit = 0; $bit < 3; $bit++ ) {
                    if( $opcode & (0x10 << $bit) ) {
                        $length |=
                            ord( $delta[$position++] ?? "\0" ) << ($bit * 8);
                    }
                }

                if( $length === 0 ) {
                    $length = 0x10000;
                }

                $output .= substr( $base, $offset, $length );
            } else {
                // Insert: the opcode itself is the literal byte count.
                $length = $opcode & 127;
                $output .= substr( $delta, $position, $length );
                $position += $length;
            }
        }

        return $output;
    }

    /**
     * Reads one byte from the handle.
     *
     * @param resource $fileHandle
     * @return int Byte value 0-255; 0 when nothing could be read.
     */
    private function readByte( $fileHandle ): int {
        $raw = fread( $fileHandle, 1 );

        return is_string( $raw ) && $raw !== '' ? ord( $raw ) : 0;
    }

    /**
     * Reads a pack entry header: 4 size bits in the first byte, then 7 bits
     * per continuation byte.
     *
     * @param resource $fileHandle
     * @return array{value: int, byte: int} Decoded size and the raw first
     *                                      byte (which carries the type bits).
     */
    private function readVarInt( $fileHandle ): array {
        $first = $this->readByte( $fileHandle );
        $byte = $first;
        $value = $first & 15;
        $shift = 4;

        while( $byte & 128 ) {
            $byte = $this->readByte( $fileHandle );
            $value |= (($byte & 127) << $shift);
            $shift += 7;
        }

        return ['value' => $value, 'byte' => $first];
    }

    /**
     * Reads the target size stored at the start of a delta stream.
     *
     * Only the first bytes of the stream are inflated.
     *
     * @param resource $fileHandle Pack handle positioned just after the
     *                             delta entry's header.
     * @param int      $type       6 (OFS_DELTA) or 7 (REF_DELTA).
     * @return int Target size, or 0 when nothing could be inflated.
     */
    private function readDeltaTargetSize( $fileHandle, int $type ): int {
        if( $type === 6 ) {
            // Skip the variable-length base distance.
            do {
                $byte = $this->readByte( $fileHandle );
            } while( $byte & 128 );
        } else {
            // Skip the 20-byte base object id.
            fseek( $fileHandle, 20, SEEK_CUR );
        }

        // 32 bytes is more than enough for the two leading size varints.
        [$header] = $this->inflateFromHandle( $fileHandle, 512, 32 );

        if( $header === '' ) {
            return 0;
        }

        $position = 0;
        $this->skipSize( $header, $position );

        return $this->readSize( $header, $position );
    }

    /**
     * Advances $position past one 7-bit varint in $data.
     */
    private function skipSize( string $data, int &$position ): void {
        while( ord( $data[$position++] ?? "\0" ) & 128 ) {
        }
    }

    /**
     * Decodes one little-endian 7-bit varint from $data, advancing $position.
     */
    private function readSize( string $data, int &$position ): int {
        $byte = ord( $data[$position++] ?? "\0" );
        $value = $byte & 127;
        $shift = 7;

        while( $byte & 128 ) {
            $byte = ord( $data[$position++] ?? "\0" );
            $value |= (($byte & 127) << $shift);
            $shift += 7;
        }

        return $value;
    }

    /**
     * Returns a cached read handle for $path, opening it on first use.
     *
     * @return resource|false False when the file cannot be opened (the
     *                        failure is cached as well).
     */
    private function getHandle( string $path ) {
        if( !isset( $this->fileHandles[$path] ) ) {
            $this->fileHandles[$path] = @fopen( $path, 'rb' );
        }

        return $this->fileHandles[$path];
    }
}