Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/treetrek.git

Separates GitPacks into smaller concepts

Author: Dave Jarvis <email>
Date: 2026-02-21 11:04:21 GMT-0800
Commit: 731ccb81ce7e41781e53983676a53e64fb8c556b
Parent: e52ed77
git/DeltaDecoder.php
+<?php
+require_once __DIR__ . '/CompressionStream.php';
+
+/**
+ * Decodes Git pack delta data: two variable-length size headers followed by
+ * opcodes that either copy a range out of a base object or insert literal
+ * bytes carried in the delta itself.
+ */
+class DeltaDecoder {
+  /**
+   * Applies a fully inflated delta to a base held in memory.
+   *
+   * @param string $base Bytes that copy opcodes read from.
+   * @param string $delta Size headers followed by copy/insert opcodes.
+   * @param int $cap When positive, decoding stops before the next opcode once
+   *   at least this many bytes exist. The result is not trimmed, so it may be
+   *   longer than the cap.
+   *
+   * @return string The reconstructed bytes.
+   */
+  public function apply( string $base, string $delta, int $cap ): string {
+    $pos = 0;
+
+    // Both size headers are skipped; only their encoded widths matter here.
+    $res = $this->readDeltaSize( $delta, $pos );
+    $pos += $res['used'];
+    $res = $this->readDeltaSize( $delta, $pos );
+    $pos += $res['used'];
+
+    $out = '';
+    $len = strlen( $delta );
+    $done = false;
+
+    while( !$done && $pos < $len ) {
+      if( $cap > 0 && strlen( $out ) >= $cap ) {
+        $done = true;
+      }
+
+      if( !$done ) {
+        $op = ord( $delta[$pos++] );
+
+        if( $op & 128 ) {
+          // High bit set: copy a range of the base.
+          $info = $this->parseCopyInstruction( $op, $delta, $pos );
+          $out .= substr( $base, $info['off'], $info['len'] );
+          $pos += $info['used'];
+        } else {
+          // High bit clear: the low seven bits count literal bytes to insert.
+          $ln = $op & 127;
+          $out .= substr( $delta, $pos, $ln );
+          $pos += $ln;
+        }
+      }
+    }
+
+    return $out;
+  }
+
+  /**
+   * Applies a compressed delta read from a stream, yielding output in pieces.
+   *
+   * @param mixed $handle Stream handed to the CompressionStream inflater;
+   *   presumably positioned at the start of the compressed delta (confirm
+   *   against callers).
+   * @param mixed $base Either the base bytes as a string, or a seekable
+   *   resource that copy opcodes read from.
+   *
+   * @return Generator Yields consecutive chunks of the reconstructed data.
+   */
+  public function applyStreamGenerator(
+    mixed $handle,
+    mixed $base
+  ): Generator {
+    $stream = CompressionStream::createInflater();
+    // States 0 and 1 skip the two size headers; state 2 decodes opcodes.
+    $state = 0;
+    $buffer = '';
+    $isFile = is_resource( $base );
+
+    foreach( $stream->stream( $handle ) as $data ) {
+      $buffer .= $data;
+      $doneBuffer = false;
+
+      // Consume as many complete items as the buffer holds; a partial item
+      // stays buffered until the next inflated chunk arrives.
+      while( !$doneBuffer ) {
+        $len = strlen( $buffer );
+
+        if( $len === 0 ) {
+          $doneBuffer = true;
+        }
+
+        if( !$doneBuffer ) {
+          if( $state < 2 ) {
+            $pos = 0;
+
+            while( $pos < $len && (ord( $buffer[$pos] ) & 128) ) {
+              $pos++;
+            }
+
+            // Every buffered byte has its continuation bit set, so the
+            // header's final byte has not arrived yet.
+            if( $pos === $len && (ord( $buffer[$pos - 1] ) & 128) ) {
+              $doneBuffer = true;
+            }
+
+            if( !$doneBuffer ) {
+              $buffer = substr( $buffer, $pos + 1 );
+              $state++;
+            }
+          } else {
+            $op = ord( $buffer[0] );
+
+            if( $op & 128 ) {
+              $need = $this->calculateCopyInstructionSize( $op );
+
+              if( $len < 1 + $need ) {
+                $doneBuffer = true;
+              }
+
+              if( !$doneBuffer ) {
+                $info = $this->parseCopyInstruction( $op, $buffer, 1 );
+
+                if( $isFile ) {
+                  fseek( $base, $info['off'] );
+
+                  $rem = $info['len'];
+
+                  // Copy in pieces of at most 64 KiB; a failed or empty read
+                  // ends the copy early.
+                  while( $rem > 0 ) {
+                    $slc = fread( $base, min( 65536, $rem ) );
+
+                    if( $slc === false || $slc === '' ) {
+                      $rem = 0;
+                    } else {
+                      yield $slc;
+
+                      $rem -= strlen( $slc );
+                    }
+                  }
+                } else {
+                  yield substr( $base, $info['off'], $info['len'] );
+                }
+
+                $buffer = substr( $buffer, 1 + $need );
+              }
+            } else {
+              $ln = $op & 127;
+
+              if( $len < 1 + $ln ) {
+                $doneBuffer = true;
+              }
+
+              if( !$doneBuffer ) {
+                yield substr( $buffer, 1, $ln );
+
+                $buffer = substr( $buffer, 1 + $ln );
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * Reads the second size header of a compressed delta without decoding the
+   * rest of it.
+   *
+   * @param mixed $handle Stream positioned just after the pack entry header.
+   * @param int $type Entry type: for 6 a variable-length field is skipped
+   *   first, otherwise 20 bytes are skipped.
+   *
+   * @return int The second size value, or 0 when nothing could be inflated.
+   */
+  public function readDeltaTargetSize( mixed $handle, int $type ): int {
+    if( $type === 6 ) {
+      $byte = ord( fread( $handle, 1 ) );
+
+      while( $byte & 128 ) {
+        $byte = ord( fread( $handle, 1 ) );
+      }
+    } else {
+      fseek( $handle, 20, SEEK_CUR );
+    }
+
+    $stream = CompressionStream::createInflater();
+    $head = '';
+    $try = 0;
+
+    // Only the first bytes are needed; stop after 32 bytes or 64 chunks.
+    foreach( $stream->stream( $handle, 512 ) as $out ) {
+      $head .= $out;
+      $try++;
+
+      if( strlen( $head ) >= 32 || $try >= 64 ) {
+        break;
+      }
+    }
+
+    $pos = 0;
+    $result = 0;
+
+    if( strlen( $head ) > 0 ) {
+      $res = $this->readDeltaSize( $head, $pos );
+      $pos += $res['used'];
+      $res = $this->readDeltaSize( $head, $pos );
+      $result = $res['val'];
+    }
+
+    return $result;
+  }
+
+  /**
+   * Decodes the operand bytes of a copy opcode.
+   *
+   * Bits 0-3 of the opcode flag which offset bytes follow and bits 4-6 flag
+   * which length bytes follow, least significant byte first. Operand bytes
+   * missing from truncated data are read as zero instead of raising an
+   * "Uninitialized string offset" warning.
+   *
+   * @param int $op The opcode byte.
+   * @param string $data Buffer holding the operand bytes.
+   * @param int $pos Index of the first operand byte.
+   *
+   * @return array Keys 'off' and 'len' (a zero length means 0x10000) plus
+   *   'used', the number of operand bytes the opcode declares.
+   */
+  private function parseCopyInstruction(
+    int $op,
+    string $data,
+    int $pos
+  ): array {
+    $off = 0;
+    $len = 0;
+    $ptr = $pos;
+
+    for( $bit = 0; $bit < 4; $bit++ ) {
+      if( $op & (0x01 << $bit) ) {
+        $off |= $this->byteAt( $data, $ptr ) << ($bit * 8);
+        $ptr++;
+      }
+    }
+
+    for( $bit = 0; $bit < 3; $bit++ ) {
+      if( $op & (0x10 << $bit) ) {
+        $len |= $this->byteAt( $data, $ptr ) << ($bit * 8);
+        $ptr++;
+      }
+    }
+
+    return [
+      'off' => $off,
+      'len' => $len === 0 ? 0x10000 : $len,
+      'used' => $ptr - $pos
+    ];
+  }
+
+  /**
+   * Returns the byte value at an index, or 0 when the index is past the end.
+   */
+  private function byteAt( string $data, int $index ): int {
+    return $index < strlen( $data ) ? ord( $data[$index] ) : 0;
+  }
+
+  /**
+   * Counts the bits set among the low seven bits of a copy opcode, which is
+   * the number of operand bytes that follow it.
+   */
+  private function calculateCopyInstructionSize( int $op ): int {
+    $calc = $op & 0x7F;
+    $calc = $calc - ($calc >> 1 & 0x55);
+    $calc = ($calc >> 2 & 0x33) + ($calc & 0x33);
+    $calc = (($calc >> 4) + $calc) & 0x0F;
+
+    return $calc;
+  }
+
+  /**
+   * Reads one little-endian base-128 integer: seven payload bits per byte,
+   * with the high bit marking that another byte follows.
+   *
+   * @return array Keys 'val' (decoded value) and 'used' (bytes consumed).
+   */
+  private function readDeltaSize( string $data, int $pos ): array {
+    $len = strlen( $data );
+    $val = 0;
+    $shift = 0;
+    $start = $pos;
+    $done = false;
+
+    while( !$done && $pos < $len ) {
+      $byte = ord( $data[$pos++] );
+      $val |= ($byte & 0x7F) << $shift;
+
+      if( !($byte & 0x80) ) {
+        $done = true;
+      }
+
+      if( !$done ) {
+        $shift += 7;
+      }
+    }
+
+    return [ 'val' => $val, 'used' => $pos - $start ];
+  }
+}
git/FileHandlePool.php
+<?php
+/**
+ * Keeps one read-only file handle open per path and lends it to callbacks.
+ * A path that fails to open is remembered and not retried.
+ */
+class FileHandlePool {
+  private array $handles;
+
+  public function __construct() {
+    $this->handles = [];
+  }
+
+  /**
+   * Closes every handle that is still an open resource.
+   */
+  public function __destruct() {
+    foreach( $this->handles as $candidate ) {
+      if( !is_resource( $candidate ) ) {
+        continue;
+      }
+
+      fclose( $candidate );
+    }
+  }
+
+  /**
+   * Runs the action against the path's handle and returns its integer
+   * result, or the fallback when the file cannot be opened or the action
+   * returns a non-integer.
+   */
+  public function computeInt(
+    string $path,
+    callable $action,
+    int $fallback = 0
+  ): int {
+    $outcome = $this->withHandle( $path, $action );
+
+    if( !is_int( $outcome ) ) {
+      return $fallback;
+    }
+
+    return $outcome;
+  }
+
+  /**
+   * Runs the action against the path's handle and returns its string
+   * result, or the fallback when the file cannot be opened or the action
+   * returns a non-string.
+   */
+  public function computeString(
+    string $path,
+    callable $action,
+    string $fallback = ''
+  ): string {
+    $outcome = $this->withHandle( $path, $action );
+
+    if( !is_string( $outcome ) ) {
+      return $fallback;
+    }
+
+    return $outcome;
+  }
+
+  /**
+   * Runs the action against the path's handle, discarding any result.
+   */
+  public function computeVoid( string $path, callable $action ): void {
+    $this->withHandle( $path, $action );
+  }
+
+  /**
+   * Relays everything produced by a generator-returning action; yields
+   * nothing when the file cannot be opened or the action returns anything
+   * other than a Generator.
+   */
+  public function streamGenerator(
+    string $path,
+    callable $action
+  ): Generator {
+    $produced = $this->withHandle( $path, $action );
+
+    if( !($produced instanceof Generator) ) {
+      return;
+    }
+
+    yield from $produced;
+  }
+
+  /**
+   * Opens the path on first use, then invokes the action with the cached
+   * handle. Returns null without calling the action when no handle exists.
+   */
+  private function withHandle( string $path, callable $action ) {
+    if( !array_key_exists( $path, $this->handles ) ) {
+      $opened = @fopen( $path, 'rb' );
+
+      // A null entry records the failure so the open is not attempted again.
+      $this->handles[$path] = $opened ? $opened : null;
+    }
+
+    $cached = $this->handles[$path];
+
+    if( !is_resource( $cached ) ) {
+      return null;
+    }
+
+    return $action( $cached );
+  }
+}
git/GitPacks.php
<?php
-require_once __DIR__ . '/CompressionStream.php';
-
-class GitPacks {
- private const MAX_READ = 1040576;
- private const MAX_RAM = 1048576;
- private const MAX_BASE_RAM = 2097152;
- private const MAX_DEPTH = 200;
-
- private string $objectsPath;
- private array $packFiles;
- private string $lastPack = '';
- private array $fileHandles;
- private array $fanoutCache;
- private array $shaBucketCache;
- private array $offsetBucketCache;
-
- public function __construct( string $objectsPath ) {
- $this->objectsPath = $objectsPath;
- $this->packFiles = glob( "{$this->objectsPath}/pack/*.idx" ) ?: [];
- $this->fileHandles = [];
- $this->fanoutCache = [];
- $this->shaBucketCache = [];
- $this->offsetBucketCache = [];
- }
-
- public function __destruct() {
- foreach( $this->fileHandles as $handle ) {
- if( is_resource( $handle ) ) {
- fclose( $handle );
- }
- }
- }
-
- public function peek( string $sha, int $len = 12 ): string {
- $info = $this->findPackInfo( $sha );
- $result = '';
-
- if( $info['offset'] !== 0 ) {
- $handle = $this->getHandle( $info['file'] );
-
- if( $handle ) {
- $result = $this->readPackEntry(
- $handle,
- $info['offset'],
- $len,
- $len
- );
- }
- }
-
- return $result;
- }
-
- public function read( string $sha ): string {
- $info = $this->findPackInfo( $sha );
- $result = '';
-
- if( $info['offset'] !== 0 ) {
- $size = $this->extractPackedSize( $info['file'], $info['offset'] );
-
- if( $size <= self::MAX_RAM ) {
- $handle = $this->getHandle( $info['file'] );
-
- if( $handle ) {
- $result = $this->readPackEntry(
- $handle,
- $info['offset'],
- $size
- );
- }
- }
- }
-
- return $result;
- }
-
- public function stream( string $sha, callable $callback ): bool {
- $result = false;
-
- foreach( $this->streamGenerator( $sha ) as $chunk ) {
- $callback( $chunk );
- $result = true;
- }
-
- return $result;
- }
-
- public function streamGenerator( string $sha ): Generator {
- yield from $this->streamShaGenerator( $sha, 0 );
- }
-
- public function streamRawCompressed( string $sha ): Generator {
- $info = $this->findPackInfo( $sha );
-
- if( $info['offset'] !== 0 ) {
- $handle = $this->getHandle( $info['file'] );
-
- if( is_resource( $handle ) ) {
- fseek( $handle, $info['offset'] );
-
- $header = $this->readVarInt( $handle );
- $type = ($header['byte'] >> 4) & 7;
-
- if( $type !== 6 && $type !== 7 ) {
- $stream = CompressionStream::createExtractor();
-
- yield from $stream->stream( $handle );
- }
- }
- }
- }
-
- private function streamShaGenerator( string $sha, int $depth ): Generator {
- $info = $this->findPackInfo( $sha );
-
- if( $info['offset'] !== 0 ) {
- $handle = $this->getHandle( $info['file'] );
-
- if( $handle ) {
- yield from $this->streamPackEntryGenerator(
- $handle,
- $info['offset'],
- $depth
- );
- }
- }
- }
-
- public function getSize( string $sha ): int {
- $info = $this->findPackInfo( $sha );
- $result = 0;
-
- if( $info['offset'] !== 0 ) {
- $result = $this->extractPackedSize( $info['file'], $info['offset'] );
- }
-
- return $result;
- }
-
- private function findPackInfo( string $sha ): array {
- $result = [ 'offset' => 0, 'file' => '' ];
-
- if( strlen( $sha ) === 40 && ctype_xdigit( $sha ) ) {
- $binarySha = hex2bin( $sha );
-
- if( $this->lastPack !== '' ) {
- $offset = $this->findInIdx( $this->lastPack, $binarySha );
-
- if( $offset !== 0 ) {
- $result = [
- 'file' => str_replace( '.idx', '.pack', $this->lastPack ),
- 'offset' => $offset
- ];
- }
- }
-
- if( $result['offset'] === 0 ) {
- $count = count( $this->packFiles );
- $idx = 0;
- $found = false;
-
- while( !$found && $idx < $count ) {
- $indexFile = $this->packFiles[$idx];
-
- if( $indexFile !== $this->lastPack ) {
- $offset = $this->findInIdx( $indexFile, $binarySha );
-
- if( $offset !== 0 ) {
- $this->lastPack = $indexFile;
- $result = [
- 'file' => str_replace( '.idx', '.pack', $indexFile ),
- 'offset' => $offset
- ];
- $found = true;
- }
- }
-
- $idx++;
- }
- }
- }
-
- return $result;
- }
-
- private function findInIdx( string $indexFile, string $binarySha ): int {
- $handle = $this->getHandle( $indexFile );
- $result = 0;
-
- if( $handle ) {
- if( !isset( $this->fanoutCache[$indexFile] ) ) {
- fseek( $handle, 0 );
- $head = fread( $handle, 8 );
-
- if( $head === "\377tOc\0\0\0\2" ) {
- $this->fanoutCache[$indexFile] = array_values(
- unpack( 'N*', fread( $handle, 1024 ) )
- );
- }
- }
-
- if( isset( $this->fanoutCache[$indexFile] ) ) {
- $fanout = $this->fanoutCache[$indexFile];
- $byte = ord( $binarySha[0] );
- $start = $byte === 0 ? 0 : $fanout[$byte - 1];
- $end = $fanout[$byte];
-
- if( $end > $start ) {
- $result = $this->binarySearchIdx(
- $indexFile,
- $handle,
- $start,
- $end,
- $binarySha,
- $fanout[255]
- );
- }
- }
- }
-
- return $result;
- }
-
- private function binarySearchIdx(
- string $indexFile,
- mixed $handle,
- int $start,
- int $end,
- string $binarySha,
- int $total
- ): int {
- $low = $start;
- $high = $end - 1;
- $result = 0;
-
- while( $result === 0 && $low <= $high ) {
- $mid = ($low + $high) >> 1;
-
- fseek( $handle, 1032 + ($mid * 20) );
-
- $cmp = fread( $handle, 20 );
-
- if( $cmp < $binarySha ) {
- $low = $mid + 1;
- } elseif( $cmp > $binarySha ) {
- $high = $mid - 1;
- } else {
- fseek( $handle, 1032 + ($total * 24) + ($mid * 4) );
-
- $packed = fread( $handle, 4 );
- $offset = unpack( 'N', $packed )[1];
-
- if( $offset & 0x80000000 ) {
- $pos = 1032 + ($total * 28) + (($offset & 0x7FFFFFFF) * 8);
-
- fseek( $handle, $pos );
-
- $offset = unpack( 'J', fread( $handle, 8 ) )[1];
- }
-
- $result = (int)$offset;
- }
- }
-
- return $result;
- }
-
- private function readPackEntry(
- $handle,
- int $offset,
- int $size,
- int $cap = 0
- ): string {
- $header = [];
- $type = 0;
- $result = '';
-
- fseek( $handle, $offset );
-
- $header = $this->readVarInt( $handle );
- $type = ($header['byte'] >> 4) & 7;
-
- if( $type === 6 ) {
- $result = $this->handleOfsDelta( $handle, $offset, $size, $cap );
- } elseif( $type === 7 ) {
- $result = $this->handleRefDelta( $handle, $size, $cap );
- } else {
- $result = $this->inflate( $handle, $cap );
- }
-
- return $result;
- }
-
- private function handleOfsDelta(
- $handle,
- int $offset,
- int $size,
- int $cap
- ): string {
- $neg = $this->readOffsetDelta( $handle );
- $cur = ftell( $handle );
- $base = $offset - $neg;
- $bData = $this->readPackEntry( $handle, $base, 0, $cap );
-
- fseek( $handle, $cur );
-
- $delta = $this->inflate( $handle );
- return $this->applyDelta( $bData, $delta, $cap );
- }
-
- private function handleRefDelta(
- $handle,
- int $size,
- int $cap
- ): string {
- $sha = bin2hex( fread( $handle, 20 ) );
-
- if( $cap > 0 ) {
- $bas = $this->peek( $sha, $cap );
- } else {
- $bas = $this->read( $sha );
- }
-
- $del = $this->inflate( $handle );
- return $this->applyDelta( $bas, $del, $cap );
- }
-
- private function streamPackEntryGenerator(
- $handle,
- int $offset,
- int $depth
- ): Generator {
- fseek( $handle, $offset );
- $header = $this->readVarInt( $handle );
- $type = ($header['byte'] >> 4) & 7;
-
- if( $type === 6 || $type === 7 ) {
- yield from $this->streamDeltaObjectGenerator(
- $handle,
- $offset,
- $type,
- $depth
- );
- } else {
- $stream = CompressionStream::createInflater();
-
- yield from $stream->stream( $handle );
- }
- }
-
- private function resolveBaseToTempFile(
- $packHandle,
- int $baseOffset,
- int $depth
- ) {
- $tmpHandle = tmpfile();
-
- if( $tmpHandle !== false ) {
- foreach( $this->streamPackEntryGenerator(
- $packHandle,
- $baseOffset,
- $depth + 1
- ) as $chunk ) {
- fwrite( $tmpHandle, $chunk );
- }
-
- rewind( $tmpHandle );
- } else {
- error_log(
- "[GitPacks] tmpfile failed for ofs-delta base at $baseOffset"
- );
- }
-
- return $tmpHandle;
- }
-
- private function streamDeltaObjectGenerator(
- $handle,
- int $offset,
- int $type,
- int $depth
- ): Generator {
- if( $depth < self::MAX_DEPTH ) {
- fseek( $handle, $offset );
- $this->readVarInt( $handle );
-
- if( $type === 6 ) {
- $neg = $this->readOffsetDelta( $handle );
- $deltaPos = ftell( $handle );
- $baseSize = $this->extractPackedSize( $handle, $offset - $neg );
-
- if( $baseSize > self::MAX_BASE_RAM ) {
- $tmpHandle = $this->resolveBaseToTempFile(
- $handle,
- $offset - $neg,
- $depth
- );
-
- if( $tmpHandle !== false ) {
- fseek( $handle, $deltaPos );
- yield from $this->applyDeltaStreamGenerator(
- $handle,
- $tmpHandle
- );
-
- fclose( $tmpHandle );
- }
- } else {
- $base = '';
-
- foreach( $this->streamPackEntryGenerator(
- $handle,
- $offset - $neg,
- $depth + 1
- ) as $chunk ) {
- $base .= $chunk;
- }
-
- fseek( $handle, $deltaPos );
- yield from $this->applyDeltaStreamGenerator( $handle, $base );
- }
- } else {
- $baseSha = bin2hex( fread( $handle, 20 ) );
- $baseSize = $this->getSize( $baseSha );
-
- if( $baseSize > self::MAX_BASE_RAM ) {
- $tmpHandle = tmpfile();
-
- if( $tmpHandle !== false ) {
- $written = false;
-
- foreach( $this->streamShaGenerator(
- $baseSha,
- $depth + 1
- ) as $chunk ) {
- fwrite( $tmpHandle, $chunk );
- $written = true;
- }
-
- if( $written ) {
- rewind( $tmpHandle );
- yield from $this->applyDeltaStreamGenerator(
- $handle,
- $tmpHandle
- );
- }
-
- fclose( $tmpHandle );
- } else {
- error_log(
- "[GitPacks] tmpfile() failed for ref-delta (sha=$baseSha)"
- );
- }
- } else {
- $base = '';
- $written = false;
-
- foreach( $this->streamShaGenerator(
- $baseSha,
- $depth + 1
- ) as $chunk ) {
- $base .= $chunk;
- $written = true;
- }
-
- if( $written ) {
- yield from $this->applyDeltaStreamGenerator( $handle, $base );
- }
- }
- }
- } else {
- error_log( "[GitPacks] delta depth limit exceeded at offset $offset" );
- }
- }
-
- private function applyDeltaStreamGenerator(
- $handle,
- $base
- ): Generator {
- $stream = CompressionStream::createInflater();
- $state = 0;
- $buffer = '';
- $isFile = is_resource( $base );
-
- foreach( $stream->stream( $handle ) as $data ) {
- $buffer .= $data;
- $doneBuffer = false;
-
- while( !$doneBuffer ) {
- $len = strlen( $buffer );
-
- if( $len === 0 ) {
- $doneBuffer = true;
- }
-
- if( !$doneBuffer ) {
- if( $state < 2 ) {
- $pos = 0;
-
- while( $pos < $len && (ord( $buffer[$pos] ) & 128) ) {
- $pos++;
- }
-
- if( $pos === $len && (ord( $buffer[$pos - 1] ) & 128) ) {
- $doneBuffer = true;
- }
-
- if( !$doneBuffer ) {
- $buffer = substr( $buffer, $pos + 1 );
- $state++;
- }
- } else {
- $op = ord( $buffer[0] );
-
- if( $op & 128 ) {
- $need = $this->getCopyInstructionSize( $op );
-
- if( $len < 1 + $need ) {
- $doneBuffer = true;
- }
-
- if( !$doneBuffer ) {
- $info = $this->parseCopyInstruction( $op, $buffer, 1 );
-
- if( $isFile ) {
- fseek( $base, $info['off'] );
- $rem = $info['len'];
-
- while( $rem > 0 ) {
- $slc = fread( $base, min( 65536, $rem ) );
-
- if( $slc === false || $slc === '' ) {
- $rem = 0;
- } else {
- yield $slc;
- $rem -= strlen( $slc );
- }
- }
- } else {
- yield substr( $base, $info['off'], $info['len'] );
- }
-
- $buffer = substr( $buffer, 1 + $need );
- }
- } else {
- $ln = $op & 127;
-
- if( $len < 1 + $ln ) {
- $doneBuffer = true;
- }
-
- if( !$doneBuffer ) {
- yield substr( $buffer, 1, $ln );
- $buffer = substr( $buffer, 1 + $ln );
- }
- }
- }
- }
- }
- }
- }
-
- private function inflate( $handle, int $cap = 0 ): string {
- $stream = CompressionStream::createInflater();
- $result = '';
-
- foreach( $stream->stream( $handle ) as $data ) {
- $result .= $data;
-
- if( $cap > 0 && strlen( $result ) >= $cap ) {
- $result = substr( $result, 0, $cap );
-
- break;
- }
- }
-
- return $result;
- }
-
- private function readDeltaTargetSize( $handle, int $type ): int {
- if( $type === 6 ) {
- $b = ord( fread( $handle, 1 ) );
-
- while( $b & 128 ) {
- $b = ord( fread( $handle, 1 ) );
- }
- } else {
- fseek( $handle, 20, SEEK_CUR );
- }
-
- $stream = CompressionStream::createInflater();
- $head = '';
- $try = 0;
-
- foreach( $stream->stream( $handle, 512 ) as $out ) {
- $head .= $out;
- $try++;
-
- if( strlen( $head ) >= 32 || $try >= 64 ) {
- break;
- }
- }
-
- $pos = 0;
- $result = 0;
-
- if( strlen( $head ) > 0 ) {
- $res = $this->readDeltaSize( $head, $pos );
- $pos += $res['used'];
- $res = $this->readDeltaSize( $head, $pos );
-
- $result = $res['val'];
- }
-
- return $result;
- }
-
- private function extractPackedSize( $packPathOrHandle, int $offset ): int {
- $handle = is_resource( $packPathOrHandle )
- ? $packPathOrHandle
- : $this->getHandle( $packPathOrHandle );
- $size = 0;
-
- if( $handle ) {
- fseek( $handle, $offset );
- $header = $this->readVarInt( $handle );
- $size = $header['value'];
- $type = ($header['byte'] >> 4) & 7;
-
- if( $type === 6 || $type === 7 ) {
- $size = $this->readDeltaTargetSize( $handle, $type );
- }
- }
-
- return $size;
- }
-
- private function applyDelta( string $base, string $delta, int $cap ): string {
- $pos = 0;
- $res = $this->readDeltaSize( $delta, $pos );
- $pos += $res['used'];
- $res = $this->readDeltaSize( $delta, $pos );
- $pos += $res['used'];
-
- $out = '';
- $len = strlen( $delta );
- $done = false;
-
- while( !$done && $pos < $len ) {
- if( $cap > 0 && strlen( $out ) >= $cap ) {
- $done = true;
- }
-
- if( !$done ) {
- $op = ord( $delta[$pos++] );
-
- if( $op & 128 ) {
- $info = $this->parseCopyInstruction( $op, $delta, $pos );
- $out .= substr( $base, $info['off'], $info['len'] );
- $pos += $info['used'];
- } else {
- $ln = $op & 127;
- $out .= substr( $delta, $pos, $ln );
- $pos += $ln;
- }
- }
- }
-
- return $out;
- }
-
- private function parseCopyInstruction(
- int $op,
- string $data,
- int $pos
- ): array {
- $off = 0;
- $len = 0;
- $ptr = $pos;
-
- if( $op & 0x01 ) {
- $off |= ord( $data[$ptr++] );
- }
-
- if( $op & 0x02 ) {
- $off |= ord( $data[$ptr++] ) << 8;
- }
-
- if( $op & 0x04 ) {
- $off |= ord( $data[$ptr++] ) << 16;
- }
-
- if( $op & 0x08 ) {
- $off |= ord( $data[$ptr++] ) << 24;
- }
-
- if( $op & 0x10 ) {
- $len |= ord( $data[$ptr++] );
- }
-
- if( $op & 0x20 ) {
- $len |= ord( $data[$ptr++] ) << 8;
- }
-
- if( $op & 0x40 ) {
- $len |= ord( $data[$ptr++] ) << 16;
- }
-
- return [
- 'off' => $off,
- 'len' => $len === 0 ? 0x10000 : $len,
- 'used' => $ptr - $pos
- ];
- }
-
- private function getCopyInstructionSize( int $op ): int {
- $c = $op & 0x7F;
- $c = $c - (($c >> 1) & 0x55);
- $c = (($c >> 2) & 0x33) + ($c & 0x33);
- $c = (($c >> 4) + $c) & 0x0F;
-
- return $c;
- }
-
- private function readVarInt( $handle ): array {
- $byte = ord( fread( $handle, 1 ) );
- $val = $byte & 15;
- $shft = 4;
- $fst = $byte;
-
- while( $byte & 128 ) {
- $byte = ord( fread( $handle, 1 ) );
- $val |= (($byte & 127) << $shft);
- $shft += 7;
- }
-
- return [ 'value' => $val, 'byte' => $fst ];
- }
-
- private function readOffsetDelta( $handle ): int {
- $byte = ord( fread( $handle, 1 ) );
- $neg = $byte & 127;
-
- while( $byte & 128 ) {
- $byte = ord( fread( $handle, 1 ) );
- $neg = (($neg + 1) << 7) | ($byte & 127);
- }
-
- return $neg;
- }
-
- private function readDeltaSize( string $data, int $pos ): array {
- $len = strlen( $data );
- $val = 0;
- $shift = 0;
- $start = $pos;
- $done = false;
-
- while( !$done && $pos < $len ) {
- $byte = ord( $data[$pos++] );
- $val |= ($byte & 0x7F) << $shift;
-
- if( !($byte & 0x80) ) {
- $done = true;
- }
-
- if( !$done ) {
- $shift += 7;
- }
- }
-
- return [ 'val' => $val, 'used' => $pos - $start ];
- }
-
- private function getHandle( string $path ) {
- if( !isset( $this->fileHandles[$path] ) ) {
- $this->fileHandles[$path] = @fopen( $path, 'rb' );
- }
-
- return $this->fileHandles[$path];
+require_once __DIR__ . '/FileHandlePool.php';
+require_once __DIR__ . '/PackLocator.php';
+require_once __DIR__ . '/DeltaDecoder.php';
+require_once __DIR__ . '/PackEntryReader.php';
+
+/**
+ * Reads objects stored in pack files by combining a handle pool, a locator
+ * that maps an object name to a pack file and offset, and an entry reader.
+ */
+class GitPacks {
+  private const MAX_RAM = 1048576;
+
+  private FileHandlePool $pool;
+  private PackLocator $locator;
+  private PackEntryReader $reader;
+
+  public function __construct( string $objectsPath ) {
+    $this->pool = new FileHandlePool();
+    $this->locator = new PackLocator( $objectsPath );
+    $this->reader = new PackEntryReader( new DeltaDecoder() );
+  }
+
+  /**
+   * Reads an object with a cap of $len bytes passed to the entry reader.
+   * Returns an empty string when the object is not located.
+   */
+  public function peek( string $sha, int $len = 12 ): string {
+    $bytes = '';
+    $readBase = fn( string $baseSha, int $cap ): string =>
+      $this->peek( $baseSha, $cap );
+
+    $this->locator->locate(
+      $this->pool,
+      $sha,
+      function( string $packFile, int $offset ) use (
+        &$bytes,
+        $len,
+        $readBase
+      ): void {
+        $bytes = $this->reader->read(
+          $this->pool,
+          $packFile,
+          $offset,
+          $len,
+          $readBase
+        );
+      }
+    );
+
+    return $bytes;
+  }
+
+  /**
+   * Reads a whole object into memory. Returns an empty string when the
+   * object is not located or its reported size exceeds MAX_RAM.
+   */
+  public function read( string $sha ): string {
+    $bytes = '';
+    $readBase = fn( string $baseSha, int $cap ): string => $cap > 0
+      ? $this->peek( $baseSha, $cap )
+      : $this->read( $baseSha );
+
+    $this->locator->locate(
+      $this->pool,
+      $sha,
+      function( string $packFile, int $offset ) use (
+        &$bytes,
+        $readBase
+      ): void {
+        $size = $this->reader->getSize( $this->pool, $packFile, $offset );
+
+        if( $size > self::MAX_RAM ) {
+          return;
+        }
+
+        $bytes = $this->reader->read(
+          $this->pool,
+          $packFile,
+          $offset,
+          0,
+          $readBase
+        );
+      }
+    );
+
+    return $bytes;
+  }
+
+  /**
+   * Feeds each streamed chunk to the callback.
+   *
+   * @return bool True when at least one chunk was delivered.
+   */
+  public function stream( string $sha, callable $callback ): bool {
+    $delivered = false;
+
+    foreach( $this->streamGenerator( $sha ) as $chunk ) {
+      $callback( $chunk );
+
+      $delivered = true;
+    }
+
+    return $delivered;
+  }
+
+  /**
+   * Streams an object's content, starting at delta depth zero.
+   */
+  public function streamGenerator( string $sha ): Generator {
+    yield from $this->streamShaGenerator( $sha, 0 );
+  }
+
+  /**
+   * Streams the entry reader's raw compressed output for the object; yields
+   * nothing when the object is not located.
+   */
+  public function streamRawCompressed( string $sha ): Generator {
+    $entry = $this->findEntry( $sha );
+
+    if( $entry !== null ) {
+      [ $file, $off ] = $entry;
+
+      yield from $this->reader->streamRawCompressed(
+        $this->pool,
+        $file,
+        $off
+      );
+    }
+  }
+
+  /**
+   * Streams an object at the given delta depth, lending the reader callbacks
+   * that size and stream base objects addressed by name.
+   */
+  private function streamShaGenerator( string $sha, int $depth ): Generator {
+    $entry = $this->findEntry( $sha );
+
+    if( $entry !== null ) {
+      [ $file, $off ] = $entry;
+
+      yield from $this->reader->streamEntryGenerator(
+        $this->pool,
+        $file,
+        $off,
+        $depth,
+        fn( string $baseSha ): int => $this->getSize( $baseSha ),
+        fn( string $baseSha, int $baseDepth ): Generator =>
+          $this->streamShaGenerator( $baseSha, $baseDepth )
+      );
+    }
+  }
+
+  /**
+   * Asks the locator for an object's position.
+   *
+   * @return array|null A [pack file, offset] pair, or null when not located.
+   */
+  private function findEntry( string $sha ): ?array {
+    $entry = null;
+
+    $this->locator->locate(
+      $this->pool,
+      $sha,
+      function( string $packFile, int $offset ) use ( &$entry ): void {
+        $entry = [ $packFile, $offset ];
+      }
+    );
+
+    return $entry;
+  }
+
+  /**
+   * Returns the size the entry reader reports for the object, or 0 when the
+   * object is not located.
+   */
+  public function getSize( string $sha ): int {
+    $size = 0;
+
+    $this->locator->locate(
+      $this->pool,
+      $sha,
+      function( string $packFile, int $offset ) use ( &$size ): void {
+        $size = $this->reader->getSize( $this->pool, $packFile, $offset );
+      }
+    );
+
+    return $size;
  }
}
git/PackEntryReader.php
+<?php
+require_once __DIR__ . '/FileHandlePool.php';
+require_once __DIR__ . '/DeltaDecoder.php';
+require_once __DIR__ . '/CompressionStream.php';
+
+/**
+ * Reads entries out of a pack file: plain entries are inflated directly,
+ * while delta entries (type 6 addresses its base by relative offset, type 7
+ * by a 20-byte object name) are resolved against their base first.
+ */
+class PackEntryReader {
+  /** Streaming a delta at or beyond this chain depth yields nothing. */
+  private const MAX_DEPTH = 200;
+
+  /** Bases larger than this are spooled to a temporary file when streaming. */
+  private const MAX_BASE_RAM = 2097152;
+
+  private DeltaDecoder $decoder;
+
+  public function __construct( DeltaDecoder $decoder ) {
+    $this->decoder = $decoder;
+  }
+
+  /**
+   * Reports the size of the entry at an offset: the size stored in the entry
+   * header, or for delta entries the size read from the delta itself.
+   *
+   * @return int The size, or 0 when the pack file cannot be opened.
+   */
+  public function getSize(
+    FileHandlePool $pool,
+    string $packFile,
+    int $offset
+  ): int {
+    return $pool->computeInt(
+      $packFile,
+      function( mixed $handle ) use ( $offset ): int {
+        fseek( $handle, $offset );
+
+        $header = $this->readVarInt( $handle );
+        $size = $header['value'];
+        $type = $header['byte'] >> 4 & 7;
+
+        if( $type === 6 || $type === 7 ) {
+          $size = $this->decoder->readDeltaTargetSize( $handle, $type );
+        }
+
+        return $size;
+      },
+      0
+    );
+  }
+
+  /**
+   * Reads the entry at an offset into memory, resolving deltas recursively.
+   *
+   * @param int $cap When positive, limits how much is inflated or decoded;
+   *   0 reads everything.
+   * @param callable $readShaBaseFn Called with (base name, cap) to fetch the
+   *   base of a type-7 entry.
+   *
+   * @return string The entry's bytes, or '' when the pack cannot be opened.
+   */
+  public function read(
+    FileHandlePool $pool,
+    string $packFile,
+    int $offset,
+    int $cap,
+    callable $readShaBaseFn
+  ): string {
+    return $pool->computeString(
+      $packFile,
+      function( mixed $handle ) use (
+        $offset,
+        $cap,
+        $pool,
+        $packFile,
+        $readShaBaseFn
+      ): string {
+        fseek( $handle, $offset );
+
+        $header = $this->readVarInt( $handle );
+        $type = $header['byte'] >> 4 & 7;
+        $result = '';
+
+        if( $type === 6 ) {
+          $neg = $this->readOffsetDelta( $handle );
+          $cur = ftell( $handle );
+          $base = $offset - $neg;
+          $bData = $this->read(
+            $pool,
+            $packFile,
+            $base,
+            $cap,
+            $readShaBaseFn
+          );
+
+          // Reading the base moved the shared handle; return to the delta.
+          fseek( $handle, $cur );
+
+          $delta = $this->inflate( $handle );
+          $result = $this->decoder->apply( $bData, $delta, $cap );
+        } elseif( $type === 7 ) {
+          $sha = bin2hex( fread( $handle, 20 ) );
+          $cur = ftell( $handle );
+          $bas = $readShaBaseFn( $sha, $cap );
+
+          // The base may sit in this same pack, and the pool lends out one
+          // handle per path, so fetching it can move the position.
+          fseek( $handle, $cur );
+
+          $del = $this->inflate( $handle );
+          $result = $this->decoder->apply( $bas, $del, $cap );
+        } else {
+          $result = $this->inflate( $handle, $cap );
+        }
+
+        return $result;
+      },
+      ''
+    );
+  }
+
+  /**
+   * Streams a non-delta entry through CompressionStream's extractor; delta
+   * entries and unopenable packs yield nothing.
+   */
+  public function streamRawCompressed(
+    FileHandlePool $pool,
+    string $packFile,
+    int $offset
+  ): Generator {
+    yield from $pool->streamGenerator(
+      $packFile,
+      function( mixed $handle ) use ( $offset ): Generator {
+        fseek( $handle, $offset );
+
+        $header = $this->readVarInt( $handle );
+        $type = $header['byte'] >> 4 & 7;
+
+        if( $type !== 6 && $type !== 7 ) {
+          $stream = CompressionStream::createExtractor();
+
+          yield from $stream->stream( $handle );
+        }
+      }
+    );
+  }
+
+  /**
+   * Streams the inflated content of the entry at an offset.
+   *
+   * @param int $depth Current delta chain depth.
+   * @param callable $getSizeShaFn Called with a base name; returns its size.
+   * @param callable $streamShaFn Called with (base name, depth); returns an
+   *   iterable of the base's chunks.
+   */
+  public function streamEntryGenerator(
+    FileHandlePool $pool,
+    string $packFile,
+    int $offset,
+    int $depth,
+    callable $getSizeShaFn,
+    callable $streamShaFn
+  ): Generator {
+    yield from $pool->streamGenerator(
+      $packFile,
+      function( mixed $handle ) use (
+        $pool,
+        $packFile,
+        $offset,
+        $depth,
+        $getSizeShaFn,
+        $streamShaFn
+      ): Generator {
+        fseek( $handle, $offset );
+
+        $header = $this->readVarInt( $handle );
+        $type = $header['byte'] >> 4 & 7;
+
+        if( $type === 6 || $type === 7 ) {
+          yield from $this->streamDeltaObjectGenerator(
+            $handle,
+            $pool,
+            $packFile,
+            $offset,
+            $type,
+            $depth,
+            $getSizeShaFn,
+            $streamShaFn
+          );
+        } else {
+          $stream = CompressionStream::createInflater();
+
+          yield from $stream->stream( $handle );
+        }
+      }
+    );
+  }
+
+  /**
+   * Streams a delta entry. The handle must sit just past the entry header.
+   * The base is gathered into a string, or into a temporary file when larger
+   * than MAX_BASE_RAM, and the delta is then applied as a stream.
+   */
+  private function streamDeltaObjectGenerator(
+    mixed $handle,
+    FileHandlePool $pool,
+    string $packFile,
+    int $offset,
+    int $type,
+    int $depth,
+    callable $getSizeShaFn,
+    callable $streamShaFn
+  ): Generator {
+    if( $depth < self::MAX_DEPTH ) {
+      if( $type === 6 ) {
+        $neg = $this->readOffsetDelta( $handle );
+        $deltaPos = ftell( $handle );
+        $baseSize = $this->getSize( $pool, $packFile, $offset - $neg );
+
+        if( $baseSize > self::MAX_BASE_RAM ) {
+          $tmpHandle = $this->resolveBaseToTempFile(
+            $pool,
+            $packFile,
+            $offset - $neg,
+            $depth,
+            $getSizeShaFn,
+            $streamShaFn
+          );
+
+          if( $tmpHandle !== false ) {
+            // Release the temporary file even if the consumer stops early.
+            try {
+              fseek( $handle, $deltaPos );
+
+              yield from $this->decoder->applyStreamGenerator(
+                $handle,
+                $tmpHandle
+              );
+            } finally {
+              fclose( $tmpHandle );
+            }
+          }
+        } else {
+          $base = '';
+
+          foreach( $this->streamEntryGenerator(
+            $pool,
+            $packFile,
+            $offset - $neg,
+            $depth + 1,
+            $getSizeShaFn,
+            $streamShaFn
+          ) as $chunk ) {
+            $base .= $chunk;
+          }
+
+          fseek( $handle, $deltaPos );
+
+          yield from $this->decoder->applyStreamGenerator(
+            $handle,
+            $base
+          );
+        }
+      } else {
+        $baseSha = bin2hex( fread( $handle, 20 ) );
+
+        // Remember where the delta starts: sizing or streaming the base may
+        // reuse this same pooled handle and leave it elsewhere.
+        $deltaPos = ftell( $handle );
+        $baseSize = $getSizeShaFn( $baseSha );
+
+        if( $baseSize > self::MAX_BASE_RAM ) {
+          $tmpHandle = tmpfile();
+
+          if( $tmpHandle !== false ) {
+            // Release the temporary file even if the consumer stops early.
+            try {
+              $written = false;
+
+              foreach( $streamShaFn( $baseSha, $depth + 1 ) as $chunk ) {
+                fwrite( $tmpHandle, $chunk );
+
+                $written = true;
+              }
+
+              if( $written ) {
+                rewind( $tmpHandle );
+                fseek( $handle, $deltaPos );
+
+                yield from $this->decoder->applyStreamGenerator(
+                  $handle,
+                  $tmpHandle
+                );
+              }
+            } finally {
+              fclose( $tmpHandle );
+            }
+          }
+        } else {
+          $base = '';
+          $written = false;
+
+          foreach( $streamShaFn( $baseSha, $depth + 1 ) as $chunk ) {
+            $base .= $chunk;
+            $written = true;
+          }
+
+          if( $written ) {
+            fseek( $handle, $deltaPos );
+
+            yield from $this->decoder->applyStreamGenerator(
+              $handle,
+              $base
+            );
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * Spools the entry at an offset into a rewound temporary file.
+   *
+   * @return resource|false The temporary file handle, or false when
+   *   tmpfile() fails. The caller is responsible for closing it.
+   */
+  private function resolveBaseToTempFile(
+    FileHandlePool $pool,
+    string $packFile,
+    int $baseOffset,
+    int $depth,
+    callable $getSizeShaFn,
+    callable $streamShaFn
+  ) {
+    $tmpHandle = tmpfile();
+
+    if( $tmpHandle !== false ) {
+      foreach( $this->streamEntryGenerator(
+        $pool,
+        $packFile,
+        $baseOffset,
+        $depth + 1,
+        $getSizeShaFn,
+        $streamShaFn
+      ) as $chunk ) {
+        fwrite( $tmpHandle, $chunk );
+      }
+
+      rewind( $tmpHandle );
+    }
+
+    return $tmpHandle;
+  }
+
+  /**
+   * Reads an entry header: the first byte supplies the low four size bits
+   * and each continuation byte adds seven more.
+   *
+   * @return array Keys 'value' (the size) and 'byte' (the first byte, whose
+   *   bits 4-6 hold the entry type).
+   */
+  private function readVarInt( mixed $handle ): array {
+    $byte = ord( fread( $handle, 1 ) );
+    $val = $byte & 15;
+    $shft = 4;
+    $fst = $byte;
+
+    while( $byte & 128 ) {
+      $byte = ord( fread( $handle, 1 ) );
+      $val |= ($byte & 127) << $shft;
+      $shft += 7;
+    }
+
+    return [ 'value' => $val, 'byte' => $fst ];
+  }
+
+  /**
+   * Reads the backwards distance to a type-6 entry's base. Each continuation
+   * byte adds one before shifting, so this is not a plain base-128 integer.
+   */
+  private function readOffsetDelta( mixed $handle ): int {
+    $byte = ord( fread( $handle, 1 ) );
+    $neg = $byte & 127;
+
+    while( $byte & 128 ) {
+      $byte = ord( fread( $handle, 1 ) );
+      $neg = ($neg + 1) << 7 | $byte & 127;
+    }
+
+    return $neg;
+  }
+
+  /**
+   * Inflates from the handle's current position into a string, stopping at
+   * and trimming to $cap bytes when the cap is positive.
+   */
+  private function inflate( mixed $handle, int $cap = 0 ): string {
+    $stream = CompressionStream::createInflater();
+    $result = '';
+
+    foreach( $stream->stream( $handle ) as $data ) {
+      $result .= $data;
+
+      if( $cap > 0 && strlen( $result ) >= $cap ) {
+        $result = substr( $result, 0, $cap );
+
+        break;
+      }
+    }
+
+    return $result;
+  }
+}
git/PackIndex.php
+<?php
+/**
+ * Looks up binary object names in one pack index (.idx) file and reports
+ * the matching byte offset within the companion .pack file.
+ */
+class PackIndex {
+  private string $indexFile;
+  private string $packFile;
+  private array $fanoutCache;
+
+  /**
+   * @param string $indexFile Path to the index; the pack path is the same
+   *   path with its trailing ".idx" replaced by ".pack".
+   */
+  public function __construct( string $indexFile ) {
+    $this->indexFile = $indexFile;
+
+    // Swap only the trailing extension so a directory whose name contains
+    // ".idx" is left untouched.
+    $this->packFile = str_ends_with( $indexFile, '.idx' )
+      ? substr( $indexFile, 0, -4 ) . '.pack'
+      : $indexFile;
+    $this->fanoutCache = [];
+  }
+
+  /**
+   * Searches the index for a name.
+   *
+   * @param string $sha The 20-byte binary object name.
+   * @param callable $onFound Called with (pack file path, offset) on a
+   *   match; not called otherwise.
+   */
+  public function search(
+    FileHandlePool $pool,
+    string $sha,
+    callable $onFound
+  ): void {
+    $pool->computeVoid(
+      $this->indexFile,
+      function( mixed $handle ) use ( $sha, $onFound ): void {
+        $this->ensureFanout( $handle );
+
+        if( !empty( $this->fanoutCache ) ) {
+          $this->binarySearch( $handle, $sha, $onFound );
+        }
+      }
+    );
+  }
+
+  /**
+   * Loads the 256-entry fan-out table once. The table stays empty when the
+   * 8-byte header does not match or the table is truncated.
+   */
+  private function ensureFanout( mixed $handle ): void {
+    if( empty( $this->fanoutCache ) ) {
+      fseek( $handle, 0 );
+
+      $head = fread( $handle, 8 );
+
+      if( $head === "\377tOc\0\0\0\2" ) {
+        $raw = fread( $handle, 1024 );
+
+        // A short read would leave later entries undefined; require all
+        // 256 big-endian 32-bit counters.
+        if( is_string( $raw ) && strlen( $raw ) === 1024 ) {
+          $this->fanoutCache = array_values( unpack( 'N*', $raw ) );
+        }
+      }
+    }
+  }
+
+  /**
+   * Binary-searches the sorted name table within the range the fan-out
+   * table gives for the name's first byte.
+   */
+  private function binarySearch(
+    mixed $handle,
+    string $sha,
+    callable $onFound
+  ): void {
+    $byte = ord( $sha[0] );
+    $start = $byte === 0 ? 0 : $this->fanoutCache[$byte - 1];
+    $end = $this->fanoutCache[$byte];
+    $result = 0;
+
+    if( $end > $start ) {
+      $low = $start;
+      $high = $end - 1;
+
+      while( $result === 0 && $low <= $high ) {
+        $mid = ($low + $high) >> 1;
+
+        // Names start after the 8-byte header and the 1024-byte fan-out.
+        fseek( $handle, 1032 + $mid * 20 );
+
+        $cmp = fread( $handle, 20 );
+
+        if( !is_string( $cmp ) || strlen( $cmp ) !== 20 ) {
+          // Truncated index: stop instead of comparing partial data.
+          $low = $high + 1;
+        } else {
+          // strcmp compares bytes; the < and > operators may compare two
+          // numeric-looking strings as numbers.
+          $order = strcmp( $cmp, $sha );
+
+          if( $order < 0 ) {
+            $low = $mid + 1;
+          } elseif( $order > 0 ) {
+            $high = $mid - 1;
+          } else {
+            $result = $this->readOffset( $handle, $mid );
+
+            // End the search even if the stored offset reads as zero;
+            // otherwise the unchanged bounds would loop forever.
+            $low = $high + 1;
+          }
+        }
+      }
+    }
+
+    if( $result !== 0 ) {
+      $onFound( $this->packFile, $result );
+    }
+  }
+
+  /**
+   * Reads the pack offset for the entry at position $mid.
+   *
+   * The 4-byte offset table follows 24 bytes per entry (20-byte name plus,
+   * per the pack index v2 layout, a 4-byte CRC). When the high bit is set,
+   * the remaining bits index a table of 8-byte offsets that comes next.
+   *
+   * @return int The offset, or 0 when the tables are truncated.
+   */
+  private function readOffset( mixed $handle, int $mid ): int {
+    $total = $this->fanoutCache[255];
+    $pos = 1032 + $total * 24 + $mid * 4;
+    $result = 0;
+
+    fseek( $handle, $pos );
+
+    $packed = fread( $handle, 4 );
+
+    if( is_string( $packed ) && strlen( $packed ) === 4 ) {
+      $offset = unpack( 'N', $packed )[1];
+
+      if( $offset & 0x80000000 ) {
+        $pos64 = 1032 + $total * 28 + ($offset & 0x7FFFFFFF) * 8;
+
+        fseek( $handle, $pos64 );
+
+        $wide = fread( $handle, 8 );
+        $offset = is_string( $wide ) && strlen( $wide ) === 8
+          ? unpack( 'J', $wide )[1]
+          : 0;
+      }
+
+      $result = (int)$offset;
+    }
+
+    return $result;
+  }
+}
git/PackLocator.php
+<?php
+require_once __DIR__ . '/PackIndex.php';
+require_once __DIR__ . '/FileHandlePool.php';
+
+/**
+ * Finds which pack holds an object by consulting every pack index found
+ * under an objects directory.
+ */
+class PackLocator {
+  private array $indexes;
+
+  /**
+   * Builds one PackIndex for every "*.idx" file in the "pack" directory
+   * beneath $objectsPath.
+   */
+  public function __construct( string $objectsPath ) {
+    $indexFiles = glob( "{$objectsPath}/pack/*.idx" ) ?: [];
+
+    $this->indexes = array_map(
+      fn( string $indexFile ): PackIndex => new PackIndex( $indexFile ),
+      $indexFiles
+    );
+  }
+
+  /**
+   * Searches the indexes in order and calls $action with (pack file,
+   * offset) for the first one that contains the object.
+   *
+   * Names that are not exactly 40 hexadecimal characters are ignored. The
+   * index that matched is swapped to the front of the list, so it is
+   * consulted first next time.
+   */
+  public function locate(
+    FileHandlePool $pool,
+    string $sha,
+    callable $action
+  ): void {
+    if( strlen( $sha ) !== 40 || !ctype_xdigit( $sha ) ) {
+      return;
+    }
+
+    $binarySha = hex2bin( $sha );
+    $matched = false;
+    $total = count( $this->indexes );
+
+    for( $slot = 0; !$matched && $slot < $total; $slot++ ) {
+      $this->indexes[$slot]->search(
+        $pool,
+        $binarySha,
+        function(
+          string $packFile,
+          int $offset
+        ) use (
+          &$matched,
+          $slot,
+          $action
+        ): void {
+          $matched = true;
+
+          if( $slot > 0 ) {
+            [ $this->indexes[0], $this->indexes[$slot] ] =
+              [ $this->indexes[$slot], $this->indexes[0] ];
+          }
+
+          $action( $packFile, $offset );
+        }
+      );
+    }
+  }
+}
Delta: 956 lines added, 780 lines removed, 176-line increase