Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/treetrek.git

Further constrains memory limits for diff

Author Dave Jarvis <email>
Date 2026-02-18 12:58:16 GMT-0800
Commit 8156e7b87afbcf9868d2ef54e966cf25673af5b0
Parent a4c7a60
git/GitPacks.php
<?php
class GitPacks {
- private const MAX_READ = 1040576;
- private const MAX_RAM = 1048576;
-
- private string $objectsPath;
- private array $packFiles;
- private string $lastPack = '';
- private array $fileHandles;
- private array $fanoutCache;
- private array $shaBucketCache;
- private array $offsetBucketCache;
-
- public function __construct( string $objectsPath ) {
- $this->objectsPath = $objectsPath;
- $this->packFiles = glob( "{$this->objectsPath}/pack/*.idx" ) ?: [];
- $this->fileHandles = [];
- $this->fanoutCache = [];
- $this->shaBucketCache = [];
- $this->offsetBucketCache = [];
- }
-
- public function __destruct() {
- foreach( $this->fileHandles as $handle ) {
- if( is_resource( $handle ) ) {
- fclose( $handle );
- }
- }
- }
-
- public function peek( string $sha, int $len = 12 ): string {
- $info = $this->findPackInfo( $sha );
- $result = '';
-
- if( $info['offset'] !== 0 ) {
- $handle = $this->getHandle( $info['file'] );
-
- if( $handle ) {
- $result = $this->readPackEntry(
- $handle,
- $info['offset'],
- $len,
- $len
- );
- }
- }
-
- return $result;
- }
-
- public function read( string $sha ): string {
- $info = $this->findPackInfo( $sha );
- $result = '';
-
- if( $info['offset'] !== 0 ) {
- $size = $this->extractPackedSize( $info['file'], $info['offset'] );
-
- if( $size <= self::MAX_RAM ) {
- $handle = $this->getHandle( $info['file'] );
-
- if( $handle ) {
- $result = $this->readPackEntry(
- $handle,
- $info['offset'],
- $size
- );
- }
- }
- }
-
- return $result;
- }
-
- public function stream( string $sha, callable $callback ): bool {
- $info = $this->findPackInfo( $sha );
- $result = false;
-
- if( $info['offset'] !== 0 ) {
- $size = $this->extractPackedSize( $info['file'], $info['offset'] );
- $handle = $this->getHandle( $info['file'] );
-
- if( $handle ) {
- $result = $this->streamPackEntry(
- $handle,
- $info['offset'],
- $size,
- $callback
- );
- }
- }
-
- return $result;
- }
-
- public function getSize( string $sha ): int {
- $info = $this->findPackInfo( $sha );
- $result = 0;
-
- if( $info['offset'] !== 0 ) {
- $result = $this->extractPackedSize( $info['file'], $info['offset'] );
- }
-
- return $result;
- }
-
- private function findPackInfo( string $sha ): array {
- $result = [ 'offset' => 0, 'file' => '' ];
- $binarySha = hex2bin( $sha );
-
- if( strlen( $sha ) === 40 && $binarySha !== false ) {
- if( $this->lastPack !== '' ) {
- $offset = $this->findInIdx( $this->lastPack, $binarySha );
-
- if( $offset !== 0 ) {
- $result = [
- 'file' => str_replace( '.idx', '.pack', $this->lastPack ),
- 'offset' => $offset
- ];
- }
- }
-
- if( $result['offset'] === 0 ) {
- foreach( $this->packFiles as $indexFile ) {
- if( $indexFile !== $this->lastPack ) {
- $offset = $this->findInIdx( $indexFile, $binarySha );
-
- if( $offset !== 0 ) {
- $this->lastPack = $indexFile;
- $result = [
- 'file' => str_replace( '.idx', '.pack', $indexFile ),
- 'offset' => $offset
- ];
- break;
- }
- }
- }
- }
- }
-
- return $result;
- }
-
- private function findInIdx( string $indexFile, string $binarySha ): int {
- $handle = $this->getHandle( $indexFile );
- $result = 0;
-
- if( $handle ) {
- if( !isset( $this->fanoutCache[$indexFile] ) ) {
- fseek( $handle, 0 );
- $head = fread( $handle, 8 );
-
- if( $head === "\377tOc\0\0\0\2" ) {
- $this->fanoutCache[$indexFile] = array_values(
- unpack( 'N*', fread( $handle, 1024 ) )
- );
- }
- }
-
- if( isset( $this->fanoutCache[$indexFile] ) ) {
- $fanout = $this->fanoutCache[$indexFile];
- $byte = ord( $binarySha[0] );
- $start = $byte === 0 ? 0 : $fanout[$byte - 1];
- $end = $fanout[$byte];
-
- if( $end > $start ) {
- $result = $this->binarySearchIdx(
- $indexFile,
- $handle,
- $start,
- $end,
- $binarySha,
- $fanout[255]
- );
- }
- }
- }
-
- return $result;
- }
-
- private function binarySearchIdx(
- string $indexFile,
- $handle,
- int $start,
- int $end,
- string $binarySha,
- int $total
- ): int {
- $key = "$indexFile:$start";
- $count = $end - $start;
- $result = 0;
-
- if( !isset( $this->shaBucketCache[$key] ) ) {
- fseek( $handle, 1032 + ($start * 20) );
- $this->shaBucketCache[$key] = fread( $handle, $count * 20 );
-
- fseek( $handle, 1032 + ($total * 24) + ($start * 4) );
- $this->offsetBucketCache[$key] = fread( $handle, $count * 4 );
- }
-
- $shaBlock = $this->shaBucketCache[$key];
- $low = 0;
- $high = $count - 1;
- $found = -1;
-
- while( $low <= $high ) {
- $mid = ($low + $high) >> 1;
- $cmp = substr( $shaBlock, $mid * 20, 20 );
-
- if( $cmp < $binarySha ) {
- $low = $mid + 1;
- } elseif( $cmp > $binarySha ) {
- $high = $mid - 1;
- } else {
- $found = $mid;
- break;
- }
- }
-
- if( $found !== -1 ) {
- $packed = substr( $this->offsetBucketCache[$key], $found * 4, 4 );
- $offset = unpack( 'N', $packed )[1];
-
- if( $offset & 0x80000000 ) {
- $pos64 = 1032 + ($total * 28) + (($offset & 0x7FFFFFFF) * 8);
- fseek( $handle, $pos64 );
- $offset = unpack( 'J', fread( $handle, 8 ) )[1];
- }
- $result = (int)$offset;
- }
-
- return $result;
- }
-
- private function readPackEntry(
- $handle,
- int $offset,
- int $size,
- int $cap = 0
- ): string {
- fseek( $handle, $offset );
- $header = $this->readVarInt( $handle );
- $type = ($header['byte'] >> 4) & 7;
-
- return ($type === 6)
- ? $this->handleOfsDelta( $handle, $offset, $size, $cap )
- : (($type === 7)
- ? $this->handleRefDelta( $handle, $size, $cap )
- : $this->decompressToString( $handle, $cap ));
- }
-
- private function streamPackEntry(
- $handle,
- int $offset,
- int $size,
- callable $callback
- ): bool {
- fseek( $handle, $offset );
- $header = $this->readVarInt( $handle );
- $type = ($header['byte'] >> 4) & 7;
-
- return ($type === 6 || $type === 7)
- ? $this->streamDeltaObject( $handle, $offset, $type, $callback )
- : $this->streamDecompression( $handle, $callback );
- }
-
- private function streamDeltaObject(
- $handle,
- int $offset,
- int $type,
- callable $callback
- ): bool {
- fseek( $handle, $offset );
- $this->readVarInt( $handle );
- $result = false;
-
- if( $type === 6 ) {
- $neg = $this->readOffsetDelta( $handle );
- $deltaPos = ftell( $handle );
- $base = '';
-
- $this->streamPackEntry(
- $handle,
- $offset - $neg,
- 0,
- function( $c ) use ( &$base ) { $base .= $c; }
- );
-
- fseek( $handle, $deltaPos );
- $result = $this->applyDeltaStream( $handle, $base, $callback );
- } else {
- $baseSha = bin2hex( fread( $handle, 20 ) );
- $base = '';
-
- if( $this->stream( $baseSha, function( $c ) use ( &$base ) {
- $base .= $c;
- } ) ) {
- $result = $this->applyDeltaStream( $handle, $base, $callback );
- }
- }
-
- return $result;
- }
-
- private function applyDeltaStream(
- $handle,
- string $base,
- callable $callback
- ): bool {
- $infl = inflate_init( ZLIB_ENCODING_DEFLATE );
- $ok = false;
-
- if( $infl ) {
- $state = 0;
- $buffer = '';
- $ok = true;
-
- while( !feof( $handle ) ) {
- $chunk = fread( $handle, 8192 );
-
- if( $chunk === '' ) {
- break;
- }
-
- $data = @inflate_add( $infl, $chunk );
-
- if( $data === false ) {
- $ok = false;
- break;
- }
-
- $buffer .= $data;
-
- while( true ) {
- $len = strlen( $buffer );
-
- if( $len === 0 ) {
- break;
- }
-
- if( $state < 2 ) {
- $pos = 0;
- while( $pos < $len && (ord( $buffer[$pos] ) & 128) ) { $pos++; }
-
- if( $pos === $len && (ord( $buffer[$pos - 1] ) & 128) ) {
- break;
- }
-
- $buffer = substr( $buffer, $pos + 1 );
- $state++;
- continue;
- }
-
- $op = ord( $buffer[0] );
-
- if( $op & 128 ) {
- $need = $this->getCopyInstructionSize( $op );
-
- if( $len < 1 + $need ) {
- break;
- }
-
- $info = $this->parseCopyInstruction( $op, $buffer, 1 );
-
- $callback( substr( $base, $info['off'], $info['len'] ) );
- $buffer = substr( $buffer, 1 + $need );
- } else {
- $ln = $op & 127;
-
- if( $len < 1 + $ln ) {
- break;
- }
-
- $callback( substr( $buffer, 1, $ln ) );
- $buffer = substr( $buffer, 1 + $ln );
- }
- }
-
- if( inflate_get_status( $infl ) === ZLIB_STREAM_END ) {
- break;
- }
- }
- }
-
- return $ok;
- }
-
- private function streamDecompression( $handle, callable $callback ): bool {
- $infl = inflate_init( ZLIB_ENCODING_DEFLATE );
-
- if( !$infl ) {
- return false;
- }
-
- while( !feof( $handle ) ) {
- $chunk = fread( $handle, 8192 );
-
- if( $chunk === '' ) {
- break;
- }
-
- $data = @inflate_add( $infl, $chunk );
-
- if( $data !== false && $data !== '' ) {
- $callback( $data );
- }
-
- if( $data === false ||
- inflate_get_status( $infl ) === ZLIB_STREAM_END ) {
- break;
- }
- }
-
- return true;
- }
-
- private function decompressToString(
- $handle,
- int $cap = 0
- ): string {
- $infl = inflate_init( ZLIB_ENCODING_DEFLATE );
- $res = '';
-
- if( $infl ) {
- while( !feof( $handle ) ) {
- $chunk = fread( $handle, 8192 );
-
- if( $chunk === '' ) {
- break;
- }
-
- $data = @inflate_add( $infl, $chunk );
-
- if( $data !== false ) {
- $res .= $data;
- }
-
- if( $cap > 0 && strlen( $res ) >= $cap ) {
- $res = substr( $res, 0, $cap );
- break;
- }
-
- if( $data === false ||
- inflate_get_status( $infl ) === ZLIB_STREAM_END ) {
- break;
- }
- }
- }
-
- return $res;
- }
-
- private function extractPackedSize( string $packPath, int $offset ): int {
- $handle = $this->getHandle( $packPath );
+ // Not referenced in the visible part of this class; purpose unconfirmed here.
+ private const MAX_READ = 1040576;
+ // Largest packed size, in bytes, that read() will load into a string.
+ private const MAX_RAM = 1048576;
+ // Largest delta-base size, in bytes, that streamDeltaObject() will buffer.
+ private const MAX_BASE_RAM = 524288;
+ // Deepest chain of delta bases that streamDeltaObject() will follow.
+ private const MAX_DEPTH = 50;
+
+ // Objects directory supplied to the constructor.
+ private string $objectsPath;
+ // Paths of the pack index (*.idx) files found under "$objectsPath/pack".
+ private array $packFiles;
+ // Index file of the most recent successful lookup; searched first next time.
+ private string $lastPack = '';
+ // Open file handles, closed by the destructor. Presumably filled by
+ // getHandle(), which is outside this view -- TODO confirm.
+ private array $fileHandles;
+ // Fanout tables (lists of integers) keyed by index file path.
+ private array $fanoutCache;
+ // Raw SHA bytes of one fanout bucket, keyed by "indexFile:start".
+ private array $shaBucketCache;
+ // Raw 4-byte offset entries of one fanout bucket, keyed like shaBucketCache.
+ private array $offsetBucketCache;
+
+ /**
+  * Records the objects directory, lists the pack index files beneath it,
+  * and starts with empty handle and lookup caches.
+  *
+  * @param string $objectsPath Directory whose "pack" subdirectory is scanned
+  *                            for *.idx files.
+  */
+ public function __construct( string $objectsPath ) {
+   $indexes = glob( "{$objectsPath}/pack/*.idx" );
+
+   $this->objectsPath = $objectsPath;
+   // glob() may yield false or an empty list; both become an empty array.
+   $this->packFiles = $indexes ?: [];
+
+   $this->fileHandles = [];
+   $this->fanoutCache = [];
+   $this->shaBucketCache = [];
+   $this->offsetBucketCache = [];
+ }
+
+ /**
+  * Closes every cached file handle that is still an open resource.
+  */
+ public function __destruct() {
+   foreach( $this->fileHandles as $stream ) {
+     if( !is_resource( $stream ) ) {
+       continue;
+     }
+
+     fclose( $stream );
+   }
+ }
+
+ /**
+  * Reads the beginning of a packed object.
+  *
+  * The requested length is handed to readPackEntry() as both the size and
+  * the cap.
+  *
+  * @param string $sha Hexadecimal object identifier.
+  * @param int    $len Number of bytes requested.
+  *
+  * @return string The bytes read, or '' when the object is not located in
+  *                any pack or its pack file yields no handle.
+  */
+ public function peek( string $sha, int $len = 12 ): string {
+   $location = $this->findPackInfo( $sha );
+
+   if( $location['offset'] === 0 ) {
+     return '';
+   }
+
+   $pack = $this->getHandle( $location['file'] );
+
+   if( !$pack ) {
+     return '';
+   }
+
+   return $this->readPackEntry( $pack, $location['offset'], $len, $len );
+ }
+
+ /**
+  * Loads a packed object into memory, provided its packed size does not
+  * exceed MAX_RAM.
+  *
+  * @param string $sha Hexadecimal object identifier.
+  *
+  * @return string Object content, or '' when the object is not found, is
+  *                larger than MAX_RAM, or its pack file yields no handle.
+  */
+ public function read( string $sha ): string {
+   $location = $this->findPackInfo( $sha );
+
+   if( $location['offset'] === 0 ) {
+     return '';
+   }
+
+   $bytes = $this->extractPackedSize( $location['file'], $location['offset'] );
+
+   if( $bytes > self::MAX_RAM ) {
+     return '';
+   }
+
+   $pack = $this->getHandle( $location['file'] );
+
+   return $pack
+     ? $this->readPackEntry( $pack, $location['offset'], $bytes )
+     : '';
+ }
+
+ /**
+  * Streams a packed object to a callback, starting at delta depth zero.
+  *
+  * @param string   $sha      Hexadecimal object identifier.
+  * @param callable $callback Receives each piece of object content.
+  *
+  * @return bool Result reported by streamInternal().
+  */
+ public function stream( string $sha, callable $callback ): bool {
+   $initialDepth = 0;
+
+   return $this->streamInternal( $sha, $callback, $initialDepth );
+ }
+
+ /**
+  * Locates an object in the packs and streams it to a callback, carrying
+  * the current delta depth along.
+  *
+  * @param string   $sha      Hexadecimal object identifier.
+  * @param callable $callback Receives each piece of object content.
+  * @param int      $depth    Number of delta bases already being resolved.
+  *
+  * @return bool False when the object is not found or its pack file yields
+  *              no handle; otherwise the result of streamPackEntry().
+  */
+ private function streamInternal(
+   string $sha,
+   callable $callback,
+   int $depth
+ ): bool {
+   $location = $this->findPackInfo( $sha );
+
+   if( $location['offset'] === 0 ) {
+     return false;
+   }
+
+   $bytes = $this->extractPackedSize( $location['file'], $location['offset'] );
+   $pack = $this->getHandle( $location['file'] );
+
+   if( !$pack ) {
+     return false;
+   }
+
+   return $this->streamPackEntry(
+     $pack,
+     $location['offset'],
+     $bytes,
+     $callback,
+     $depth
+   );
+ }
+
+ /**
+  * Reports the size recorded for a packed object.
+  *
+  * @param string $sha Hexadecimal object identifier.
+  *
+  * @return int Value from extractPackedSize(), or 0 when the object is not
+  *             located in any pack.
+  */
+ public function getSize( string $sha ): int {
+   $location = $this->findPackInfo( $sha );
+
+   return $location['offset'] !== 0
+     ? $this->extractPackedSize( $location['file'], $location['offset'] )
+     : 0;
+ }
+
+ /**
+  * Finds which pack holds an object and where the object starts inside it.
+  *
+  * The index that satisfied the previous lookup is tried first, then the
+  * remaining indexes in discovery order. A hit updates the remembered index.
+  *
+  * @param string $sha Object identifier; must be exactly 40 hex digits.
+  *
+  * @return array{file: string, offset: int} Pack path and offset, or
+  *         ['offset' => 0, 'file' => ''] when the identifier is malformed
+  *         or no index contains it.
+  */
+ private function findPackInfo( string $sha ): array {
+   $missing = [ 'offset' => 0, 'file' => '' ];
+
+   // Validate before decoding: hex2bin() raises a warning for odd-length
+   // or non-hexadecimal input, and the identifier may come from outside.
+   if( preg_match( '/^[0-9a-fA-F]{40}$/D', $sha ) !== 1 ) {
+     return $missing;
+   }
+
+   $binarySha = hex2bin( $sha );
+
+   // Most recently matched index first, then every other index once.
+   $candidates = $this->lastPack !== '' ? [ $this->lastPack ] : [];
+
+   foreach( $this->packFiles as $indexFile ) {
+     if( $indexFile !== $this->lastPack ) {
+       $candidates[] = $indexFile;
+     }
+   }
+
+   foreach( $candidates as $indexFile ) {
+     $offset = $this->findInIdx( $indexFile, $binarySha );
+
+     if( $offset !== 0 ) {
+       $this->lastPack = $indexFile;
+
+       return [
+         // Index paths come from a "*.idx" glob, so only the four-character
+         // extension is swapped; a ".idx" elsewhere in the path is kept.
+         'file' => substr( $indexFile, 0, -4 ) . '.pack',
+         'offset' => $offset
+       ];
+     }
+   }
+
+   return $missing;
+ }
+
+ /**
+  * Looks up a SHA in one pack index file.
+  *
+  * The fanout table is read once per index and cached. It is only accepted
+  * when the file starts with the 8-byte header "\377tOc\0\0\0\2" and the
+  * full 1024-byte table could be read.
+  *
+  * @param string $indexFile Path of the pack index file.
+  * @param string $binarySha Raw SHA bytes being sought.
+  *
+  * @return int Offset reported by binarySearchIdx(), or 0 when the index
+  *             cannot be opened, is not recognised, is truncated, or has no
+  *             entries for the SHA's first byte.
+  */
+ private function findInIdx( string $indexFile, string $binarySha ): int {
+   $handle = $this->getHandle( $indexFile );
+
+   if( !$handle ) {
+     return 0;
+   }
+
+   if( !isset( $this->fanoutCache[$indexFile] ) ) {
+     fseek( $handle, 0 );
+     $head = fread( $handle, 8 );
+     $table = $head === "\377tOc\0\0\0\2" ? fread( $handle, 1024 ) : false;
+
+     // A short or failed read would leave fewer than 256 entries and make
+     // the lookups below touch undefined offsets, so reject it outright.
+     if( is_string( $table ) && strlen( $table ) === 1024 ) {
+       $this->fanoutCache[$indexFile] = array_values(
+         unpack( 'N*', $table )
+       );
+     }
+   }
+
+   if( !isset( $this->fanoutCache[$indexFile] ) ) {
+     return 0;
+   }
+
+   $fanout = $this->fanoutCache[$indexFile];
+   $byte = ord( $binarySha[0] );
+
+   // Entry N holds a running count, so the bucket for a first byte spans
+   // from the previous entry's count up to this entry's count.
+   $start = $byte === 0 ? 0 : $fanout[$byte - 1];
+   $end = $fanout[$byte];
+
+   if( $end <= $start ) {
+     return 0;
+   }
+
+   return $this->binarySearchIdx(
+     $indexFile,
+     $handle,
+     $start,
+     $end,
+     $binarySha,
+     $fanout[255]
+   );
+ }
+
+ /**
+  * Binary-searches one fanout bucket of a pack index for a SHA and returns
+  * the matching object's offset within the pack.
+  *
+  * The bucket's SHA bytes and 4-byte offset entries are read once and
+  * cached under "indexFile:start".
+  *
+  * @param string   $indexFile Index path; part of the bucket cache key.
+  * @param resource $handle    Open handle on the index file.
+  * @param int      $start     Position of the bucket's first entry.
+  * @param int      $end       Position one past the bucket's last entry.
+  * @param string   $binarySha Raw SHA bytes being sought.
+  * @param int      $total     Total number of entries in the index.
+  *
+  * @return int Pack offset, or 0 when the SHA is absent or the index data
+  *             needed to resolve it could not be read in full.
+  */
+ private function binarySearchIdx(
+   string $indexFile,
+   $handle,
+   int $start,
+   int $end,
+   string $binarySha,
+   int $total
+ ): int {
+   $key = "$indexFile:$start";
+   $count = $end - $start;
+
+   if( !isset( $this->shaBucketCache[$key] ) ) {
+     // 1032 = 8-byte header + 1024-byte fanout; SHAs are 20 bytes each.
+     fseek( $handle, 1032 + ($start * 20) );
+     $shas = fread( $handle, $count * 20 );
+
+     // The 4-byte offsets sit after 24 bytes per entry of earlier tables.
+     fseek( $handle, 1032 + ($total * 24) + ($start * 4) );
+     $offsets = fread( $handle, $count * 4 );
+
+     // Cache a failed read as an empty string so the search finds nothing.
+     $this->shaBucketCache[$key] = $shas === false ? '' : $shas;
+     $this->offsetBucketCache[$key] = $offsets === false ? '' : $offsets;
+   }
+
+   $shaBlock = $this->shaBucketCache[$key];
+   $low = 0;
+   $high = $count - 1;
+   $found = -1;
+
+   while( $low <= $high && $found === -1 ) {
+     $mid = ($low + $high) >> 1;
+
+     // strcmp() always compares bytes; the < and > operators compare
+     // numerically when both strings happen to look like numbers.
+     $order = strcmp( substr( $shaBlock, $mid * 20, 20 ), $binarySha );
+
+     if( $order < 0 ) {
+       $low = $mid + 1;
+     } elseif( $order > 0 ) {
+       $high = $mid - 1;
+     } else {
+       $found = $mid;
+     }
+   }
+
+   if( $found === -1 ) {
+     return 0;
+   }
+
+   $packed = substr( $this->offsetBucketCache[$key], $found * 4, 4 );
+
+   if( strlen( $packed ) !== 4 ) {
+     return 0;
+   }
+
+   $offset = unpack( 'N', $packed )[1];
+
+   // High bit set: the low 31 bits index a table of 8-byte offsets.
+   if( $offset & 0x80000000 ) {
+     fseek( $handle, 1032 + ($total * 28) + (($offset & 0x7FFFFFFF) * 8) );
+     $wide = fread( $handle, 8 );
+
+     if( $wide === false || strlen( $wide ) !== 8 ) {
+       return 0;
+     }
+
+     $offset = unpack( 'J', $wide )[1];
+   }
+
+   return (int)$offset;
+ }
+
+ /**
+  * Reads the pack entry at an offset into a string, dispatching on the
+  * type taken from bits 4-6 of the entry header's byte.
+  *
+  * @param resource $handle Open handle on the pack file.
+  * @param int      $offset Position of the entry within the pack.
+  * @param int      $size   Forwarded to the delta handlers only.
+  * @param int      $cap    Forwarded as the output cap.
+  *
+  * @return string Result of handleOfsDelta() for type 6, handleRefDelta()
+  *                for type 7, and decompressToString() for any other type.
+  */
+ private function readPackEntry(
+   $handle,
+   int $offset,
+   int $size,
+   int $cap = 0
+ ): string {
+   fseek( $handle, $offset );
+
+   $header = $this->readVarInt( $handle );
+   $kind = ($header['byte'] >> 4) & 7;
+
+   switch( $kind ) {
+     case 6:
+       return $this->handleOfsDelta( $handle, $offset, $size, $cap );
+
+     case 7:
+       return $this->handleRefDelta( $handle, $size, $cap );
+
+     default:
+       return $this->decompressToString( $handle, $cap );
+   }
+ }
+
+ /**
+  * Streams the pack entry at an offset to a callback, dispatching on the
+  * type taken from bits 4-6 of the entry header's byte.
+  *
+  * @param resource $handle   Open handle on the pack file.
+  * @param int      $offset   Position of the entry within the pack.
+  * @param int      $size     Accepted for callers; not used by this method.
+  * @param callable $callback Receives each piece of object content.
+  * @param int      $depth    Number of delta bases already being resolved.
+  *
+  * @return bool Result of streamDeltaObject() for types 6 and 7, otherwise
+  *              the result of streamDecompression().
+  */
+ private function streamPackEntry(
+   $handle,
+   int $offset,
+   int $size,
+   callable $callback,
+   int $depth = 0
+ ): bool {
+   fseek( $handle, $offset );
+
+   $header = $this->readVarInt( $handle );
+   $kind = ($header['byte'] >> 4) & 7;
+   $isDelta = $kind === 6 || $kind === 7;
+
+   if( $isDelta ) {
+     return $this->streamDeltaObject(
+       $handle,
+       $offset,
+       $kind,
+       $callback,
+       $depth
+     );
+   }
+
+   return $this->streamDecompression( $handle, $callback );
+ }
+
+ /**
+  * Streams a delta entry: buffers its base object in memory, then applies
+  * the delta data against that base, sending the output to a callback.
+  *
+  * Type 6 names its base by a backwards distance within the same pack;
+  * any other type reads a 20-byte base SHA and resolves it via the packs.
+  *
+  * @param resource $handle   Open handle on the pack file.
+  * @param int      $offset   Position of the delta entry within the pack.
+  * @param int      $type     Entry type; 6 selects the offset form.
+  * @param callable $callback Receives each piece of reconstructed content.
+  * @param int      $depth    Number of delta bases already being resolved.
+  *
+  * @return bool False when MAX_DEPTH is reached, the base reference is
+  *              unusable, the base is larger than MAX_BASE_RAM, or the base
+  *              could not be streamed; otherwise the result of
+  *              applyDeltaStream().
+  */
+ private function streamDeltaObject(
+   $handle,
+   int $offset,
+   int $type,
+   callable $callback,
+   int $depth = 0
+ ): bool {
+   if( $depth >= self::MAX_DEPTH ) {
+     return false;
+   }
+
+   fseek( $handle, $offset );
+   $this->readVarInt( $handle );
+
+   $base = '';
+   $collect = function( $c ) use ( &$base ) { $base .= $c; };
+
+   if( $type === 6 ) {
+     $neg = $this->readOffsetDelta( $handle );
+     $deltaPos = ftell( $handle );
+     $basePos = $offset - $neg;
+
+     // The base must lie strictly before this entry; anything else is a
+     // corrupt reference that would re-read this entry or seek nowhere.
+     if( $basePos <= 0 || $basePos >= $offset ) {
+       return false;
+     }
+
+     if( $this->extractPackedSize( $handle, $basePos ) > self::MAX_BASE_RAM ) {
+       return false;
+     }
+
+     $loaded = $this->streamPackEntry(
+       $handle,
+       $basePos,
+       0,
+       $collect,
+       $depth + 1
+     );
+   } else {
+     $rawSha = fread( $handle, 20 );
+
+     if( $rawSha === false || strlen( $rawSha ) !== 20 ) {
+       return false;
+     }
+
+     // Remember where the delta data starts. Resolving the base may move
+     // this handle if getHandle() returns the same handle for the base's
+     // pack (getHandle() is not visible here); restoring is harmless if not.
+     $deltaPos = ftell( $handle );
+     $baseSha = bin2hex( $rawSha );
+
+     if( $this->getSize( $baseSha ) > self::MAX_BASE_RAM ) {
+       return false;
+     }
+
+     $loaded = $this->streamInternal( $baseSha, $collect, $depth + 1 );
+   }
+
+   // Never apply a delta to a base that failed to load: the output would
+   // be wrong yet reported as a success.
+   if( !$loaded ) {
+     return false;
+   }
+
+   // NOTE(review): the size checks above use the size recorded for the base
+   // entry; whether that bounds the reconstructed base when the base is
+   // itself a delta depends on extractPackedSize() -- confirm.
+   fseek( $handle, $deltaPos );
+
+   return $this->applyDeltaStream( $handle, $base, $callback );
+ }
+
+ /**
+  * Inflates delta data from the handle's current position and applies it
+  * against a base string, passing each output piece to a callback.
+  *
+  * Compressed input is read in 8192-byte chunks. Inflated bytes accumulate
+  * in a buffer and are consumed as soon as a complete item is available;
+  * an incomplete item waits for the next chunk.
+  *
+  * @param resource $handle   Handle positioned at the compressed delta data.
+  * @param string   $base     Content that copy instructions read from.
+  * @param callable $callback Receives each copied or literal piece.
+  *
+  * @return bool False when the inflater cannot be created or a chunk fails
+  *              to inflate; true otherwise.
+  */
+ private function applyDeltaStream(
+ $handle,
+ string $base,
+ callable $callback
+ ): bool {
+ $infl = inflate_init( ZLIB_ENCODING_DEFLATE );
+ $ok = false;
+
+ if( $infl ) {
+ // Counts the leading variable-length integers skipped so far.
+ $state = 0;
+ $buffer = '';
+ $ok = true;
+
+ while( !feof( $handle ) ) {
+ $chunk = fread( $handle, 8192 );
+
+ if( $chunk === '' ) {
+ break;
+ }
+
+ $data = @inflate_add( $infl, $chunk );
+
+ if( $data === false ) {
+ $ok = false;
+ break;
+ }
+
+ $buffer .= $data;
+
+ // Consume as many complete items as the buffer currently holds.
+ while( true ) {
+ $len = strlen( $buffer );
+
+ if( $len === 0 ) {
+ break;
+ }
+
+ // Skip two leading integers whose bytes continue while the high bit
+ // is set (presumably the delta's base and result sizes -- confirm).
+ if( $state < 2 ) {
+ $pos = 0;
+ while( $pos < $len && (ord( $buffer[$pos] ) & 128) ) { $pos++; }
+
+ // Every buffered byte has the high bit set: the integer is incomplete.
+ if( $pos === $len && (ord( $buffer[$pos - 1] ) & 128) ) {
+ break;
+ }
+
+ $buffer = substr( $buffer, $pos + 1 );
+ $state++;
+ continue;
+ }
+
+ $op = ord( $buffer[0] );
+
+ if( $op & 128 ) {
+ // High bit set: copy a range of the base into the output.
+ $need = $this->getCopyInstructionSize( $op );
+
+ if( $len < 1 + $need ) {
+ break;
+ }
+
+ $info = $this->parseCopyInstruction( $op, $buffer, 1 );
+
+ $callback( substr( $base, $info['off'], $info['len'] ) );
+ $buffer = substr( $buffer, 1 + $need );
+ } else {
+ // High bit clear: the low 7 bits count literal bytes that follow.
+ $ln = $op & 127;
+
+ if( $len < 1 + $ln ) {
+ break;
+ }
+
+ $callback( substr( $buffer, 1, $ln ) );
+ $buffer = substr( $buffer, 1 + $ln );
+ }
+ }
+
+ // Stop reading once the compressed stream has ended.
+ if( inflate_get_status( $infl ) === ZLIB_STREAM_END ) {
+ break;
+ }
+ }
+ }
+
+ return $ok;
+ }
+
+ /**
+  * Inflates data from the handle's current position in 8192-byte chunks
+  * and passes each non-empty inflated piece to a callback.
+  *
+  * Reading stops at end of file, on an empty or failed read, on an inflate
+  * error, or once the compressed stream reports its end.
+  *
+  * @param resource $handle   Handle positioned at the compressed data.
+  * @param callable $callback Receives each inflated piece.
+  *
+  * @return bool False when the inflater cannot be created or a chunk fails
+  *              to inflate; true otherwise. Pieces delivered before a
+  *              failure have already reached the callback.
+  */
+ private function streamDecompression( $handle, callable $callback ): bool {
+   $infl = inflate_init( ZLIB_ENCODING_DEFLATE );
+
+   if( !$infl ) {
+     return false;
+   }
+
+   $ok = true;
+
+   while( !feof( $handle ) ) {
+     $chunk = fread( $handle, 8192 );
+
+     // fread() reports a failure as false; treat it like an empty read.
+     if( $chunk === '' || $chunk === false ) {
+       break;
+     }
+
+     $data = @inflate_add( $infl, $chunk );
+
+     // Report corrupt data to the caller instead of claiming success,
+     // matching how applyDeltaStream() treats the same failure.
+     if( $data === false ) {
+       $ok = false;
+       break;
+     }
+
+     if( $data !== '' ) {
+       $callback( $data );
+     }
+
+     if( inflate_get_status( $infl ) === ZLIB_STREAM_END ) {
+       break;
+     }
+   }
+
+   return $ok;
+ }
+
+ /**
+  * Inflates data from the handle's current position into a string,
+  * optionally stopping once a byte cap is reached.
+  *
+  * @param resource $handle Handle positioned at the compressed data.
+  * @param int      $cap    Maximum bytes to return; 0 means no cap.
+  *
+  * @return string Inflated bytes gathered before the stream ended, the cap
+  *                was reached, or a chunk failed to inflate; '' when the
+  *                inflater cannot be created.
+  */
+ private function decompressToString(
+   $handle,
+   int $cap = 0
+ ): string {
+   $inflater = inflate_init( ZLIB_ENCODING_DEFLATE );
+   $out = '';
+
+   if( !$inflater ) {
+     return $out;
+   }
+
+   while( !feof( $handle ) ) {
+     $raw = fread( $handle, 8192 );
+
+     if( $raw === '' ) {
+       break;
+     }
+
+     $piece = @inflate_add( $inflater, $raw );
+     $failed = $piece === false;
+
+     if( !$failed ) {
+       $out .= $piece;
+     }
+
+     // Trim to the cap and stop as soon as enough bytes are available.
+     if( $cap > 0 && strlen( $out ) >= $cap ) {
+       $out = substr( $out, 0, $cap );
+       break;
+     }
+
+     if( $failed || inflate_get_status( $inflater ) === ZLIB_STREAM_END ) {
+       break;
+     }
+   }
+
+   return $out;
+ }
+
+ private function extractPackedSize( $packPathOrHandle, int $offset ): int {
+ $handle = is_resource( $packPathOrHandle )
+ ? $packPathOrHandle
+ : $this->getHandle( $packPathOrHandle );
$size = 0;
Delta 484 lines added, 452 lines removed, 32-line increase