Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/treetrek.git

Refactors stream compression

Author: Dave Jarvis <email>
Date: 2026-02-20 22:55:51 GMT-0800
Commit: c7c5d46f544cb40d505afce647226633927d8f3a
Parent: 5a143f9
git/CompressionStream.php
}
+ /**
+  * Builds a stream that re-emits compressed input unchanged, using an
+  * inflate context only to measure how many bytes of each chunk were
+  * consumed by the zlib stream.
+  *
+  * Three closures are handed to the constructor, in order: a per-chunk
+  * pump, a finisher that always returns an empty string, and a test that
+  * reports whether the inflate context reached ZLIB_STREAM_END.
+  *
+  * @return self Stream whose pump returns the consumed prefix of a chunk.
+  */
+ public static function createExtractor(): self {
+   $inflater = inflate_init( ZLIB_ENCODING_DEFLATE );
+
+   $pump = function( string $chunk ) use ( $inflater ): string {
+     $readBefore = inflate_get_read_len( $inflater );
+
+     // Only the read counter matters here: the inflated bytes are thrown
+     // away and any warning raised by the call is suppressed.
+     @inflate_add( $inflater, $chunk );
+
+     $consumed = inflate_get_read_len( $inflater ) - $readBefore;
+
+     return substr( $chunk, 0, $consumed );
+   };
+
+   return new self(
+     $pump,
+     fn(): string => '',
+     fn(): bool => inflate_get_status( $inflater ) === ZLIB_STREAM_END
+   );
+ }
+
public static function createInflater(): self {
$context = inflate_init( ZLIB_ENCODING_DEFLATE );
}
);
+ }
+
+ /**
+  * Reads the handle in fixed-size chunks, pumps each chunk through this
+  * stream and yields every non-empty result.
+  *
+  * Reading stops at end of file, on a failed or empty read, or as soon as
+  * finished() reports true after a pump.
+  *
+  * @param mixed $handle    Readable stream resource.
+  * @param int   $chunkSize Number of bytes requested per read.
+  *
+  * @return Generator Yields the non-empty strings returned by pump().
+  */
+ public function stream( mixed $handle, int $chunkSize = 8192 ): Generator {
+   while( !feof( $handle ) ) {
+     $chunk = fread( $handle, $chunkSize );
+
+     if( $chunk === false || $chunk === '' ) {
+       break;
+     }
+
+     $data = $this->pump( $chunk );
+
+     if( $data !== '' ) {
+       yield $data;
+     }
+
+     if( $this->finished() ) {
+       break;
+     }
+   }
}
git/Git.php
yield $hdr;
- $deflate = deflate_init( ZLIB_ENCODING_DEFLATE );
-
- foreach( $this->slurpChunks( $sha ) as $raw ) {
- $compressed = deflate_add( $deflate, $raw, ZLIB_NO_FLUSH );
-
- if( $compressed !== '' ) {
- hash_update( $ctx, $compressed );
- yield $compressed;
- }
- }
-
- $final = deflate_add( $deflate, '', ZLIB_FINISH );
-
- if( $final !== '' ) {
- hash_update( $ctx, $final );
- yield $final;
- }
- }
-
- yield hash_final( $ctx, true );
- }
-
- private function slurpChunks( string $sha ): Generator {
- $path = $this->getLoosePath( $sha );
-
- if( is_file( $path ) ) {
- yield from $this->looseObjectChunks( $path );
- } else {
- $any = false;
-
- foreach( $this->packs->streamGenerator( $sha ) as $chunk ) {
- $any = true;
- yield $chunk;
- }
-
- if( !$any ) {
- $data = $this->packs->read( $sha );
-
- if( $data !== '' ) {
- yield $data;
- }
- }
- }
- }
-
- private function looseObjectChunks( string $path ): Generator {
- $reader = BufferedFileReader::open( $path );
- $infl = $reader->isOpen()
- ? inflate_init( ZLIB_ENCODING_DEFLATE )
- : false;
-
- if( $reader->isOpen() && $infl !== false ) {
- $found = false;
- $buffer = '';
-
- while( !$reader->eof() ) {
- $chunk = $reader->read( 16384 );
- $inflated = inflate_add( $infl, $chunk );
-
- if( $inflated === false ) {
- break;
- }
-
- if( !$found ) {
- $buffer .= $inflated;
- $eos = strpos( $buffer, "\0" );
-
- if( $eos !== false ) {
- $found = true;
- $body = substr( $buffer, $eos + 1 );
-
- if( $body !== '' ) {
- yield $body;
- }
-
- $buffer = '';
- }
- } elseif( $inflated !== '' ) {
- yield $inflated;
- }
- }
- }
- }
-
- private function streamCompressedObject( string $sha, $ctx ): Generator {
- $stream = CompressionStream::createDeflater();
- $buffer = '';
-
- $this->slurp( $sha, function( $chunk ) use (
- $stream,
- $ctx,
- &$buffer
- ) {
- $compressed = $stream->pump( $chunk );
-
- if( $compressed !== '' ) {
- hash_update( $ctx, $compressed );
- $buffer .= $compressed;
- }
- } );
-
- $final = $stream->finish();
-
- if( $final !== '' ) {
- hash_update( $ctx, $final );
- $buffer .= $final;
- }
-
- $pos = 0;
- $len = strlen( $buffer );
-
- while( $pos < $len ) {
- $chunk = substr( $buffer, $pos, 32768 );
-
- yield $chunk;
- $pos += 32768;
- }
- }
-
- private function getTreeSha( string $commitOrTreeSha ): string {
- $data = $this->read( $commitOrTreeSha );
- $sha = $commitOrTreeSha;
-
- if( preg_match( '/^object ([0-9a-f]{40})/m', $data, $matches ) ) {
- $sha = $this->getTreeSha( $matches[1] );
- }
-
- if( $sha === $commitOrTreeSha &&
- preg_match( '/^tree ([0-9a-f]{40})/m', $data, $matches ) ) {
- $sha = $matches[1];
- }
-
- return $sha;
- }
-
- private function resolvePath( string $treeSha, string $path ): array {
- $parts = explode( '/', trim( $path, '/' ) );
- $sha = $treeSha;
- $mode = '40000';
-
- foreach( $parts as $part ) {
- $entry = [ 'sha' => '', 'mode' => '' ];
-
- if( $part !== '' && $sha !== '' ) {
- $entry = $this->findTreeEntry( $sha, $part );
- }
-
- $sha = $entry['sha'];
- $mode = $entry['mode'];
- }
-
- return [
- 'sha' => $sha,
- 'mode' => $mode,
- 'isDir' => $mode === '40000' || $mode === '040000'
- ];
- }
-
- private function findTreeEntry( string $treeSha, string $name ): array {
- $data = $this->read( $treeSha );
- $entry = [ 'sha' => '', 'mode' => '' ];
-
- $this->parseTreeData(
- $data,
- function( $file, $n, $sha, $mode ) use ( $name, &$entry ) {
- if( $file->isName( $name ) ) {
- $entry = [ 'sha' => $sha, 'mode' => $mode ];
-
- return false;
- }
- }
- );
-
- return $entry;
- }
-
- private function parseTagData(
- string $name,
- string $sha,
- string $data
- ): Tag {
- $isAnn = strncmp( $data, 'object ', 7 ) === 0;
- $pattern = $isAnn
- ? '/^tagger (.*) <(.*)> (\d+) [+\-]\d{4}$/m'
- : '/^author (.*) <(.*)> (\d+) [+\-]\d{4}$/m';
- $id = $this->parseIdentity( $data, $pattern );
- $target = $isAnn
- ? $this->extractPattern( $data, '/^object (.*)$/m', 1, $sha )
- : $sha;
-
- return new Tag(
- $name,
- $sha,
- $target,
- $id['timestamp'],
- $this->extractMessage( $data ),
- $id['name']
- );
- }
-
- private function extractPattern(
- string $data,
- string $pattern,
- int $group,
- string $default = ''
- ): string {
- return preg_match( $pattern, $data, $matches )
- ? $matches[$group]
- : $default;
- }
-
- private function parseIdentity( string $data, string $pattern ): array {
- $found = preg_match( $pattern, $data, $matches );
-
- return [
- 'name' => $found ? trim( $matches[1] ) : 'Unknown',
- 'email' => $found ? $matches[2] : '',
- 'timestamp' => $found ? (int)$matches[3] : 0
- ];
- }
-
- private function extractMessage( string $data ): string {
- $pos = strpos( $data, "\n\n" );
-
- return $pos !== false ? trim( substr( $data, $pos + 2 ) ) : '';
- }
-
- private function slurp( string $sha, callable $callback ): void {
- $path = $this->getLoosePath( $sha );
-
- if( is_file( $path ) ) {
- $this->slurpLooseObject( $path, $callback );
- } else {
- $this->slurpPackedObject( $sha, $callback );
- }
- }
-
- private function slurpLooseObject( string $path, callable $callback ): void {
- $this->iterateInflated(
- $path,
- function( $chunk ) use ( $callback ) {
- if( $chunk !== '' ) {
- $callback( $chunk );
- }
-
- return true;
- }
- );
- }
-
- private function slurpPackedObject( string $sha, callable $callback ): void {
- $streamed = $this->packs->stream( $sha, $callback );
-
- if( !$streamed ) {
- $data = $this->packs->read( $sha );
-
- if( $data !== '' ) {
- $callback( $data );
- }
- }
- }
-
- private function iterateInflated(
- string $path,
- callable $processor
- ): void {
- $reader = BufferedFileReader::open( $path );
- $infl = $reader->isOpen()
- ? inflate_init( ZLIB_ENCODING_DEFLATE )
- : false;
- $found = false;
- $buffer = '';
-
- if( $reader->isOpen() && $infl !== false ) {
- while( !$reader->eof() ) {
- $chunk = $reader->read( 16384 );
- $inflated = inflate_add( $infl, $chunk );
-
- if( $inflated === false ) {
- break;
- }
-
- if( !$found ) {
- $buffer .= $inflated;
- $eos = strpos( $buffer, "\0" );
-
- if( $eos !== false ) {
- $found = true;
- $body = substr( $buffer, $eos + 1 );
- $head = substr( $buffer, 0, $eos );
-
- if( $processor( $body, $head ) === false ) {
- break;
- }
- }
- } elseif( $processor( $inflated, '' ) === false ) {
- break;
- }
- }
- }
- }
-
- private function peekLooseObject( string $sha, int $length ): string {
- $path = $this->getLoosePath( $sha );
- $buf = '';
-
- if( is_file( $path ) ) {
- $this->iterateInflated(
- $path,
- function( $chunk ) use ( $length, &$buf ) {
- $buf .= $chunk;
-
- return strlen( $buf ) < $length;
- }
- );
- }
-
- return substr( $buf, 0, $length );
- }
-
- private function parseCommit( string $sha ): object {
- $data = $this->read( $sha );
- $result = (object)[ 'sha' => '' ];
-
- if( $data !== '' ) {
- $id = $this->parseIdentity(
- $data,
- '/^author (.*) <(.*)> (\d+)/m'
- );
-
- $result = (object)[
- 'sha' => $sha,
- 'message' => $this->extractMessage( $data ),
- 'author' => $id['name'],
- 'email' => $id['email'],
- 'date' => $id['timestamp'],
- 'parentSha' => $this->extractPattern( $data, '/^parent (.*)$/m', 1 )
- ];
- }
-
- return $result;
- }
-
- private function walkTree( string $sha, callable $callback ): void {
- $data = $this->read( $sha );
- $tree = $data;
-
- if( $data !== '' && preg_match( '/^tree (.*)$/m', $data, $m ) ) {
- $tree = $this->read( $m[1] );
- }
-
- if( $tree !== '' && $this->isTreeData( $tree ) ) {
- $this->processTree( $tree, $callback );
- }
- }
-
- private function processTree( string $data, callable $callback ): void {
- $this->parseTreeData(
- $data,
- function( $file, $n, $s, $m ) use ( $callback ) {
- $callback( $file );
- }
- );
- }
-
- public function parseTreeData( string $data, callable $callback ): void {
- $pos = 0;
- $len = strlen( $data );
-
- while( $pos < $len ) {
- $space = strpos( $data, ' ', $pos );
- $eos = strpos( $data, "\0", $space );
-
- if( $space === false || $eos === false || $eos + 21 > $len ) {
- break;
- }
-
- $mode = substr( $data, $pos, $space - $pos );
- $name = substr( $data, $space + 1, $eos - $space - 1 );
- $sha = bin2hex( substr( $data, $eos + 1, 20 ) );
- $dir = $mode === '40000' || $mode === '040000';
- $isSub = $mode === '160000';
-
- $file = new File(
- $name,
- $sha,
- $mode,
- 0,
- $dir || $isSub ? 0 : $this->getObjectSize( $sha ),
- $dir || $isSub ? '' : $this->peek( $sha )
- );
-
- if( $callback( $file, $name, $sha, $mode ) === false ) {
- break;
- }
-
- $pos = $eos + 21;
- }
- }
-
- private function isTreeData( string $data ): bool {
- $len = strlen( $data );
- $patt = '/^(40000|100644|100755|120000|160000) /';
- $match = $len >= 25 && preg_match( $patt, $data );
- $eos = $match ? strpos( $data, "\0" ) : false;
-
- return $match && $eos !== false && $eos + 21 <= $len;
- }
-
- private function getLoosePath( string $sha ): string {
- return "{$this->objPath}/" . substr( $sha, 0, 2 ) . "/" .
- substr( $sha, 2 );
- }
-
- private function getLooseObjectSize( string $sha ): int {
- $path = $this->getLoosePath( $sha );
- $size = 0;
-
- if( is_file( $path ) ) {
- $this->iterateInflated(
- $path,
- function( $c, $head ) use ( &$size ) {
- if( $head !== '' ) {
- $parts = explode( ' ', $head );
- $size = isset( $parts[1] ) ? (int)$parts[1] : 0;
- }
-
- return false;
- }
- );
- }
-
- return $size;
- }
-
- public function collectObjects( array $wants, array $haves = [] ): array {
- $objs = $this->traverseObjects( $wants );
- $result = [];
-
- if( !empty( $haves ) ) {
- $haveObjs = $this->traverseObjects( $haves );
-
- foreach( $haveObjs as $sha => $type ) {
- if( isset( $objs[$sha] ) ) {
- unset( $objs[$sha] );
- }
- }
- }
-
- $result = $objs;
-
- return $result;
- }
-
- private function traverseObjects( array $roots ): array {
- $objs = [];
- $queue = [];
-
- foreach( $roots as $sha ) {
- $queue[] = [ 'sha' => $sha, 'type' => 0 ];
- }
-
- while( !empty( $queue ) ) {
- $item = array_pop( $queue );
- $sha = $item['sha'];
- $type = $item['type'];
-
- if( isset( $objs[$sha] ) ) {
- continue;
- }
-
- $data = '';
-
- if( $type !== 3 ) {
- $data = $this->read( $sha );
-
- if( $type === 0 ) {
- $type = $this->getObjectType( $data );
- }
- }
-
- $objs[$sha] = $type;
-
- if( $type === 1 ) {
- $hasTree = preg_match( '/^tree ([0-9a-f]{40})/m', $data, $m );
-
- if( $hasTree ) {
- $queue[] = [ 'sha' => $m[1], 'type' => 2 ];
- }
-
- $hasParents = preg_match_all(
- '/^parent ([0-9a-f]{40})/m',
- $data,
- $m
- );
-
- if( $hasParents ) {
- foreach( $m[1] as $parentSha ) {
- $queue[] = [ 'sha' => $parentSha, 'type' => 1 ];
- }
- }
- } elseif( $type === 2 ) {
- $pos = 0;
- $len = strlen( $data );
-
- while( $pos < $len ) {
- $space = strpos( $data, ' ', $pos );
- $eos = strpos( $data, "\0", $space );
-
- if( $space === false || $eos === false ) {
- break;
- }
-
- $mode = substr( $data, $pos, $space - $pos );
- $hash = bin2hex( substr( $data, $eos + 1, 20 ) );
-
- if( $mode !== '160000' ) {
- $isDir = $mode === '40000' || $mode === '040000';
- $queue[] = [ 'sha' => $hash, 'type' => $isDir ? 2 : 3 ];
- }
-
- $pos = $eos + 21;
- }
- } elseif( $type === 4 ) {
- $isTagTgt = preg_match( '/^object ([0-9a-f]{40})/m', $data, $m );
-
- if( $isTagTgt ) {
- $nextType = 1;
-
- if( preg_match( '/^type (commit|tree|blob|tag)/m', $data, $t ) ) {
- $map = [
- 'commit' => 1,
- 'tree' => 2,
- 'blob' => 3,
- 'tag' => 4
- ];
- $nextType = $map[$t[1]] ?? 1;
- }
-
- $queue[] = [ 'sha' => $m[1], 'type' => $nextType ];
- }
- }
- }
-
- return $objs;
- }
-
- private function getObjectType( string $data ): int {
- $isTree = strpos( $data, "tree " ) === 0;
- $isObj = strpos( $data, "object " ) === 0;
- $result = 3;
-
- if( $isTree ) {
- $result = 1;
- } elseif( $isObj ) {
- $result = 4;
- } elseif( $this->isTreeData( $data ) ) {
- $result = 2;
- }
-
- return $result;
- }
-}
-
-class MissingFile extends File {
- public function __construct() {
- parent::__construct( '', '', '0', 0, 0, '' );
- }
-
- public function emitRawHeaders(): void {
- header( "HTTP/1.1 404 Not Found" );
- exit;
- }
-}
+ foreach( $this->streamCompressed( $sha ) as $compressed ) {
+ hash_update( $ctx, $compressed );
+ yield $compressed;
+ }
+ }
+
+ yield hash_final( $ctx, true );
+ }
+
+ /**
+  * Yields the zlib-compressed bytes of an object.
+  *
+  * Chunks from the pack's raw compressed stream are forwarded as-is. When
+  * that stream yields nothing, the object's inflated chunks are deflated
+  * here instead.
+  *
+  * @param string $sha Object identifier.
+  *
+  * @return Generator Yields compressed chunks.
+  */
+ private function streamCompressed( string $sha ): Generator {
+   $sentRaw = false;
+
+   foreach( $this->packs->streamRawCompressed( $sha ) as $rawChunk ) {
+     $sentRaw = true;
+     yield $rawChunk;
+   }
+
+   if( $sentRaw ) {
+     return;
+   }
+
+   // Nothing came from the pack: compress the inflated content ourselves.
+   $deflater = deflate_init( ZLIB_ENCODING_DEFLATE );
+
+   foreach( $this->slurpChunks( $sha ) as $plain ) {
+     $packed = deflate_add( $deflater, $plain, ZLIB_NO_FLUSH );
+
+     if( $packed !== '' ) {
+       yield $packed;
+     }
+   }
+
+   // Flush whatever the deflater is still holding.
+   $tail = deflate_add( $deflater, '', ZLIB_FINISH );
+
+   if( $tail !== '' ) {
+     yield $tail;
+   }
+ }
+
+ /**
+  * Yields the inflated content of an object in chunks.
+  *
+  * A loose object file is used when one exists at the object's loose path.
+  * Otherwise the packs are streamed, and if streaming yields nothing a
+  * single whole-object read is used as a last resort.
+  *
+  * @param string $sha Object identifier.
+  *
+  * @return Generator Yields content chunks.
+  */
+ private function slurpChunks( string $sha ): Generator {
+   $loosePath = $this->getLoosePath( $sha );
+
+   if( is_file( $loosePath ) ) {
+     yield from $this->looseObjectChunks( $loosePath );
+
+     return;
+   }
+
+   $streamed = false;
+
+   foreach( $this->packs->streamGenerator( $sha ) as $piece ) {
+     $streamed = true;
+     yield $piece;
+   }
+
+   if( !$streamed ) {
+     $whole = $this->packs->read( $sha );
+
+     if( $whole !== '' ) {
+       yield $whole;
+     }
+   }
+ }
+
+ /**
+  * Inflates a loose object file and yields its body in chunks.
+  *
+  * Everything up to and including the first NUL byte of the inflated data
+  * is treated as the header and is not yielded. Iteration stops at end of
+  * file or when inflation fails.
+  *
+  * @param string $path Path of the loose object file.
+  *
+  * @return Generator Yields non-empty body chunks.
+  */
+ private function looseObjectChunks( string $path ): Generator {
+   $reader = BufferedFileReader::open( $path );
+
+   if( !$reader->isOpen() ) {
+     return;
+   }
+
+   $inflater = inflate_init( ZLIB_ENCODING_DEFLATE );
+
+   if( $inflater === false ) {
+     return;
+   }
+
+   $header = '';
+   $inBody = false;
+
+   while( !$reader->eof() ) {
+     $plain = inflate_add( $inflater, $reader->read( 16384 ) );
+
+     if( $plain === false ) {
+       break;
+     }
+
+     if( $inBody ) {
+       if( $plain !== '' ) {
+         yield $plain;
+       }
+
+       continue;
+     }
+
+     // Still looking for the NUL that terminates the object header.
+     $header .= $plain;
+     $nul = strpos( $header, "\0" );
+
+     if( $nul === false ) {
+       continue;
+     }
+
+     $inBody = true;
+     $rest = substr( $header, $nul + 1 );
+     $header = '';
+
+     if( $rest !== '' ) {
+       yield $rest;
+     }
+   }
+ }
+
+ /**
+  * Deflates an object, feeds the compressed bytes into a hash context and
+  * yields them in slices of at most 32768 bytes.
+  *
+  * The whole compressed object is accumulated in memory before the first
+  * slice is yielded.
+  *
+  * @param string $sha Object identifier.
+  * @param mixed  $ctx Hash context passed to hash_update().
+  *
+  * @return Generator Yields compressed slices.
+  */
+ private function streamCompressedObject( string $sha, $ctx ): Generator {
+   $stream = CompressionStream::createDeflater();
+   $buffer = '';
+
+   // Hashes and stores one piece of compressed output, ignoring empties.
+   $absorb = function( $compressed ) use ( $ctx, &$buffer ) {
+     if( $compressed !== '' ) {
+       hash_update( $ctx, $compressed );
+       $buffer .= $compressed;
+     }
+   };
+
+   $this->slurp( $sha, function( $chunk ) use ( $stream, $absorb ) {
+     $absorb( $stream->pump( $chunk ) );
+   } );
+
+   $absorb( $stream->finish() );
+
+   $total = strlen( $buffer );
+
+   for( $offset = 0; $offset < $total; $offset += 32768 ) {
+     yield substr( $buffer, $offset, 32768 );
+   }
+ }
+
+ /**
+  * Resolves an object identifier to a tree identifier.
+  *
+  * If the object's data has a line starting with "object <sha>", that
+  * target is resolved recursively. If that leaves the identifier
+  * unchanged and the data has a "tree <sha>" line, that tree is returned.
+  * Otherwise the input identifier is returned as-is.
+  *
+  * @param string $commitOrTreeSha Identifier to resolve.
+  *
+  * @return string Resolved identifier.
+  */
+ private function getTreeSha( string $commitOrTreeSha ): string {
+   $data = $this->read( $commitOrTreeSha );
+   $resolved = $commitOrTreeSha;
+
+   if( preg_match( '/^object ([0-9a-f]{40})/m', $data, $target ) === 1 ) {
+     $resolved = $this->getTreeSha( $target[1] );
+   }
+
+   if( $resolved !== $commitOrTreeSha ) {
+     return $resolved;
+   }
+
+   return preg_match( '/^tree ([0-9a-f]{40})/m', $data, $tree ) === 1
+     ? $tree[1]
+     : $resolved;
+ }
+
+ /**
+  * Walks a slash-separated path down from a tree, one segment at a time.
+  *
+  * An empty segment, or a segment reached after a failed lookup, resets
+  * both sha and mode to empty strings.
+  *
+  * @param string $treeSha Tree to start from.
+  * @param string $path    Path; leading and trailing slashes are ignored.
+  *
+  * @return array Keys 'sha', 'mode' and 'isDir' for the final segment.
+  */
+ private function resolvePath( string $treeSha, string $path ): array {
+   $sha = $treeSha;
+   $mode = '40000';
+
+   foreach( explode( '/', trim( $path, '/' ) ) as $segment ) {
+     $found = ( $segment === '' || $sha === '' )
+       ? [ 'sha' => '', 'mode' => '' ]
+       : $this->findTreeEntry( $sha, $segment );
+
+     $sha = $found['sha'];
+     $mode = $found['mode'];
+   }
+
+   $isDir = $mode === '40000' || $mode === '040000';
+
+   return [
+     'sha' => $sha,
+     'mode' => $mode,
+     'isDir' => $isDir
+   ];
+ }
+
+ /**
+  * Looks up a single named entry in a tree.
+  *
+  * @param string $treeSha Tree to search.
+  * @param string $name    Entry name, matched with File::isName().
+  *
+  * @return array Keys 'sha' and 'mode'; both empty when nothing matched.
+  */
+ private function findTreeEntry( string $treeSha, string $name ): array {
+   $match = [ 'sha' => '', 'mode' => '' ];
+
+   $this->parseTreeData(
+     $this->read( $treeSha ),
+     function( $file, $entryName, $entrySha, $entryMode ) use (
+       $name,
+       &$match
+     ) {
+       if( !$file->isName( $name ) ) {
+         return null;
+       }
+
+       $match = [ 'sha' => $entrySha, 'mode' => $entryMode ];
+
+       // Returning false tells the parser to stop scanning.
+       return false;
+     }
+   );
+
+   return $match;
+ }
+
+ /**
+  * Builds a Tag from raw object data.
+  *
+  * Data beginning with "object " is handled as an annotated tag: identity
+  * comes from the "tagger" line and the target from the "object" line.
+  * Any other data uses the "author" line and targets $sha itself.
+  *
+  * @param string $name Tag name.
+  * @param string $sha  Identifier the tag reference points at.
+  * @param string $data Raw object data.
+  *
+  * @return Tag
+  */
+ private function parseTagData(
+   string $name,
+   string $sha,
+   string $data
+ ): Tag {
+   if( str_starts_with( $data, 'object ' ) ) {
+     $pattern = '/^tagger (.*) <(.*)> (\d+) [+\-]\d{4}$/m';
+     $target = $this->extractPattern( $data, '/^object (.*)$/m', 1, $sha );
+   } else {
+     $pattern = '/^author (.*) <(.*)> (\d+) [+\-]\d{4}$/m';
+     $target = $sha;
+   }
+
+   $who = $this->parseIdentity( $data, $pattern );
+   $message = $this->extractMessage( $data );
+
+   return new Tag(
+     $name,
+     $sha,
+     $target,
+     $who['timestamp'],
+     $message,
+     $who['name']
+   );
+ }
+
+ /**
+  * Returns one capture group of the first match of a pattern.
+  *
+  * @param string $data    Text to search.
+  * @param string $pattern PCRE pattern.
+  * @param int    $group   Index of the capture group to return.
+  * @param string $default Value returned when the pattern does not match.
+  *
+  * @return string Captured text, or $default.
+  */
+ private function extractPattern(
+   string $data,
+   string $pattern,
+   int $group,
+   string $default = ''
+ ): string {
+   if( preg_match( $pattern, $data, $captures ) ) {
+     return $captures[$group];
+   }
+
+   return $default;
+ }
+
+ /**
+  * Extracts name, email and timestamp using a three-group pattern.
+  *
+  * @param string $data    Text to search.
+  * @param string $pattern PCRE pattern capturing name, email, timestamp.
+  *
+  * @return array Keys 'name', 'email', 'timestamp'; without a match these
+  *               are 'Unknown', '' and 0.
+  */
+ private function parseIdentity( string $data, string $pattern ): array {
+   if( !preg_match( $pattern, $data, $parts ) ) {
+     return [
+       'name' => 'Unknown',
+       'email' => '',
+       'timestamp' => 0
+     ];
+   }
+
+   return [
+     'name' => trim( $parts[1] ),
+     'email' => $parts[2],
+     'timestamp' => (int)$parts[3]
+   ];
+ }
+
+ /**
+  * Returns the trimmed text that follows the first blank line.
+  *
+  * @param string $data Raw object data.
+  *
+  * @return string Message text, or '' when there is no blank line.
+  */
+ private function extractMessage( string $data ): string {
+   $gap = strpos( $data, "\n\n" );
+
+   if( $gap === false ) {
+     return '';
+   }
+
+   return trim( substr( $data, $gap + 2 ) );
+ }
+
+ /**
+  * Feeds an object's content to a callback, chunk by chunk.
+  *
+  * A loose object file is used when one exists; otherwise the packs are
+  * consulted.
+  *
+  * @param string   $sha      Object identifier.
+  * @param callable $callback Receives each content chunk.
+  */
+ private function slurp( string $sha, callable $callback ): void {
+   $loosePath = $this->getLoosePath( $sha );
+
+   if( is_file( $loosePath ) ) {
+     $this->slurpLooseObject( $loosePath, $callback );
+
+     return;
+   }
+
+   $this->slurpPackedObject( $sha, $callback );
+ }
+
+ /**
+  * Passes every non-empty body chunk of a loose object to a callback.
+  *
+  * @param string   $path     Path of the loose object file.
+  * @param callable $callback Receives each non-empty chunk.
+  */
+ private function slurpLooseObject( string $path, callable $callback ): void {
+   $forward = function( $chunk ) use ( $callback ) {
+     if( $chunk !== '' ) {
+       $callback( $chunk );
+     }
+
+     // Always ask the iterator to keep going.
+     return true;
+   };
+
+   $this->iterateInflated( $path, $forward );
+ }
+
+ /**
+  * Passes a packed object's content to a callback.
+  *
+  * Streaming is tried first. When the packs report that nothing was
+  * streamed, the object is read in one piece and handed over if non-empty.
+  *
+  * @param string   $sha      Object identifier.
+  * @param callable $callback Receives the content.
+  */
+ private function slurpPackedObject( string $sha, callable $callback ): void {
+   if( $this->packs->stream( $sha, $callback ) ) {
+     return;
+   }
+
+   $whole = $this->packs->read( $sha );
+
+   if( $whole !== '' ) {
+     $callback( $whole );
+   }
+ }
+
+ /**
+  * Inflates a loose object file and hands the pieces to a processor.
+  *
+  * The first call receives the body bytes that followed the header's NUL
+  * terminator plus the header text itself. Later calls receive each
+  * inflated chunk with an empty header. Iteration ends at end of file,
+  * when inflation fails, or when the processor returns false.
+  *
+  * @param string   $path      Path of the loose object file.
+  * @param callable $processor Called as $processor( $chunk, $header ).
+  */
+ private function iterateInflated(
+   string $path,
+   callable $processor
+ ): void {
+   $reader = BufferedFileReader::open( $path );
+
+   if( !$reader->isOpen() ) {
+     return;
+   }
+
+   $inflater = inflate_init( ZLIB_ENCODING_DEFLATE );
+
+   if( $inflater === false ) {
+     return;
+   }
+
+   $pending = '';
+   $headerSeen = false;
+
+   while( !$reader->eof() ) {
+     $plain = inflate_add( $inflater, $reader->read( 16384 ) );
+
+     if( $plain === false ) {
+       break;
+     }
+
+     if( $headerSeen ) {
+       if( $processor( $plain, '' ) === false ) {
+         break;
+       }
+
+       continue;
+     }
+
+     // Accumulate until the NUL that ends the object header shows up.
+     $pending .= $plain;
+     $nul = strpos( $pending, "\0" );
+
+     if( $nul === false ) {
+       continue;
+     }
+
+     $headerSeen = true;
+     $body = substr( $pending, $nul + 1 );
+     $head = substr( $pending, 0, $nul );
+
+     if( $processor( $body, $head ) === false ) {
+       break;
+     }
+   }
+ }
+
+ /**
+  * Returns up to $length leading bytes of a loose object's body.
+  *
+  * @param string $sha    Object identifier.
+  * @param int    $length Maximum number of bytes wanted.
+  *
+  * @return string Leading bytes, or '' when no loose file exists.
+  */
+ private function peekLooseObject( string $sha, int $length ): string {
+   $loosePath = $this->getLoosePath( $sha );
+
+   if( !is_file( $loosePath ) ) {
+     return '';
+   }
+
+   $collected = '';
+
+   $this->iterateInflated(
+     $loosePath,
+     function( $chunk ) use ( $length, &$collected ) {
+       $collected .= $chunk;
+
+       // Keep reading only while fewer than $length bytes are held.
+       return strlen( $collected ) < $length;
+     }
+   );
+
+   return substr( $collected, 0, $length );
+ }
+
+ /**
+  * Reads a commit and returns its fields as a plain object.
+  *
+  * @param string $sha Commit identifier.
+  *
+  * @return object With sha, message, author, email, date and parentSha
+  *                properties; only an empty 'sha' when no data was read.
+  */
+ private function parseCommit( string $sha ): object {
+   $data = $this->read( $sha );
+
+   if( $data === '' ) {
+     return (object)[ 'sha' => '' ];
+   }
+
+   $who = $this->parseIdentity( $data, '/^author (.*) <(.*)> (\d+)/m' );
+   $message = $this->extractMessage( $data );
+   $parent = $this->extractPattern( $data, '/^parent (.*)$/m', 1 );
+
+   return (object)[
+     'sha' => $sha,
+     'message' => $message,
+     'author' => $who['name'],
+     'email' => $who['email'],
+     'date' => $who['timestamp'],
+     'parentSha' => $parent
+   ];
+ }
+
+ /**
+  * Invokes a callback for every entry of a tree.
+  *
+  * When the object's data contains a "tree <id>" line, that tree is read
+  * and used; otherwise the data itself is treated as the tree. Nothing
+  * happens unless the result passes isTreeData().
+  *
+  * @param string   $sha      Object identifier.
+  * @param callable $callback Receives each File.
+  */
+ private function walkTree( string $sha, callable $callback ): void {
+   $data = $this->read( $sha );
+
+   $tree = ( $data !== '' && preg_match( '/^tree (.*)$/m', $data, $m ) )
+     ? $this->read( $m[1] )
+     : $data;
+
+   if( $tree === '' || !$this->isTreeData( $tree ) ) {
+     return;
+   }
+
+   $this->processTree( $tree, $callback );
+ }
+
+ /**
+  * Parses tree data and passes each entry's File to a callback.
+  *
+  * The callback's return value is discarded, so it cannot stop the scan.
+  *
+  * @param string   $data     Raw tree data.
+  * @param callable $callback Receives each File.
+  */
+ private function processTree( string $data, callable $callback ): void {
+   $visit = function( $file ) use ( $callback ): void {
+     $callback( $file );
+   };
+
+   $this->parseTreeData( $data, $visit );
+ }
+
+ /**
+  * Scans raw tree data and reports each entry to a callback.
+  *
+  * Each entry is read as "<mode> <name>\0" followed by a 20-byte binary
+  * hash. Scanning stops at the first malformed or truncated entry, or as
+  * soon as the callback returns false.
+  *
+  * For entries that are neither directories (mode 40000/040000) nor mode
+  * 160000, the object's size and a peek at its content are fetched to
+  * build the File.
+  *
+  * @param string   $data     Raw tree data.
+  * @param callable $callback Called as $callback( $file, $name, $sha,
+  *                           $mode ); return false to stop.
+  */
+ public function parseTreeData( string $data, callable $callback ): void {
+   $pos = 0;
+   $len = strlen( $data );
+
+   while( $pos < $len ) {
+     $space = strpos( $data, ' ', $pos );
+
+     // Validate the space position before using it as a search offset;
+     // previously a false value was passed straight into strpos().
+     if( $space === false ) {
+       break;
+     }
+
+     $eos = strpos( $data, "\0", $space );
+
+     // The terminator must be followed by a complete 20-byte hash.
+     if( $eos === false || $eos + 21 > $len ) {
+       break;
+     }
+
+     $mode = substr( $data, $pos, $space - $pos );
+     $name = substr( $data, $space + 1, $eos - $space - 1 );
+     $sha = bin2hex( substr( $data, $eos + 1, 20 ) );
+     $dir = $mode === '40000' || $mode === '040000';
+     $isSub = $mode === '160000';
+     $skipBlob = $dir || $isSub;
+
+     $file = new File(
+       $name,
+       $sha,
+       $mode,
+       0,
+       $skipBlob ? 0 : $this->getObjectSize( $sha ),
+       $skipBlob ? '' : $this->peek( $sha )
+     );
+
+     if( $callback( $file, $name, $sha, $mode ) === false ) {
+       break;
+     }
+
+     $pos = $eos + 21;
+   }
+ }
+
+ /**
+  * Heuristically decides whether raw data looks like a tree.
+  *
+  * The data must be at least 25 bytes long, start with one of the listed
+  * modes followed by a space, and contain a NUL byte with at least 20
+  * bytes after it.
+  *
+  * @param string $data Raw object data.
+  *
+  * @return bool True when the data has the shape of a tree.
+  */
+ private function isTreeData( string $data ): bool {
+   $len = strlen( $data );
+   $patt = '/^(40000|100644|100755|120000|160000) /';
+
+   if( $len < 25 || !preg_match( $patt, $data ) ) {
+     return false;
+   }
+
+   $eos = strpos( $data, "\0" );
+
+   return $eos !== false && $eos + 21 <= $len;
+ }
+
+ /**
+  * Builds the loose-object path for an identifier: the first two
+  * characters name the directory and the remainder names the file.
+  *
+  * @param string $sha Object identifier.
+  *
+  * @return string Path under the object directory.
+  */
+ private function getLoosePath( string $sha ): string {
+   $dir = substr( $sha, 0, 2 );
+   $file = substr( $sha, 2 );
+
+   return "{$this->objPath}/{$dir}/{$file}";
+ }
+
+ /**
+  * Reads the size recorded in a loose object's header.
+  *
+  * The header is split on spaces and its second field is taken as the
+  * size. Inflation stops after the first callback invocation.
+  *
+  * @param string $sha Object identifier.
+  *
+  * @return int Recorded size, or 0 when unavailable.
+  */
+ private function getLooseObjectSize( string $sha ): int {
+   $loosePath = $this->getLoosePath( $sha );
+
+   if( !is_file( $loosePath ) ) {
+     return 0;
+   }
+
+   $size = 0;
+
+   $this->iterateInflated(
+     $loosePath,
+     function( $body, $head ) use ( &$size ) {
+       if( $head !== '' ) {
+         $fields = explode( ' ', $head );
+         $size = (int)( $fields[1] ?? 0 );
+       }
+
+       // The header is all that is needed; stop inflating.
+       return false;
+     }
+   );
+
+   return $size;
+ }
+
+ /**
+  * Collects the objects reachable from $wants, minus those reachable
+  * from $haves.
+  *
+  * @param array $wants Root identifiers to include.
+  * @param array $haves Root identifiers whose objects are excluded.
+  *
+  * @return array Map of object identifier to numeric type code.
+  */
+ public function collectObjects( array $wants, array $haves = [] ): array {
+   $wanted = $this->traverseObjects( $wants );
+
+   if( empty( $haves ) ) {
+     return $wanted;
+   }
+
+   foreach( $this->traverseObjects( $haves ) as $knownSha => $ignored ) {
+     unset( $wanted[$knownSha] );
+   }
+
+   return $wanted;
+ }
+
+ /**
+  * Walks the object graph from the given roots and records every object
+  * visited.
+  *
+  * Type codes used here: 0 = not yet known, 1 = commit, 2 = tree,
+  * 3 = blob, 4 = tag. Commits queue their tree and parents, trees queue
+  * their entries (mode 160000 entries are skipped), and tags queue their
+  * target. Objects queued as blobs are recorded without being read.
+  *
+  * @param array $roots Identifiers to start from.
+  *
+  * @return array Map of object identifier to type code.
+  */
+ private function traverseObjects( array $roots ): array {
+   $objs = [];
+   $queue = [];
+
+   foreach( $roots as $sha ) {
+     $queue[] = [ 'sha' => $sha, 'type' => 0 ];
+   }
+
+   while( !empty( $queue ) ) {
+     $item = array_pop( $queue );
+     $sha = $item['sha'];
+     $type = $item['type'];
+
+     if( isset( $objs[$sha] ) ) {
+       continue;
+     }
+
+     $data = '';
+
+     if( $type !== 3 ) {
+       $data = $this->read( $sha );
+
+       if( $type === 0 ) {
+         $type = $this->getObjectType( $data );
+       }
+     }
+
+     $objs[$sha] = $type;
+
+     if( $type === 1 ) {
+       $hasTree = preg_match( '/^tree ([0-9a-f]{40})/m', $data, $m );
+
+       if( $hasTree ) {
+         $queue[] = [ 'sha' => $m[1], 'type' => 2 ];
+       }
+
+       $hasParents = preg_match_all(
+         '/^parent ([0-9a-f]{40})/m',
+         $data,
+         $m
+       );
+
+       if( $hasParents ) {
+         foreach( $m[1] as $parentSha ) {
+           $queue[] = [ 'sha' => $parentSha, 'type' => 1 ];
+         }
+       }
+     } elseif( $type === 2 ) {
+       $pos = 0;
+       $len = strlen( $data );
+
+       while( $pos < $len ) {
+         $space = strpos( $data, ' ', $pos );
+
+         // Check the space position before using it as a search offset.
+         if( $space === false ) {
+           break;
+         }
+
+         $eos = strpos( $data, "\0", $space );
+
+         // Stop on a truncated entry instead of queueing a partial hash;
+         // this matches the guard in parseTreeData().
+         if( $eos === false || $eos + 21 > $len ) {
+           break;
+         }
+
+         $mode = substr( $data, $pos, $space - $pos );
+         $hash = bin2hex( substr( $data, $eos + 1, 20 ) );
+
+         if( $mode !== '160000' ) {
+           $isDir = $mode === '40000' || $mode === '040000';
+           $queue[] = [ 'sha' => $hash, 'type' => $isDir ? 2 : 3 ];
+         }
+
+         $pos = $eos + 21;
+       }
+     } elseif( $type === 4 ) {
+       $isTagTgt = preg_match( '/^object ([0-9a-f]{40})/m', $data, $m );
+
+       if( $isTagTgt ) {
+         // Default to commit when the tag carries no usable "type" line.
+         $nextType = 1;
+
+         if( preg_match( '/^type (commit|tree|blob|tag)/m', $data, $t ) ) {
+           $map = [
+             'commit' => 1,
+             'tree' => 2,
+             'blob' => 3,
+             'tag' => 4
+           ];
+           $nextType = $map[$t[1]] ?? 1;
+         }
+
+         $queue[] = [ 'sha' => $m[1], 'type' => $nextType ];
+       }
+     }
+   }
+
+   return $objs;
+ }
+
+ /**
+  * Guesses an object's type code from its content.
+  *
+  * Data starting with "tree " yields 1, data starting with "object "
+  * yields 4, data accepted by isTreeData() yields 2, and everything else
+  * yields 3.
+  *
+  * @param string $data Raw object data.
+  *
+  * @return int Type code in the range 1 to 4.
+  */
+ private function getObjectType( string $data ): int {
+   if( str_starts_with( $data, "tree " ) ) {
+     return 1;
+   }
+
+   if( str_starts_with( $data, "object " ) ) {
+     return 4;
+   }
+
+   return $this->isTreeData( $data ) ? 2 : 3;
+ }
+}
+
+/**
+ * File stand-in constructed with empty name, hash and content, mode '0'
+ * and zero for the numeric arguments.
+ */
+class MissingFile extends File {
+  /**
+   * Initialises the parent with placeholder values only.
+   */
+  public function __construct() {
+    parent::__construct( '', '', '0', 0, 0, '' );
+  }
+
+  /**
+   * Sends a 404 status line and terminates the script.
+   */
+  public function emitRawHeaders(): void {
+    header( "HTTP/1.1 404 Not Found" );
+    exit();
+  }
+}
+
git/GitPacks.php
}
- private function streamShaGenerator( string $sha, int $depth ): Generator {
- $info = $this->findPackInfo( $sha );
-
- if( $info['offset'] !== 0 ) {
- $handle = $this->getHandle( $info['file'] );
-
- if( $handle ) {
- yield from $this->streamPackEntryGenerator(
- $handle,
- $info['offset'],
- $depth
- );
- }
- }
- }
-
- public function getSize( string $sha ): int {
- $info = $this->findPackInfo( $sha );
- $result = 0;
-
- if( $info['offset'] !== 0 ) {
- $result = $this->extractPackedSize( $info['file'], $info['offset'] );
- }
-
- return $result;
- }
-
- private function findPackInfo( string $sha ): array {
- $result = [ 'offset' => 0, 'file' => '' ];
-
- if( strlen( $sha ) === 40 && ctype_xdigit( $sha ) ) {
- $binarySha = hex2bin( $sha );
-
- if( $this->lastPack !== '' ) {
- $offset = $this->findInIdx( $this->lastPack, $binarySha );
-
- if( $offset !== 0 ) {
- $result = [
- 'file' => str_replace( '.idx', '.pack', $this->lastPack ),
- 'offset' => $offset
- ];
- }
- }
-
- if( $result['offset'] === 0 ) {
- $count = count( $this->packFiles );
- $idx = 0;
- $found = false;
-
- while( !$found && $idx < $count ) {
- $indexFile = $this->packFiles[$idx];
-
- if( $indexFile !== $this->lastPack ) {
- $offset = $this->findInIdx( $indexFile, $binarySha );
-
- if( $offset !== 0 ) {
- $this->lastPack = $indexFile;
- $result = [
- 'file' => str_replace( '.idx', '.pack', $indexFile ),
- 'offset' => $offset
- ];
- $found = true;
- }
- }
-
- $idx++;
- }
- }
- }
-
- return $result;
- }
-
- private function findInIdx( string $indexFile, string $binarySha ): int {
- $handle = $this->getHandle( $indexFile );
- $result = 0;
-
- if( $handle ) {
- if( !isset( $this->fanoutCache[$indexFile] ) ) {
- fseek( $handle, 0 );
- $head = fread( $handle, 8 );
-
- if( $head === "\377tOc\0\0\0\2" ) {
- $this->fanoutCache[$indexFile] = array_values(
- unpack( 'N*', fread( $handle, 1024 ) )
- );
- }
- }
-
- if( isset( $this->fanoutCache[$indexFile] ) ) {
- $fanout = $this->fanoutCache[$indexFile];
- $byte = ord( $binarySha[0] );
- $start = $byte === 0 ? 0 : $fanout[$byte - 1];
- $end = $fanout[$byte];
-
- if( $end > $start ) {
- $result = $this->binarySearchIdx(
- $indexFile,
- $handle,
- $start,
- $end,
- $binarySha,
- $fanout[255]
- );
- }
- }
- }
-
- return $result;
- }
-
- private function binarySearchIdx(
- string $indexFile,
- mixed $handle,
- int $start,
- int $end,
- string $binarySha,
- int $total
- ): int {
- $low = $start;
- $high = $end - 1;
- $result = 0;
-
- while( $result === 0 && $low <= $high ) {
- $mid = ($low + $high) >> 1;
-
- fseek( $handle, 1032 + ($mid * 20) );
-
- $cmp = fread( $handle, 20 );
-
- if( $cmp < $binarySha ) {
- $low = $mid + 1;
- } elseif( $cmp > $binarySha ) {
- $high = $mid - 1;
- } else {
- fseek( $handle, 1032 + ($total * 24) + ($mid * 4) );
-
- $packed = fread( $handle, 4 );
- $offset = unpack( 'N', $packed )[1];
-
- if( $offset & 0x80000000 ) {
- $pos = 1032 + ($total * 28) + (($offset & 0x7FFFFFFF) * 8);
-
- fseek( $handle, $pos );
-
- $offset = unpack( 'J', fread( $handle, 8 ) )[1];
- }
-
- $result = (int)$offset;
- }
- }
-
- return $result;
- }
-
- private function readPackEntry(
- $handle,
- int $offset,
- int $size,
- int $cap = 0
- ): string {
- fseek( $handle, $offset );
- $header = $this->readVarInt( $handle );
- $type = ($header['byte'] >> 4) & 7;
- $result = '';
-
- if( $type === 6 ) {
- $result = $this->handleOfsDelta( $handle, $offset, $size, $cap );
- } elseif( $type === 7 ) {
- $result = $this->handleRefDelta( $handle, $size, $cap );
- } else {
- $result = $this->decompressToString( $handle, $cap );
- }
-
- return $result;
- }
-
- private function streamPackEntryGenerator(
- $handle,
- int $offset,
- int $depth
- ): Generator {
- fseek( $handle, $offset );
- $header = $this->readVarInt( $handle );
- $type = ($header['byte'] >> 4) & 7;
-
- if( $type === 6 || $type === 7 ) {
- yield from $this->streamDeltaObjectGenerator(
- $handle,
- $offset,
- $type,
- $depth
- );
- } else {
- yield from $this->streamDecompressionGenerator( $handle );
- }
- }
-
- private function resolveBaseToTempFile(
- $packHandle,
- int $baseOffset,
- int $depth
- ) {
- $tmpHandle = tmpfile();
-
- if( $tmpHandle !== false ) {
- foreach( $this->streamPackEntryGenerator(
- $packHandle,
- $baseOffset,
- $depth + 1
- ) as $chunk ) {
- fwrite( $tmpHandle, $chunk );
- }
-
- rewind( $tmpHandle );
- } else {
- error_log(
- "[GitPacks] tmpfile failed for ofs-delta base at $baseOffset"
- );
- }
-
- return $tmpHandle;
- }
-
- private function streamDeltaObjectGenerator(
- $handle,
- int $offset,
- int $type,
- int $depth
- ): Generator {
- if( $depth < self::MAX_DEPTH ) {
- fseek( $handle, $offset );
- $this->readVarInt( $handle );
-
- if( $type === 6 ) {
- $neg = $this->readOffsetDelta( $handle );
- $deltaPos = ftell( $handle );
- $baseSize = $this->extractPackedSize( $handle, $offset - $neg );
-
- if( $baseSize > self::MAX_BASE_RAM ) {
- $tmpHandle = $this->resolveBaseToTempFile(
- $handle,
- $offset - $neg,
- $depth
- );
-
- if( $tmpHandle !== false ) {
- fseek( $handle, $deltaPos );
- yield from $this->applyDeltaStreamGenerator(
- $handle,
- $tmpHandle
- );
-
- fclose( $tmpHandle );
- }
- } else {
- $base = '';
-
- foreach( $this->streamPackEntryGenerator(
- $handle,
- $offset - $neg,
- $depth + 1
- ) as $chunk ) {
- $base .= $chunk;
- }
-
- fseek( $handle, $deltaPos );
- yield from $this->applyDeltaStreamGenerator( $handle, $base );
- }
- } else {
- $baseSha = bin2hex( fread( $handle, 20 ) );
- $baseSize = $this->getSize( $baseSha );
-
- if( $baseSize > self::MAX_BASE_RAM ) {
- $tmpHandle = tmpfile();
-
- if( $tmpHandle !== false ) {
- $written = false;
-
- foreach( $this->streamShaGenerator(
- $baseSha,
- $depth + 1
- ) as $chunk ) {
- fwrite( $tmpHandle, $chunk );
- $written = true;
- }
-
- if( $written ) {
- rewind( $tmpHandle );
- yield from $this->applyDeltaStreamGenerator(
- $handle,
- $tmpHandle
- );
- }
-
- fclose( $tmpHandle );
- } else {
- error_log(
- "[GitPacks] tmpfile() failed for ref-delta (sha=$baseSha)"
- );
- }
- } else {
- $base = '';
- $written = false;
-
- foreach( $this->streamShaGenerator(
- $baseSha,
- $depth + 1
- ) as $chunk ) {
- $base .= $chunk;
- $written = true;
- }
-
- if( $written ) {
- yield from $this->applyDeltaStreamGenerator( $handle, $base );
- }
- }
- }
- } else {
- error_log( "[GitPacks] delta depth limit exceeded at offset $offset" );
- }
- }
-
- private function applyDeltaStreamGenerator( // Inflates delta data from $handle and yields the output produced by applying it to $base.
- $handle, // stream positioned at the compressed delta data
- $base // base content: either a string or a seekable stream resource
- ): Generator {
- $stream = CompressionStream::createInflater();
- $state = 0; // counts leading varints skipped so far; instructions start at 2
- $buffer = ''; // inflated delta bytes not yet consumed
- $done = false;
- $isFile = is_resource( $base ); // selects stream reads versus substr() for copy instructions
-
- while( !$done && !feof( $handle ) ) {
- $chunk = fread( $handle, 8192 );
- $done = $chunk === false || $chunk === ''; // a failed or empty read ends the loop
-
- if( !$done ) {
- $data = $stream->pump( $chunk );
-
- if( $data !== '' ) {
- $buffer .= $data;
- $doneBuffer = false; // set when the buffer is empty or holds only a partial item
-
- while( !$doneBuffer ) {
- $len = strlen( $buffer );
-
- if( $len === 0 ) {
- $doneBuffer = true;
- }
-
- if( !$doneBuffer ) {
- if( $state < 2 ) { // still skipping the two leading varints (presumably the delta's size header)
- $pos = 0;
-
- while( $pos < $len && (ord( $buffer[$pos] ) & 128) ) { // high bit set means the varint continues
- $pos++;
- }
-
- if( $pos === $len && (ord( $buffer[$pos - 1] ) & 128) ) { // varint runs past the buffer; wait for more data
- $doneBuffer = true;
- }
-
- if( !$doneBuffer ) {
- $buffer = substr( $buffer, $pos + 1 ); // drop the varint, including its final byte
- $state++;
- }
- } else {
- $op = ord( $buffer[0] ); // instruction byte
-
- if( $op & 128 ) { // high bit set: copy a range out of the base
- $need = $this->getCopyInstructionSize( $op ); // argument bytes that follow the instruction byte
-
- if( $len < 1 + $need ) { // arguments incomplete; wait for more data
- $doneBuffer = true;
- }
-
- if( !$doneBuffer ) {
- $info = $this->parseCopyInstruction( $op, $buffer, 1 ); // provides the 'off' and 'len' used below
-
- if( $isFile ) {
- fseek( $base, $info['off'] );
- $rem = $info['len'];
-
- while( $rem > 0 ) { // relay the range in pieces of at most 64 KiB
- $slc = fread( $base, min( 65536, $rem ) );
-
- if( $slc === false || $slc === '' ) { // base ended early; abandon the rest of this copy
- $rem = 0;
- } else {
- yield $slc;
- $rem -= strlen( $slc );
- }
- }
- } else {
- yield substr( $base, $info['off'], $info['len'] );
- }
-
- $buffer = substr( $buffer, 1 + $need ); // consume the instruction and its arguments
- }
- } else { // high bit clear: the next $op & 127 bytes are literal output
- $ln = $op & 127;
-
- if( $len < 1 + $ln ) { // literal incomplete; wait for more data
- $doneBuffer = true;
- }
-
- if( !$doneBuffer ) {
- yield substr( $buffer, 1, $ln );
- $buffer = substr( $buffer, 1 + $ln );
- }
- }
- }
- }
- }
- }
-
- $done = $stream->finished(); // stop reading once the inflater reports completion
- }
- }
- }
-
- private function streamDecompressionGenerator( $handle ): Generator {
- $stream = CompressionStream::createInflater();
- $done = false;
-
- while( !$done && !feof( $handle ) ) {
- $chunk = fread( $handle, 8192 );
- $done = $chunk === false || $chunk === '';
-
- if( !$done ) {
- $data = $stream->pump( $chunk );
-
- if( $data !== '' ) {
- yield $data;
+ /**
+  * Streams a packed object's data through CompressionStream's extractor.
+  *
+  * Looks the object up, seeks to its pack entry, reads the entry header,
+  * and relays whatever the extractor stream produces. Nothing is yielded
+  * when the object is not found, the pack handle is not a resource, or
+  * the entry is a delta (types 6 and 7).
+  *
+  * @param string $sha Hexadecimal object name to look up.
+  *
+  * @return Generator Chunks produced by the extractor stream.
+  */
+ public function streamRawCompressed( string $sha ): Generator {
+   $location = $this->findPackInfo( $sha );
+   $offset   = $location['offset'];
+
+   // An offset of zero is the "not found" value returned by findPackInfo().
+   if( $offset === 0 ) {
+     return;
+   }
+
+   $pack = $this->getHandle( $location['file'] );
+
+   if( !is_resource( $pack ) ) {
+     return;
+   }
+
+   fseek( $pack, $offset );
+
+   // The entry type lives in bits 4-6 of the header byte.
+   $entryType = ($this->readVarInt( $pack )['byte'] >> 4) & 7;
+   $isDelta   = $entryType === 6 || $entryType === 7;
+
+   if( !$isDelta ) {
+     yield from CompressionStream::createExtractor()->stream( $pack );
+   }
+ }
+
+ /**
+  * Streams the content of the packed object named by $sha.
+  *
+  * Yields nothing when the object is absent from every pack or when no
+  * usable handle is returned for its pack file.
+  *
+  * @param string $sha   Hexadecimal object name.
+  * @param int    $depth Delta-chain depth handed on to the entry reader.
+  *
+  * @return Generator Chunks from streamPackEntryGenerator().
+  */
+ private function streamShaGenerator( string $sha, int $depth ): Generator {
+   [ 'file' => $packFile, 'offset' => $entryOffset ] =
+     $this->findPackInfo( $sha );
+
+   // Only request a handle when the lookup succeeded (offset 0 = missing).
+   $pack = $entryOffset !== 0 ? $this->getHandle( $packFile ) : false;
+
+   if( $pack ) {
+     yield from $this->streamPackEntryGenerator(
+       $pack,
+       $entryOffset,
+       $depth
+     );
+   }
+ }
+
+ /**
+  * Reports the size recorded for a packed object.
+  *
+  * @param string $sha Hexadecimal object name.
+  *
+  * @return int Value from extractPackedSize(), or 0 when the object is not
+  *             found in any pack.
+  */
+ public function getSize( string $sha ): int {
+   $location = $this->findPackInfo( $sha );
+
+   return $location['offset'] === 0
+     ? 0
+     : $this->extractPackedSize( $location['file'], $location['offset'] );
+ }
+
+ /**
+  * Finds which pack holds an object and where its entry starts.
+  *
+  * The index that produced the most recent hit ($this->lastPack) is tried
+  * first; the remaining entries of $this->packFiles are then searched in
+  * order, and a hit there becomes the new $this->lastPack.
+  *
+  * @param string $sha Object name; anything other than 40 hexadecimal
+  *                    characters is reported as not found.
+  *
+  * @return array{offset:int,file:string} Pack file path and entry offset;
+  *         offset 0 with an empty file means the object was not found.
+  */
+ private function findPackInfo( string $sha ): array {
+   $result = [ 'offset' => 0, 'file' => '' ];
+
+   if( strlen( $sha ) === 40 && ctype_xdigit( $sha ) ) {
+     $binarySha = hex2bin( $sha );
+     $hitIndex  = '';
+     $offset    = 0;
+
+     if( $this->lastPack !== '' ) {
+       $offset = $this->findInIdx( $this->lastPack, $binarySha );
+
+       if( $offset !== 0 ) {
+         $hitIndex = $this->lastPack;
+       }
+     }
+
+     if( $offset === 0 ) {
+       foreach( $this->packFiles as $indexFile ) {
+         // The cached index was already searched above.
+         if( $indexFile !== $this->lastPack ) {
+           $offset = $this->findInIdx( $indexFile, $binarySha );
+
+           if( $offset !== 0 ) {
+             $hitIndex       = $indexFile;
+             $this->lastPack = $indexFile;
+             break;
+           }
+         }
+       }
+     }
+
+     if( $offset !== 0 ) {
+       // Swap only the trailing extension: a blanket str_replace() would
+       // also rewrite ".idx" occurring earlier in the path (for example in
+       // a directory name) and point at a pack file that does not exist.
+       $packFile = str_ends_with( $hitIndex, '.idx' )
+         ? substr( $hitIndex, 0, -4 ) . '.pack'
+         : $hitIndex;
+
+       $result = [ 'file' => $packFile, 'offset' => $offset ];
+     }
+   }
+
+   return $result;
+ }
+
+ /**
+  * Looks an object up in one pack index file.
+  *
+  * The 256-entry fan-out table is read once per index and kept in
+  * $this->fanoutCache. It narrows the search to the names sharing the
+  * object's first byte before binarySearchIdx() is called.
+  *
+  * Only indexes starting with the "\377tOc" + version 2 header are
+  * searched; anything else is reported as a miss.
+  *
+  * @param string $indexFile Path of the index file.
+  * @param string $binarySha Raw (binary) object name.
+  *
+  * @return int Offset reported by binarySearchIdx(), or 0 when not found.
+  */
+ private function findInIdx( string $indexFile, string $binarySha ): int {
+   $handle = $this->getHandle( $indexFile );
+   $result = 0;
+
+   if( $handle ) {
+     if( !isset( $this->fanoutCache[$indexFile] ) ) {
+       fseek( $handle, 0 );
+       $head = fread( $handle, 8 );
+
+       if( $head === "\377tOc\0\0\0\2" ) {
+         $raw = fread( $handle, 1024 );
+
+         // Cache the table only when all 256 four-byte entries arrived.
+         // A truncated index would otherwise leave gaps that surface as
+         // undefined-offset warnings and a TypeError in the lookup below.
+         if( is_string( $raw ) && strlen( $raw ) === 1024 ) {
+           $this->fanoutCache[$indexFile] = array_values(
+             unpack( 'N*', $raw )
+           );
+         }
+       }
+     }
+
+     if( isset( $this->fanoutCache[$indexFile] ) ) {
+       $fanout = $this->fanoutCache[$indexFile];
+       $byte   = ord( $binarySha[0] );
+
+       // Entry N holds the count of names whose first byte is <= N, so the
+       // candidates for $byte occupy positions [$start, $end).
+       $start = $byte === 0 ? 0 : $fanout[$byte - 1];
+       $end   = $fanout[$byte];
+
+       if( $end > $start ) {
+         $result = $this->binarySearchIdx(
+           $indexFile,
+           $handle,
+           $start,
+           $end,
+           $binarySha,
+           $fanout[255]
+         );
+       }
+     }
+   }
+
+   return $result;
+ }
+
+ /**
+  * Binary-searches the sorted name table of a version 2 pack index.
+  *
+  * Byte positions used: the name table starts at 1032 (8-byte header plus
+  * 256 four-byte fan-out entries) with 20 bytes per name; the 4-byte
+  * offset table starts at 1032 + $total * 24; the 8-byte large-offset
+  * table starts at 1032 + $total * 28.
+  *
+  * The search stops at the first matching name even when its offset
+  * decodes to 0, and stops on any short read, so a damaged index yields a
+  * miss rather than an endless loop.
+  *
+  * @param string $indexFile Path of the index (not used by the search).
+  * @param mixed  $handle    Open stream on the index file.
+  * @param int    $start     First candidate position (inclusive).
+  * @param int    $end       Last candidate position (exclusive).
+  * @param string $binarySha Raw 20-byte object name to find.
+  * @param int    $total     Number of objects in the index.
+  *
+  * @return int Pack offset of the object, or 0 when not found.
+  */
+ private function binarySearchIdx(
+   string $indexFile,
+   mixed $handle,
+   int $start,
+   int $end,
+   string $binarySha,
+   int $total
+ ): int {
+   $low    = $start;
+   $high   = $end - 1;
+   $result = 0;
+   $done   = false;
+
+   while( !$done && $low <= $high ) {
+     $mid = ($low + $high) >> 1;
+
+     fseek( $handle, 1032 + ($mid * 20) );
+
+     $name = fread( $handle, 20 );
+
+     if( !is_string( $name ) || strlen( $name ) !== 20 ) {
+       // Truncated name table: give up instead of comparing garbage.
+       $done = true;
+     } else {
+       // strcmp() always compares bytes; the < and > operators switch to
+       // numeric comparison when both strings happen to look numeric.
+       $order = strcmp( $name, $binarySha );
+
+       if( $order < 0 ) {
+         $low = $mid + 1;
+       } elseif( $order > 0 ) {
+         $high = $mid - 1;
+       } else {
+         $done = true;
+
+         fseek( $handle, 1032 + ($total * 24) + ($mid * 4) );
+
+         $packed = fread( $handle, 4 );
+
+         if( is_string( $packed ) && strlen( $packed ) === 4 ) {
+           $offset = unpack( 'N', $packed )[1];
+
+           // High bit set: the low 31 bits index the 64-bit offset table.
+           if( $offset & 0x80000000 ) {
+             $pos = 1032 + ($total * 28) + (($offset & 0x7FFFFFFF) * 8);
+
+             fseek( $handle, $pos );
+
+             $wide   = fread( $handle, 8 );
+             $offset = is_string( $wide ) && strlen( $wide ) === 8
+               ? unpack( 'J', $wide )[1]
+               : 0;
+           }
+
+           $result = (int)$offset;
+         }
+       }
+     }
+   }
+
+   return $result;
+ }
+
+ /**
+  * Reads one pack entry into a string, dispatching on its type.
+  *
+  * Type 6 goes to handleOfsDelta(), type 7 to handleRefDelta(), and every
+  * other type to decompressToString().
+  *
+  * @param resource $handle Open stream on the pack file.
+  * @param int      $offset Position of the entry within the pack.
+  * @param int      $size   Passed through to the delta handlers.
+  * @param int      $cap    Passed through to whichever reader is chosen.
+  *
+  * @return string Result of the selected reader.
+  */
+ private function readPackEntry(
+   $handle,
+   int $offset,
+   int $size,
+   int $cap = 0
+ ): string {
+   fseek( $handle, $offset );
+
+   $header = $this->readVarInt( $handle );
+
+   // The entry type lives in bits 4-6 of the header byte.
+   return match( ($header['byte'] >> 4) & 7 ) {
+     6       => $this->handleOfsDelta( $handle, $offset, $size, $cap ),
+     7       => $this->handleRefDelta( $handle, $size, $cap ),
+     default => $this->decompressToString( $handle, $cap ),
+   };
+ }
+
+ /**
+  * Streams the content of the pack entry found at $offset.
+  *
+  * Delta entries (types 6 and 7) are delegated to
+  * streamDeltaObjectGenerator(); every other entry is run through an
+  * inflating CompressionStream.
+  *
+  * @param resource $handle Open stream on the pack file.
+  * @param int      $offset Position of the entry within the pack.
+  * @param int      $depth  Delta-chain depth passed on to the delta reader.
+  *
+  * @return Generator Chunks of the entry's content.
+  */
+ private function streamPackEntryGenerator(
+   $handle,
+   int $offset,
+   int $depth
+ ): Generator {
+   fseek( $handle, $offset );
+
+   $entryType = ($this->readVarInt( $handle )['byte'] >> 4) & 7;
+   $isPlain   = $entryType !== 6 && $entryType !== 7;
+
+   if( $isPlain ) {
+     yield from CompressionStream::createInflater()->stream( $handle );
+
+     return;
+   }
+
+   yield from $this->streamDeltaObjectGenerator(
+     $handle,
+     $offset,
+     $entryType,
+     $depth
+   );
+ }
+
+ /**
+  * Writes the entry at $baseOffset into a temporary file.
+  *
+  * The entry is streamed one level deeper than $depth and the file is
+  * rewound before being returned. A tmpfile() failure is logged.
+  *
+  * @param resource $packHandle Open stream on the pack file.
+  * @param int      $baseOffset Position of the base entry within the pack.
+  * @param int      $depth      Delta-chain depth of the caller.
+  *
+  * @return resource|false Rewound temporary file, or false when tmpfile()
+  *                        failed.
+  */
+ private function resolveBaseToTempFile(
+   $packHandle,
+   int $baseOffset,
+   int $depth
+ ) {
+   $spool = tmpfile();
+
+   if( $spool === false ) {
+     error_log(
+       "[GitPacks] tmpfile failed for ofs-delta base at $baseOffset"
+     );
+
+     return $spool;
+   }
+
+   $pieces = $this->streamPackEntryGenerator(
+     $packHandle,
+     $baseOffset,
+     $depth + 1
+   );
+
+   foreach( $pieces as $piece ) {
+     fwrite( $spool, $piece );
+   }
+
+   rewind( $spool );
+
+   return $spool;
+ }
+
+ /**
+  * Streams the object rebuilt from the delta entry stored at $offset.
+  *
+  * Type 6 entries locate their base by a backwards offset in the same
+  * pack; all other entries name their base with a 20-byte object name.
+  * A base larger than self::MAX_BASE_RAM is spooled to a temporary file,
+  * otherwise it is collected into a string, and the delta is then applied
+  * by applyDeltaStreamGenerator().
+  *
+  * Resolving the base can seek $handle, so the position of the delta data
+  * is recorded and restored before the delta is applied on every path.
+  * (For the object-name path this assumes the base lookup may reuse the
+  * same handle; the extra seek is harmless when it does not.) Temporary
+  * files are closed in a finally block so they are released even when the
+  * consumer stops iterating early.
+  *
+  * Nothing is yielded when $depth has reached self::MAX_DEPTH (logged),
+  * when a temporary file cannot be created, or when a named base produces
+  * no data.
+  *
+  * @param resource $handle Open stream on the pack file.
+  * @param int      $offset Position of the delta entry within the pack.
+  * @param int      $type   Entry type; 6 selects the offset-delta path.
+  * @param int      $depth  Current delta-chain depth.
+  *
+  * @return Generator Chunks of the reconstructed object.
+  */
+ private function streamDeltaObjectGenerator(
+   $handle,
+   int $offset,
+   int $type,
+   int $depth
+ ): Generator {
+   if( $depth < self::MAX_DEPTH ) {
+     fseek( $handle, $offset );
+     $this->readVarInt( $handle );
+
+     if( $type === 6 ) {
+       $neg      = $this->readOffsetDelta( $handle );
+       $deltaPos = ftell( $handle );
+       $baseSize = $this->extractPackedSize( $handle, $offset - $neg );
+
+       if( $baseSize > self::MAX_BASE_RAM ) {
+         $tmpHandle = $this->resolveBaseToTempFile(
+           $handle,
+           $offset - $neg,
+           $depth
+         );
+
+         if( $tmpHandle !== false ) {
+           try {
+             fseek( $handle, $deltaPos );
+             yield from $this->applyDeltaStreamGenerator(
+               $handle,
+               $tmpHandle
+             );
+           } finally {
+             fclose( $tmpHandle );
+           }
+         }
+       } else {
+         $base = '';
+
+         foreach( $this->streamPackEntryGenerator(
+           $handle,
+           $offset - $neg,
+           $depth + 1
+         ) as $chunk ) {
+           $base .= $chunk;
+         }
+
+         fseek( $handle, $deltaPos );
+         yield from $this->applyDeltaStreamGenerator( $handle, $base );
+       }
+     } else {
+       $baseSha  = bin2hex( fread( $handle, 20 ) );
+       // The delta data follows the base name; remember where it starts.
+       $deltaPos = ftell( $handle );
+       $baseSize = $this->getSize( $baseSha );
+
+       if( $baseSize > self::MAX_BASE_RAM ) {
+         $tmpHandle = tmpfile();
+
+         if( $tmpHandle !== false ) {
+           try {
+             $written = false;
+
+             foreach( $this->streamShaGenerator(
+               $baseSha,
+               $depth + 1
+             ) as $chunk ) {
+               fwrite( $tmpHandle, $chunk );
+               $written = true;
+             }
+
+             if( $written ) {
+               rewind( $tmpHandle );
+               fseek( $handle, $deltaPos );
+               yield from $this->applyDeltaStreamGenerator(
+                 $handle,
+                 $tmpHandle
+               );
+             }
+           } finally {
+             fclose( $tmpHandle );
+           }
+         } else {
+           error_log(
+             "[GitPacks] tmpfile() failed for ref-delta (sha=$baseSha)"
+           );
+         }
+       } else {
+         $base    = '';
+         $written = false;
+
+         foreach( $this->streamShaGenerator(
+           $baseSha,
+           $depth + 1
+         ) as $chunk ) {
+           $base .= $chunk;
+           $written = true;
+         }
+
+         if( $written ) {
+           fseek( $handle, $deltaPos );
+           yield from $this->applyDeltaStreamGenerator( $handle, $base );
+         }
+       }
+     }
+   } else {
+     error_log( "[GitPacks] delta depth limit exceeded at offset $offset" );
+   }
+ }
+
+ private function applyDeltaStreamGenerator(
+ $handle,
+ $base
+ ): Generator {
+ $stream = CompressionStream::createInflater();
+ $state = 0;
+ $buffer = '';
+ $done = false;
+ $isFile = is_resource( $base );
+
+ while( !$done && !feof( $handle ) ) {
+ $chunk = fread( $handle, 8192 );
+ $done = $chunk === false || $chunk === '';
+
+ if( !$done ) {
+ $data = $stream->pump( $chunk );
+
+ if( $data !== '' ) {
+ $buffer .= $data;
+ $doneBuffer = false;
+
+ while( !$doneBuffer ) {
+ $len = strlen( $buffer );
+
+ if( $len === 0 ) {
+ $doneBuffer = true;
+ }
+
+ if( !$doneBuffer ) {
+ if( $state < 2 ) {
+ $pos = 0;
+
+ while( $pos < $len && (ord( $buffer[$pos] ) & 128) ) {
+ $pos++;
+ }
+
+ if( $pos === $len && (ord( $buffer[$pos - 1] ) & 128) ) {
+ $doneBuffer = true;
+ }
+
+ if( !$doneBuffer ) {
+ $buffer = substr( $buffer, $pos + 1 );
+ $state++;
+ }
+ } else {
+ $op = ord( $buffer[0] );
+
+ if( $op & 128 ) {
+ $need = $this->getCopyInstructionSize( $op );
+
+ if( $len < 1 + $need ) {
+ $doneBuffer = true;
+ }
+
+ if( !$doneBuffer ) {
+ $info = $this->parseCopyInstruction( $op, $buffer, 1 );
+
+ if( $isFile ) {
+ fseek( $base, $info['off'] );
+ $rem = $info['len'];
+
+ while( $rem > 0 ) {
+ $slc = fread( $base, min( 65536, $rem ) );
+
+ if( $slc === false || $slc === '' ) {
+ $rem = 0;
+ } else {
+ yield $slc;
+ $rem -= strlen( $slc );
+ }
+ }
+ } else {
+ yield substr( $base, $info['off'], $info['len'] );
+ }
+
+ $buffer = substr( $buffer, 1 + $need );
+ }
+ } else {
+ $ln = $op & 127;
+
+ if( $len < 1 + $ln ) {
+ $doneBuffer = true;
+ }
+
+ if( !$doneBuffer ) {
+ yield substr( $buffer, 1, $ln );
+ $buffer = substr( $buffer, 1 + $ln );
+ }
+ }
+ }
+ }
+ }
}
Delta: 1067 lines added, 1009 lines removed, 58-line increase