Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/treetrek.git

Optimizes buffer streams

Author: Dave Jarvis <email>
Date: 2026-02-21 18:21:32 GMT-0800
Commit: 38e909fe2f4d090572797ac3569d0968d703d97f
Parent: 60c9a68
git/DeltaDecoder.php
public function apply( string $base, string $delta, int $cap ): string {
- $pos = 0;
- $res = $this->readDeltaSize( $delta, $pos );
- $pos += $res['used'];
- $res = $this->readDeltaSize( $delta, $pos );
- $pos += $res['used'];
+ $pos = 0;
+ $this->readDeltaSize( $delta, $pos );
+ $this->readDeltaSize( $delta, $pos );
$chunks = [];
if( $op & 128 ) {
- $info = $this->parseCopyInstruction( $op, $delta, $pos );
- $extracted = substr( $base, $info['off'], $info['len'] );
+ $off = 0;
+ $ln = 0;
+
+ $this->parseCopyInstruction( $op, $delta, $pos, $off, $ln );
+
+ $extracted = substr( $base, $off, $ln );
$chunks[] = $extracted;
- $outLen += strlen( $extracted );
- $pos += $info['used'];
+ $outLen += $ln;
} else {
$ln = $op & 127;
$extracted = substr( $delta, $pos, $ln );
$chunks[] = $extracted;
- $outLen += strlen( $extracted );
+ $outLen += $ln;
$pos += $ln;
}
if( !$doneBuffer ) {
- $info = $this->parseCopyInstruction(
+ $off = 0;
+ $ln = 0;
+ $ptr = $offset + 1;
+
+ $this->parseCopyInstruction(
$op,
$buffer,
- $offset + 1
+ $ptr,
+ $off,
+ $ln
);
if( $isStream ) {
- $base->seek( $info['off'] );
+ $base->seek( $off );
- $rem = $info['len'];
+ $rem = $ln;
while( $rem > 0 ) {
}
} else {
- $yieldBuffer .= substr( $base, $info['off'], $info['len'] );
+ $yieldBuffer .= substr( $base, $off, $ln );
if( strlen( $yieldBuffer ) >= self::CHUNK_SIZE ) {
public function readDeltaTargetSize( StreamReader $handle, int $type ): int {
+ $result = 0;
+
if( $type === 6 ) {
$byte = ord( $handle->read( 1 ) );
}
}
-
- $pos = 0;
- $result = 0;
if( strlen( $head ) > 0 ) {
- $res = $this->readDeltaSize( $head, $pos );
- $pos += $res['used'];
- $res = $this->readDeltaSize( $head, $pos );
- $result = $res['val'];
+ $pos = 0;
+ $this->readDeltaSize( $head, $pos );
+ $result = $this->readDeltaSize( $head, $pos );
}
return $result;
}
public function readDeltaBaseSize( StreamReader $handle ): int {
$stream = CompressionStream::createInflater();
$head = '';
$try = 0;
+ $result = 0;
foreach( $stream->stream( $handle, 512 ) as $out ) {
$head .= $out;
$try++;
if( strlen( $head ) >= 32 || $try >= 64 ) {
break;
}
}
-
- $pos = 0;
- $result = 0;
if( strlen( $head ) > 0 ) {
- $res = $this->readDeltaSize( $head, $pos );
- $result = $res['val'];
+ $pos = 0;
+ $result = $this->readDeltaSize( $head, $pos );
}
return $result;
}
private function parseCopyInstruction(
int $op,
string $data,
- int $pos
- ): array {
+ int &$pos,
+ int &$off,
+ int &$len
+ ): void {
$off = 0;
$len = 0;
- $ptr = $pos;
-
- if( $op & 0x01 ) {
- $off |= ord( $data[$ptr++] );
- }
-
- if( $op & 0x02 ) {
- $off |= ord( $data[$ptr++] ) << 8;
- }
-
- if( $op & 0x04 ) {
- $off |= ord( $data[$ptr++] ) << 16;
- }
-
- if( $op & 0x08 ) {
- $off |= ord( $data[$ptr++] ) << 24;
- }
-
- if( $op & 0x10 ) {
- $len |= ord( $data[$ptr++] );
- }
- if( $op & 0x20 ) {
- $len |= ord( $data[$ptr++] ) << 8;
- }
+ ($op & 0x01) ? $off |= ord( $data[$pos++] ) : null;
+ ($op & 0x02) ? $off |= ord( $data[$pos++] ) << 8 : null;
+ ($op & 0x04) ? $off |= ord( $data[$pos++] ) << 16 : null;
+ ($op & 0x08) ? $off |= ord( $data[$pos++] ) << 24 : null;
- if( $op & 0x40 ) {
- $len |= ord( $data[$ptr++] ) << 16;
- }
+ ($op & 0x10) ? $len |= ord( $data[$pos++] ) : null;
+ ($op & 0x20) ? $len |= ord( $data[$pos++] ) << 8 : null;
+ ($op & 0x40) ? $len |= ord( $data[$pos++] ) << 16 : null;
- return [
- 'off' => $off,
- 'len' => $len === 0 ? 0x10000 : $len,
- 'used' => $ptr - $pos
- ];
+ $len = $len === 0 ? 0x10000 : $len;
}
}
- private function readDeltaSize( string $data, int $pos ): array {
+ private function readDeltaSize( string $data, int &$pos ): int {
$len = strlen( $data );
$val = 0;
$shift = 0;
- $start = $pos;
$done = false;
while( !$done && $pos < $len ) {
$byte = ord( $data[$pos++] );
$val |= ($byte & 0x7F) << $shift;
-
- if( !($byte & 0x80) ) {
- $done = true;
- }
-
- if( !$done ) {
- $shift += 7;
- }
+ $done = !($byte & 0x80);
+ $shift += 7;
}
- return [ 'val' => $val, 'used' => $pos - $start ];
+ return $val;
}
}
git/PackEntryReader.php
private DeltaDecoder $decoder;
-
- public function __construct( DeltaDecoder $decoder ) {
- $this->decoder = $decoder;
- }
-
- public function getSize(
- PackStreamManager $manager,
- string $packFile,
- int $offset
- ): int {
- $result = $manager->computeInt(
- $packFile,
- function( StreamReader $stream ) use ( $offset ): int {
- $stream->seek( $offset );
-
- $header = $this->readVarInt( $stream );
- $size = $header['value'];
- $type = $header['byte'] >> 4 & 7;
-
- if( $type === 6 || $type === 7 ) {
- $size = $this->decoder->readDeltaTargetSize( $stream, $type );
- }
-
- return $size;
- },
- 0
- );
-
- return $result;
- }
-
- public function read(
- PackStreamManager $manager,
- string $packFile,
- int $offset,
- int $cap,
- callable $readShaBaseFn
- ): string {
- $result = $manager->computeStringDedicated(
- $packFile,
- function( StreamReader $stream ) use (
- $offset,
- $cap,
- $readShaBaseFn
- ): string {
- $result = $this->readWithStream(
- $stream,
- $offset,
- $cap,
- $readShaBaseFn
- );
-
- return $result;
- },
- ''
- );
-
- return $result;
- }
-
- private function readWithStream(
- StreamReader $stream,
- int $offset,
- int $cap,
- callable $readShaBaseFn
- ): string {
- $stream->seek( $offset );
-
- $header = $this->readVarInt( $stream );
- $type = $header['byte'] >> 4 & 7;
- $result = '';
-
- if( $type === 6 ) {
- $neg = $this->readOffsetDelta( $stream );
- $cur = $stream->tell();
- $base = $offset - $neg;
- $bData = $this->readWithStream(
- $stream,
- $base,
- $cap,
- $readShaBaseFn
- );
-
- $stream->seek( $cur );
-
- $delta = $this->inflate( $stream );
- $result = $this->decoder->apply( $bData, $delta, $cap );
- } elseif( $type === 7 ) {
- $sha = bin2hex( $stream->read( 20 ) );
- $bas = $readShaBaseFn( $sha, $cap );
- $del = $this->inflate( $stream );
- $result = $this->decoder->apply( $bas, $del, $cap );
- } else {
- $result = $this->inflate( $stream, $cap );
- }
-
- return $result;
- }
-
- public function streamRawCompressed(
- PackStreamManager $manager,
- string $packFile,
- int $offset
- ): Generator {
- yield from $manager->streamGenerator(
- $packFile,
- function( StreamReader $stream ) use ( $offset ): Generator {
- $stream->seek( $offset );
-
- $header = $this->readVarInt( $stream );
- $type = $header['byte'] >> 4 & 7;
-
- if( $type !== 6 && $type !== 7 ) {
- $extractor = CompressionStream::createExtractor();
-
- yield from $extractor->stream( $stream );
- }
- }
- );
- }
-
- public function streamEntryGenerator(
- PackStreamManager $manager,
- string $packFile,
- int $offset,
- int $depth,
- callable $getSizeShaFn,
- callable $streamShaFn
- ): Generator {
- yield from $manager->streamGeneratorDedicated(
- $packFile,
- function( StreamReader $stream ) use (
- $manager,
- $packFile,
- $offset,
- $depth,
- $getSizeShaFn,
- $streamShaFn
- ): Generator {
- $stream->seek( $offset );
-
- $header = $this->readVarInt( $stream );
- $type = $header['byte'] >> 4 & 7;
-
- if( $type === 6 || $type === 7 ) {
- yield from $this->streamDeltaObjectGenerator(
- $stream,
- $manager,
- $packFile,
- $offset,
- $type,
- $depth,
- $getSizeShaFn,
- $streamShaFn
- );
- } else {
- $inflater = CompressionStream::createInflater();
-
- yield from $inflater->stream( $stream );
- }
- }
- );
- }
-
- private function streamDeltaObjectGenerator(
- StreamReader $stream,
- PackStreamManager $manager,
- string $packFile,
- int $offset,
- int $type,
- int $depth,
- callable $getSizeShaFn,
- callable $streamShaFn
- ): Generator {
- if( $depth < self::MAX_DEPTH ) {
- if( $type === 6 ) {
- $neg = $this->readOffsetDelta( $stream );
- $baseSize = $this->getSize(
- $manager,
- $packFile,
- $offset - $neg
- );
-
- if( $baseSize > self::MAX_BASE_RAM ) {
- $tmpStream = $this->resolveBaseToTempFile(
- $manager,
- $packFile,
- $offset - $neg,
- $depth,
- $getSizeShaFn,
- $streamShaFn
- );
-
- yield from $this->decoder->applyStreamGenerator(
- $stream,
- $tmpStream
- );
- } else {
- $readShaBaseFn = function(
- string $sha,
- int $cap
- ) use (
- $streamShaFn,
- $depth
- ): string {
- $chunks = [];
-
- foreach( $streamShaFn( $sha, $depth + 1 ) as $chunk ) {
- $chunks[] = $chunk;
- }
-
- $result = implode( '', $chunks );
-
- if( $cap > 0 && strlen( $result ) > $cap ) {
- $result = substr( $result, 0, $cap );
- }
-
- return $result;
- };
-
- $base = $this->read(
- $manager,
- $packFile,
- $offset - $neg,
- 0,
- $readShaBaseFn
- );
-
- yield from $this->decoder->applyStreamGenerator(
- $stream,
- $base
- );
- }
- } else {
- $baseSha = bin2hex( $stream->read( 20 ) );
- $baseSize = $getSizeShaFn( $baseSha );
-
- if( $baseSize > self::MAX_BASE_RAM ) {
- $tmpStream = BufferedReader::createTemp();
- $written = false;
-
- foreach( $streamShaFn( $baseSha, $depth + 1 ) as $chunk ) {
- $tmpStream->write( $chunk );
-
- $written = true;
- }
-
- if( $written ) {
- $tmpStream->rewind();
-
- yield from $this->decoder->applyStreamGenerator(
- $stream,
- $tmpStream
- );
- }
- } else {
- $chunks = [];
- $written = false;
-
- foreach( $streamShaFn( $baseSha, $depth + 1 ) as $chunk ) {
- $chunks[] = $chunk;
- $written = true;
- }
-
- if( $written ) {
- $base = implode( '', $chunks );
-
- yield from $this->decoder->applyStreamGenerator(
- $stream,
- $base
- );
- }
- }
- }
- }
- }
-
- private function resolveBaseToTempFile(
- PackStreamManager $manager,
- string $packFile,
- int $baseOffset,
- int $depth,
- callable $getSizeShaFn,
- callable $streamShaFn
- ): StreamReader {
- $result = BufferedReader::createTemp();
-
- foreach( $this->streamEntryGenerator(
- $manager,
- $packFile,
- $baseOffset,
- $depth + 1,
- $getSizeShaFn,
- $streamShaFn
- ) as $chunk ) {
- $result->write( $chunk );
- }
-
- $result->rewind();
-
- return $result;
- }
-
- private function readVarInt( StreamReader $stream ): array {
- $byte = ord( $stream->read( 1 ) );
- $val = $byte & 15;
- $shft = 4;
- $fst = $byte;
-
- while( $byte & 128 ) {
- $byte = ord( $stream->read( 1 ) );
- $val |= ($byte & 127) << $shft;
- $shft += 7;
- }
-
- return [ 'value' => $val, 'byte' => $fst ];
- }
-
- private function readOffsetDelta( StreamReader $stream ): int {
- $byte = ord( $stream->read( 1 ) );
- $result = $byte & 127;
-
- while( $byte & 128 ) {
- $byte = ord( $stream->read( 1 ) );
- $result = ($result + 1) << 7 | $byte & 127;
- }
+ private array $cache;
+
+ public function __construct( DeltaDecoder $decoder ) {
+ $this->decoder = $decoder;
+ $this->cache = [];
+ }
+
+ public function getSize(
+ PackStreamManager $manager,
+ string $packFile,
+ int $offset
+ ): int {
+ $result = $manager->computeInt(
+ $packFile,
+ function( StreamReader $stream ) use ( $offset ): int {
+ $stream->seek( $offset );
+
+ $header = $this->readVarInt( $stream );
+ $size = $header['value'];
+ $type = $header['byte'] >> 4 & 7;
+
+ if( $type === 6 || $type === 7 ) {
+ $size = $this->decoder->readDeltaTargetSize( $stream, $type );
+ }
+
+ return $size;
+ },
+ 0
+ );
+
+ return $result;
+ }
+
+ public function read(
+ PackStreamManager $manager,
+ string $packFile,
+ int $offset,
+ int $cap,
+ callable $readShaBaseFn
+ ): string {
+ $result = $manager->computeStringDedicated(
+ $packFile,
+ function( StreamReader $stream ) use (
+ $offset,
+ $cap,
+ $readShaBaseFn
+ ): string {
+ $result = $this->readWithStream(
+ $stream,
+ $offset,
+ $cap,
+ $readShaBaseFn
+ );
+
+ return $result;
+ },
+ ''
+ );
+
+ return $result;
+ }
+
+ private function readWithStream(
+ StreamReader $stream,
+ int $offset,
+ int $cap,
+ callable $readShaBaseFn
+ ): string {
+ $result = '';
+
+ if( isset( $this->cache[$offset] ) ) {
+ $result = $this->cache[$offset];
+ } else {
+ $stream->seek( $offset );
+
+ $header = $this->readVarInt( $stream );
+ $type = $header['byte'] >> 4 & 7;
+
+ if( $type === 6 ) {
+ $neg = $this->readOffsetDelta( $stream );
+ $cur = $stream->tell();
+ $base = $offset - $neg;
+ $bData = $this->readWithStream(
+ $stream,
+ $base,
+ $cap,
+ $readShaBaseFn
+ );
+
+ $stream->seek( $cur );
+
+ $delta = $this->inflate( $stream );
+ $result = $this->decoder->apply( $bData, $delta, $cap );
+ } elseif( $type === 7 ) {
+ $sha = bin2hex( $stream->read( 20 ) );
+ $bas = $readShaBaseFn( $sha, $cap );
+ $del = $this->inflate( $stream );
+ $result = $this->decoder->apply( $bas, $del, $cap );
+ } else {
+ $result = $this->inflate( $stream, $cap );
+ }
+
+ $this->cache[$offset] = $result;
+
+ count( $this->cache ) > 50 ? array_shift( $this->cache ) : null;
+ }
+
+ return $result;
+ }
+
+ public function streamRawCompressed(
+ PackStreamManager $manager,
+ string $packFile,
+ int $offset
+ ): Generator {
+ yield from $manager->streamGenerator(
+ $packFile,
+ function( StreamReader $stream ) use ( $offset ): Generator {
+ $stream->seek( $offset );
+
+ $header = $this->readVarInt( $stream );
+ $type = $header['byte'] >> 4 & 7;
+
+ if( $type !== 6 && $type !== 7 ) {
+ $extractor = CompressionStream::createExtractor();
+
+ yield from $extractor->stream( $stream );
+ }
+ }
+ );
+ }
+
+ public function streamEntryGenerator(
+ PackStreamManager $manager,
+ string $packFile,
+ int $offset,
+ int $depth,
+ callable $getSizeShaFn,
+ callable $streamShaFn
+ ): Generator {
+ yield from $manager->streamGeneratorDedicated(
+ $packFile,
+ function( StreamReader $stream ) use (
+ $manager,
+ $packFile,
+ $offset,
+ $depth,
+ $getSizeShaFn,
+ $streamShaFn
+ ): Generator {
+ $stream->seek( $offset );
+
+ $header = $this->readVarInt( $stream );
+ $type = $header['byte'] >> 4 & 7;
+
+ if( $type === 6 || $type === 7 ) {
+ yield from $this->streamDeltaObjectGenerator(
+ $stream,
+ $manager,
+ $packFile,
+ $offset,
+ $type,
+ $depth,
+ $getSizeShaFn,
+ $streamShaFn
+ );
+ } else {
+ $inflater = CompressionStream::createInflater();
+
+ yield from $inflater->stream( $stream );
+ }
+ }
+ );
+ }
+
+ private function streamDeltaObjectGenerator(
+ StreamReader $stream,
+ PackStreamManager $manager,
+ string $packFile,
+ int $offset,
+ int $type,
+ int $depth,
+ callable $getSizeShaFn,
+ callable $streamShaFn
+ ): Generator {
+ if( $depth < self::MAX_DEPTH ) {
+ if( $type === 6 ) {
+ $neg = $this->readOffsetDelta( $stream );
+ $baseSize = $this->getSize(
+ $manager,
+ $packFile,
+ $offset - $neg
+ );
+
+ if( $baseSize > self::MAX_BASE_RAM ) {
+ $tmpStream = $this->resolveBaseToTempFile(
+ $manager,
+ $packFile,
+ $offset - $neg,
+ $depth,
+ $getSizeShaFn,
+ $streamShaFn
+ );
+
+ yield from $this->decoder->applyStreamGenerator(
+ $stream,
+ $tmpStream
+ );
+ } else {
+ $readShaBaseFn = function(
+ string $sha,
+ int $cap
+ ) use (
+ $streamShaFn,
+ $depth
+ ): string {
+ $chunks = [];
+
+ foreach( $streamShaFn( $sha, $depth + 1 ) as $chunk ) {
+ $chunks[] = $chunk;
+ }
+
+ $result = implode( '', $chunks );
+
+ if( $cap > 0 && strlen( $result ) > $cap ) {
+ $result = substr( $result, 0, $cap );
+ }
+
+ return $result;
+ };
+
+ $base = $this->read(
+ $manager,
+ $packFile,
+ $offset - $neg,
+ 0,
+ $readShaBaseFn
+ );
+
+ yield from $this->decoder->applyStreamGenerator(
+ $stream,
+ $base
+ );
+ }
+ } else {
+ $baseSha = bin2hex( $stream->read( 20 ) );
+ $baseSize = $getSizeShaFn( $baseSha );
+
+ if( $baseSize > self::MAX_BASE_RAM ) {
+ $tmpStream = BufferedReader::createTemp();
+ $written = false;
+
+ foreach( $streamShaFn( $baseSha, $depth + 1 ) as $chunk ) {
+ $tmpStream->write( $chunk );
+
+ $written = true;
+ }
+
+ if( $written ) {
+ $tmpStream->rewind();
+
+ yield from $this->decoder->applyStreamGenerator(
+ $stream,
+ $tmpStream
+ );
+ }
+ } else {
+ $chunks = [];
+ $written = false;
+
+ foreach( $streamShaFn( $baseSha, $depth + 1 ) as $chunk ) {
+ $chunks[] = $chunk;
+ $written = true;
+ }
+
+ if( $written ) {
+ $base = implode( '', $chunks );
+
+ yield from $this->decoder->applyStreamGenerator(
+ $stream,
+ $base
+ );
+ }
+ }
+ }
+ }
+ }
+
+ private function resolveBaseToTempFile(
+ PackStreamManager $manager,
+ string $packFile,
+ int $baseOffset,
+ int $depth,
+ callable $getSizeShaFn,
+ callable $streamShaFn
+ ): StreamReader {
+ $result = BufferedReader::createTemp();
+
+ foreach( $this->streamEntryGenerator(
+ $manager,
+ $packFile,
+ $baseOffset,
+ $depth + 1,
+ $getSizeShaFn,
+ $streamShaFn
+ ) as $chunk ) {
+ $result->write( $chunk );
+ }
+
+ $result->rewind();
+
+ return $result;
+ }
+
+ private function readVarInt( StreamReader $stream ): array {
+ $data = $stream->read( 12 );
+ $byte = isset( $data[0] ) ? ord( $data[0] ) : 0;
+ $val = $byte & 15;
+ $shft = 4;
+ $fst = $byte;
+ $pos = 1;
+
+ while( $byte & 128 ) {
+ $byte = isset( $data[$pos] ) ? ord( $data[$pos++] ) : 0;
+ $val |= ($byte & 127) << $shft;
+ $shft += 7;
+ }
+
+ $rem = strlen( $data ) - $pos;
+
+ $rem > 0 ? $stream->seek( -$rem, SEEK_CUR ) : null;
+
+ return [ 'value' => $val, 'byte' => $fst ];
+ }
+
+ private function readOffsetDelta( StreamReader $stream ): int {
+ $data = $stream->read( 12 );
+ $byte = isset( $data[0] ) ? ord( $data[0] ) : 0;
+ $result = $byte & 127;
+ $pos = 1;
+
+ while( $byte & 128 ) {
+ $byte = isset( $data[$pos] ) ? ord( $data[$pos++] ) : 0;
+ $result = ($result + 1) << 7 | $byte & 127;
+ }
+
+ $rem = strlen( $data ) - $pos;
+
+ $rem > 0 ? $stream->seek( -$rem, SEEK_CUR ) : null;
return $result;
Delta: 395 lines added, 398 lines removed, 3-line decrease