Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/treetrek.git

Extracts buffered stream concept

Author: Dave Jarvis <email>
Date: 2026-02-21 16:54:18 GMT-0800
Commit: 3ec62a938b42361ec3907d807b7c52ad519ca42c
Parent: 20c681c
git/BufferedReader.php
+<?php
+require_once __DIR__ . '/StreamReader.php';
+
+/**
+ * StreamReader backed by a PHP stream resource.
+ *
+ * Instances are created through open() (an existing file, mode 'rb') or
+ * createTemp() (a tmpfile()). A failure to open is not reported: the
+ * handle is then not a resource and every operation degrades to a neutral
+ * result ('' / false / 0 / end-of-file) rather than raising.
+ */
+class BufferedFileReader implements StreamReader {
+  private mixed $handle;
+  private bool $temporary;
+
+  /**
+   * @param mixed $handle    Result of fopen()/tmpfile(); false if that call failed.
+   * @param bool  $temporary True for scratch files, the only streams write() accepts.
+   */
+  private function __construct( mixed $handle, bool $temporary ) {
+    $this->handle = $handle;
+    $this->temporary = $temporary;
+  }
+
+  /**
+   * Opens $path for binary reading. Warnings are suppressed; an unreadable
+   * path yields an instance that behaves as a closed stream.
+   */
+  public static function open( string $path ): self {
+    return new self( @fopen( $path, 'rb' ), false );
+  }
+
+  /**
+   * Creates a writable scratch stream via tmpfile(). Warnings are
+   * suppressed; on failure write() returns false for every call.
+   */
+  public static function createTemp(): self {
+    return new self( @tmpfile(), true );
+  }
+
+  /** Tells whether the wrapped handle is a usable (not yet closed) resource. */
+  private function isOpen(): bool {
+    return is_resource( $this->handle );
+  }
+
+  /** Closes the handle, if one was ever opened, when the reader is released. */
+  public function __destruct() {
+    if( $this->isOpen() ) {
+      fclose( $this->handle );
+    }
+  }
+
+  /**
+   * Reads up to $length bytes from the current position.
+   *
+   * @return string The bytes read; '' when $length is below 1, the stream
+   *                is closed or at end-of-file, or fread() fails.
+   */
+  public function read( int $length ): string {
+    // fread() raises a ValueError for a length below 1 on PHP 8, so such a
+    // request is answered with '' without touching the stream.
+    return $length > 0 && ! $this->eof()
+      ? (string)fread( $this->handle, $length )
+      : '';
+  }
+
+  /**
+   * Appends $data at the current position of a temporary stream.
+   *
+   * @return bool False for non-temporary or closed streams, or when
+   *              fwrite() reports failure.
+   */
+  public function write( string $data ): bool {
+    return $this->temporary && $this->isOpen()
+      ? fwrite( $this->handle, $data ) !== false
+      : false;
+  }
+
+  /**
+   * Moves the stream position (same $offset/$whence meaning as fseek()).
+   *
+   * @return bool True when fseek() succeeded; false otherwise or when closed.
+   */
+  public function seek( int $offset, int $whence = SEEK_SET ): bool {
+    return $this->isOpen()
+      ? fseek( $this->handle, $offset, $whence ) === 0
+      : false;
+  }
+
+  /** Returns the current position; 0 when closed or when ftell() fails. */
+  public function tell(): int {
+    return $this->isOpen() ? (int)ftell( $this->handle ) : 0;
+  }
+
+  /** Reports end-of-file; a closed stream is always at end-of-file. */
+  public function eof(): bool {
+    return $this->isOpen() ? feof( $this->handle ) : true;
+  }
+
+  /** Moves the position back to the start; does nothing when closed. */
+  public function rewind(): void {
+    if( $this->isOpen() ) {
+      rewind( $this->handle );
+    }
+  }
+}
git/GitPacks.php
0,
function( string $baseSha, int $cap ): string {
- $val = '';
-
- if( $cap > 0 ) {
- $val = $this->peek( $baseSha, $cap );
- } else {
- $val = $this->read( $baseSha );
- }
-
- return $val;
+ return $cap > 0
+ ? $this->peek( $baseSha, $cap )
+ : $this->read( $baseSha );
}
);
git/PackEntryReader.php
require_once __DIR__ . '/DeltaDecoder.php';
require_once __DIR__ . '/CompressionStream.php';
-
-class PackEntryReader {
- private const MAX_DEPTH = 200;
- private const MAX_BASE_RAM = 2097152;
-
- private DeltaDecoder $decoder;
-
- public function __construct( DeltaDecoder $decoder ) {
- $this->decoder = $decoder;
- }
-
- public function getSize(
- FileHandlePool $pool,
- string $packFile,
- int $offset
- ): int {
- return $pool->computeInt(
- $packFile,
- function( mixed $handle ) use ( $offset ): int {
- fseek( $handle, $offset );
-
- $header = $this->readVarInt( $handle );
- $size = $header['value'];
- $type = $header['byte'] >> 4 & 7;
-
- if( $type === 6 || $type === 7 ) {
- $size = $this->decoder->readDeltaTargetSize( $handle, $type );
- }
-
- return $size;
- },
- 0
- );
- }
-
- public function read(
- FileHandlePool $pool,
- string $packFile,
- int $offset,
- int $cap,
- callable $readShaBaseFn
- ): string {
- return $pool->computeStringDedicated(
- $packFile,
- function( mixed $handle ) use (
- $offset,
- $cap,
- $readShaBaseFn
- ): string {
- return $this->readWithHandle(
- $handle,
- $offset,
- $cap,
- $readShaBaseFn
- );
- },
- ''
- );
- }
-
- private function readWithHandle(
- mixed $handle,
- int $offset,
- int $cap,
- callable $readShaBaseFn
- ): string {
- fseek( $handle, $offset );
-
- $header = $this->readVarInt( $handle );
- $type = $header['byte'] >> 4 & 7;
- $result = '';
-
- if( $type === 6 ) {
- $neg = $this->readOffsetDelta( $handle );
- $cur = ftell( $handle );
- $base = $offset - $neg;
- $bData = $this->readWithHandle(
- $handle,
- $base,
- $cap,
- $readShaBaseFn
- );
-
- fseek( $handle, $cur );
-
- $delta = $this->inflate( $handle );
- $result = $this->decoder->apply( $bData, $delta, $cap );
- } elseif( $type === 7 ) {
- $sha = bin2hex( fread( $handle, 20 ) );
- $bas = $readShaBaseFn( $sha, $cap );
- $del = $this->inflate( $handle );
- $result = $this->decoder->apply( $bas, $del, $cap );
- } else {
- $result = $this->inflate( $handle, $cap );
- }
-
- return $result;
- }
-
- public function streamRawCompressed(
- FileHandlePool $pool,
- string $packFile,
- int $offset
- ): Generator {
- yield from $pool->streamGenerator(
- $packFile,
- function( mixed $handle ) use ( $offset ): Generator {
- fseek( $handle, $offset );
-
- $header = $this->readVarInt( $handle );
- $type = $header['byte'] >> 4 & 7;
-
- if( $type !== 6 && $type !== 7 ) {
- $stream = CompressionStream::createExtractor();
-
- yield from $stream->stream( $handle );
- }
- }
- );
- }
-
- public function streamEntryGenerator(
- FileHandlePool $pool,
- string $packFile,
- int $offset,
- int $depth,
- callable $getSizeShaFn,
- callable $streamShaFn
- ): Generator {
- yield from $pool->streamGeneratorDedicated(
- $packFile,
- function( mixed $handle ) use (
- $pool,
- $packFile,
- $offset,
- $depth,
- $getSizeShaFn,
- $streamShaFn
- ): Generator {
- fseek( $handle, $offset );
-
- $header = $this->readVarInt( $handle );
- $type = $header['byte'] >> 4 & 7;
-
- if( $type === 6 || $type === 7 ) {
- yield from $this->streamDeltaObjectGenerator(
- $handle,
- $pool,
- $packFile,
- $offset,
- $type,
- $depth,
- $getSizeShaFn,
- $streamShaFn
- );
- } else {
- $stream = CompressionStream::createInflater();
-
- yield from $stream->stream( $handle );
- }
- }
- );
- }
-
- private function streamDeltaObjectGenerator(
- mixed $handle,
- FileHandlePool $pool,
- string $packFile,
- int $offset,
- int $type,
- int $depth,
- callable $getSizeShaFn,
- callable $streamShaFn
- ): Generator {
- if( $depth < self::MAX_DEPTH ) {
- if( $type === 6 ) {
- $neg = $this->readOffsetDelta( $handle );
- $baseSize = $this->getSize( $pool, $packFile, $offset - $neg );
-
- if( $baseSize > self::MAX_BASE_RAM ) {
- $tmpHandle = $this->resolveBaseToTempFile(
- $pool,
- $packFile,
- $offset - $neg,
- $depth,
- $getSizeShaFn,
- $streamShaFn
- );
-
- if( $tmpHandle !== false ) {
- yield from $this->decoder->applyStreamGenerator(
- $handle,
- $tmpHandle
- );
-
- fclose( $tmpHandle );
- }
- } else {
- $readShaBaseFn = function( string $sha, int $cap ) use ( $streamShaFn, $depth ): string {
- $chunks = [];
-
- foreach( $streamShaFn( $sha, $depth + 1 ) as $chunk ) {
- $chunks[] = $chunk;
- }
-
- $result = implode( '', $chunks );
-
- return $cap > 0 && strlen( $result ) > $cap ? substr( $result, 0, $cap ) : $result;
- };
-
- $base = $this->read( $pool, $packFile, $offset - $neg, 0, $readShaBaseFn );
-
- yield from $this->decoder->applyStreamGenerator(
- $handle,
- $base
- );
- }
- } else {
- $baseSha = bin2hex( fread( $handle, 20 ) );
- $baseSize = $getSizeShaFn( $baseSha );
-
- if( $baseSize > self::MAX_BASE_RAM ) {
- $tmpHandle = tmpfile();
-
- if( $tmpHandle !== false ) {
- $written = false;
-
- foreach( $streamShaFn( $baseSha, $depth + 1 ) as $chunk ) {
- fwrite( $tmpHandle, $chunk );
-
- $written = true;
- }
-
- if( $written ) {
- rewind( $tmpHandle );
-
- yield from $this->decoder->applyStreamGenerator(
- $handle,
- $tmpHandle
- );
- }
-
- fclose( $tmpHandle );
- }
- } else {
- $chunks = [];
- $written = false;
-
- foreach( $streamShaFn( $baseSha, $depth + 1 ) as $chunk ) {
- $chunks[] = $chunk;
- $written = true;
- }
-
- if( $written ) {
- $base = implode( '', $chunks );
-
- yield from $this->decoder->applyStreamGenerator(
- $handle,
- $base
- );
- }
- }
- }
- }
- }
-
- private function resolveBaseToTempFile(
- FileHandlePool $pool,
- string $packFile,
- int $baseOffset,
- int $depth,
- callable $getSizeShaFn,
- callable $streamShaFn
- ) {
- $tmpHandle = tmpfile();
-
- if( $tmpHandle !== false ) {
- foreach( $this->streamEntryGenerator(
- $pool,
- $packFile,
- $baseOffset,
- $depth + 1,
- $getSizeShaFn,
- $streamShaFn
- ) as $chunk ) {
- fwrite( $tmpHandle, $chunk );
- }
-
- rewind( $tmpHandle );
- }
-
- return $tmpHandle;
- }
-
- private function readVarInt( mixed $handle ): array {
- $byte = ord( fread( $handle, 1 ) );
- $val = $byte & 15;
- $shft = 4;
- $fst = $byte;
-
- while( $byte & 128 ) {
- $byte = ord( fread( $handle, 1 ) );
- $val |= ($byte & 127) << $shft;
- $shft += 7;
- }
-
- return [ 'value' => $val, 'byte' => $fst ];
- }
-
- private function readOffsetDelta( mixed $handle ): int {
- $byte = ord( fread( $handle, 1 ) );
- $neg = $byte & 127;
-
- while( $byte & 128 ) {
- $byte = ord( fread( $handle, 1 ) );
- $neg = ($neg + 1) << 7 | $byte & 127;
- }
-
- return $neg;
- }
-
- private function inflate( mixed $handle, int $cap = 0 ): string {
- $stream = CompressionStream::createInflater();
- $chunks = [];
- $len = 0;
- $result = '';
-
- foreach( $stream->stream( $handle ) as $data ) {
+require_once __DIR__ . '/BufferedReader.php';
+
+class PackEntryReader {
+ private const MAX_DEPTH = 200;
+ private const MAX_BASE_RAM = 2097152;
+
+ private DeltaDecoder $decoder;
+
+ /**
+  * Stores the decoder that the reader later uses to apply deltas and to
+  * read delta target sizes.
+  *
+  * @param DeltaDecoder $decoder Decoder shared by all read/stream calls.
+  */
+ public function __construct( DeltaDecoder $decoder ) {
+ $this->decoder = $decoder;
+ }
+
+ /**
+  * Looks up the size of the pack entry that starts at $offset.
+  *
+  * The entry header is decoded with readVarInt(); the entry type is taken
+  * from bits 4-6 of its first byte. For delta entries (types 6 and 7) the
+  * size is obtained from the decoder's readDeltaTargetSize() instead of
+  * the header value.
+  *
+  * @param FileHandlePool $pool     Pool that supplies the stream for $packFile.
+  * @param string         $packFile Pack file to inspect.
+  * @param int            $offset   Byte offset of the entry header.
+  *
+  * @return int The size; the 0 handed to computeInt() is presumably its
+  *             fallback when no stream is available (confirm in the pool).
+  */
+ public function getSize(
+   FileHandlePool $pool,
+   string $packFile,
+   int $offset
+ ): int {
+   $sizeOf = function( StreamReader $stream ) use ( $offset ): int {
+     $stream->seek( $offset );
+
+     $header = $this->readVarInt( $stream );
+     $kind = ($header['byte'] >> 4) & 7;
+
+     return $kind === 6 || $kind === 7
+       ? $this->decoder->readDeltaTargetSize( $stream, $kind )
+       : $header['value'];
+   };
+
+   return $pool->computeInt( $packFile, $sizeOf, 0 );
+ }
+
+ /**
+  * Reads the content of the pack entry at $offset into a string.
+  *
+  * The work is delegated to readWithStream() on a stream obtained from the
+  * pool's computeStringDedicated().
+  *
+  * @param FileHandlePool $pool          Pool that supplies the stream.
+  * @param string         $packFile      Pack file to read from.
+  * @param int            $offset        Byte offset of the entry header.
+  * @param int            $cap           Limit forwarded to inflate/apply; callers
+  *                                      in this class pass 0 for "no limit".
+  * @param callable       $readShaBaseFn fn( string $sha, int $cap ): string used
+  *                                      to fetch a base object named by hash.
+  *
+  * @return string The entry content; the '' handed to the pool is presumably
+  *                its fallback when no stream is available (confirm in the pool).
+  */
+ public function read(
+   FileHandlePool $pool,
+   string $packFile,
+   int $offset,
+   int $cap,
+   callable $readShaBaseFn
+ ): string {
+   $reader = fn( StreamReader $stream ): string => $this->readWithStream(
+     $stream,
+     $offset,
+     $cap,
+     $readShaBaseFn
+   );
+
+   return $pool->computeStringDedicated( $packFile, $reader, '' );
+ }
+
+ /**
+  * Reads one pack entry from an already opened stream.
+  *
+  * Type 6 entries name their base by a backwards distance within the same
+  * pack: the base is read recursively from the same stream, after which the
+  * position is restored so the delta can be inflated and applied. Type 7
+  * entries name their base by a 20-byte hash, resolved via $readShaBaseFn.
+  * Any other type is inflated directly.
+  *
+  * @param StreamReader $stream        Stream positioned anywhere in the pack.
+  * @param int          $offset        Byte offset of the entry header.
+  * @param int          $cap           Limit forwarded to inflate()/apply().
+  * @param callable     $readShaBaseFn fn( string $sha, int $cap ): string.
+  *
+  * @return string The (possibly capped) entry content.
+  */
+ private function readWithStream(
+   StreamReader $stream,
+   int $offset,
+   int $cap,
+   callable $readShaBaseFn
+ ): string {
+   $stream->seek( $offset );
+
+   $header = $this->readVarInt( $stream );
+   $kind = ($header['byte'] >> 4) & 7;
+
+   if( $kind === 6 ) {
+     $distance = $this->readOffsetDelta( $stream );
+     // Remember where the compressed delta starts; reading the base moves
+     // the shared stream elsewhere.
+     $resumeAt = $stream->tell();
+     $baseData = $this->readWithStream(
+       $stream,
+       $offset - $distance,
+       $cap,
+       $readShaBaseFn
+     );
+
+     $stream->seek( $resumeAt );
+
+     return $this->decoder->apply(
+       $baseData,
+       $this->inflate( $stream ),
+       $cap
+     );
+   }
+
+   if( $kind === 7 ) {
+     $baseSha = bin2hex( $stream->read( 20 ) );
+     $baseData = $readShaBaseFn( $baseSha, $cap );
+
+     return $this->decoder->apply(
+       $baseData,
+       $this->inflate( $stream ),
+       $cap
+     );
+   }
+
+   return $this->inflate( $stream, $cap );
+ }
+
+ /**
+  * Streams the data of a non-delta entry through the extractor created by
+  * CompressionStream::createExtractor().
+  *
+  * Delta entries (types 6 and 7) produce no output at all.
+  *
+  * @param FileHandlePool $pool     Pool that supplies the stream.
+  * @param string         $packFile Pack file to read from.
+  * @param int            $offset   Byte offset of the entry header.
+  *
+  * @return Generator Chunks as yielded by the extractor.
+  */
+ public function streamRawCompressed(
+   FileHandlePool $pool,
+   string $packFile,
+   int $offset
+ ): Generator {
+   $emit = function( StreamReader $stream ) use ( $offset ): Generator {
+     $stream->seek( $offset );
+
+     $kind = ($this->readVarInt( $stream )['byte'] >> 4) & 7;
+
+     if( $kind === 6 || $kind === 7 ) {
+       return;
+     }
+
+     yield from CompressionStream::createExtractor()->stream( $stream );
+   };
+
+   yield from $pool->streamGenerator( $packFile, $emit );
+ }
+
+ /**
+  * Streams the content of the pack entry at $offset.
+  *
+  * Non-delta entries are inflated straight from the pack; delta entries
+  * (types 6 and 7) are handed to streamDeltaObjectGenerator().
+  *
+  * @param FileHandlePool $pool         Pool that supplies the stream.
+  * @param string         $packFile     Pack file to read from.
+  * @param int            $offset       Byte offset of the entry header.
+  * @param int            $depth        Current delta-chain depth.
+  * @param callable       $getSizeShaFn fn( string $sha ): int, size of an object by hash.
+  * @param callable       $streamShaFn  fn( string $sha, int $depth ): iterable of chunks.
+  *
+  * @return Generator Chunks of the entry content.
+  */
+ public function streamEntryGenerator(
+   FileHandlePool $pool,
+   string $packFile,
+   int $offset,
+   int $depth,
+   callable $getSizeShaFn,
+   callable $streamShaFn
+ ): Generator {
+   $emit = function( StreamReader $stream ) use (
+     $pool,
+     $packFile,
+     $offset,
+     $depth,
+     $getSizeShaFn,
+     $streamShaFn
+   ): Generator {
+     $stream->seek( $offset );
+
+     $kind = ($this->readVarInt( $stream )['byte'] >> 4) & 7;
+
+     if( $kind !== 6 && $kind !== 7 ) {
+       yield from CompressionStream::createInflater()->stream( $stream );
+
+       return;
+     }
+
+     yield from $this->streamDeltaObjectGenerator(
+       $stream,
+       $pool,
+       $packFile,
+       $offset,
+       $kind,
+       $depth,
+       $getSizeShaFn,
+       $streamShaFn
+     );
+   };
+
+   yield from $pool->streamGeneratorDedicated( $packFile, $emit );
+ }
+
+ /**
+  * Streams the content of a delta entry by applying its delta to its base.
+  *
+  * $stream must be positioned just after the entry header. A type 6 entry
+  * names its base by a backwards distance in the same pack; any other type
+  * handed in here is treated as naming the base by a 20-byte hash. A base
+  * larger than MAX_BASE_RAM (2 MiB) is spilled to a temporary stream; a
+  * smaller one is held in memory as a string.
+  *
+  * Nothing is yielded when $depth has reached MAX_DEPTH, when a hash-named
+  * base produces no chunks, or when spilling the base to the temporary
+  * stream fails. Temp-stream creation does not report errors, so write()
+  * results are the only failure signal; applying a delta to an empty or
+  * truncated base would emit corrupt content.
+  *
+  * @param StreamReader   $stream       Pack stream positioned after the header.
+  * @param FileHandlePool $pool         Pool used to read an offset-named base.
+  * @param string         $packFile     Pack file containing the entry.
+  * @param int            $offset       Byte offset of the entry header.
+  * @param int            $type         Entry type taken from the header (6 or 7).
+  * @param int            $depth        Current delta-chain depth.
+  * @param callable       $getSizeShaFn fn( string $sha ): int.
+  * @param callable       $streamShaFn  fn( string $sha, int $depth ): iterable of chunks.
+  *
+  * @return Generator Chunks produced by the decoder's applyStreamGenerator().
+  */
+ private function streamDeltaObjectGenerator(
+   StreamReader $stream,
+   FileHandlePool $pool,
+   string $packFile,
+   int $offset,
+   int $type,
+   int $depth,
+   callable $getSizeShaFn,
+   callable $streamShaFn
+ ): Generator {
+   if( $depth < self::MAX_DEPTH ) {
+     if( $type === 6 ) {
+       $neg = $this->readOffsetDelta( $stream );
+       $baseOffset = $offset - $neg;
+       $baseSize = $this->getSize( $pool, $packFile, $baseOffset );
+
+       if( $baseSize > self::MAX_BASE_RAM ) {
+         $tmpStream = $this->resolveBaseToTempFile(
+           $pool,
+           $packFile,
+           $baseOffset,
+           $depth,
+           $getSizeShaFn,
+           $streamShaFn
+         );
+
+         // null means the base could not be spilled completely.
+         if( $tmpStream !== null ) {
+           yield from $this->decoder->applyStreamGenerator(
+             $stream,
+             $tmpStream
+           );
+         }
+       } else {
+         $readShaBaseFn = function(
+           string $sha,
+           int $cap
+         ) use (
+           $streamShaFn,
+           $depth
+         ): string {
+           $chunks = [];
+
+           foreach( $streamShaFn( $sha, $depth + 1 ) as $chunk ) {
+             $chunks[] = $chunk;
+           }
+
+           $result = implode( '', $chunks );
+
+           if( $cap > 0 && strlen( $result ) > $cap ) {
+             $result = substr( $result, 0, $cap );
+           }
+
+           return $result;
+         };
+
+         $base = $this->read(
+           $pool,
+           $packFile,
+           $baseOffset,
+           0,
+           $readShaBaseFn
+         );
+
+         yield from $this->decoder->applyStreamGenerator(
+           $stream,
+           $base
+         );
+       }
+     } else {
+       $baseSha = bin2hex( $stream->read( 20 ) );
+       $baseSize = $getSizeShaFn( $baseSha );
+
+       if( $baseSize > self::MAX_BASE_RAM ) {
+         $tmpStream = BufferedFileReader::createTemp();
+         $written = false;
+         $intact = true;
+
+         foreach( $streamShaFn( $baseSha, $depth + 1 ) as $chunk ) {
+           // A rejected write means the temp stream is unusable or the
+           // base would be truncated; stop rather than decode against it.
+           if( !$tmpStream->write( $chunk ) ) {
+             $intact = false;
+
+             break;
+           }
+
+           $written = true;
+         }
+
+         if( $written && $intact ) {
+           $tmpStream->rewind();
+
+           yield from $this->decoder->applyStreamGenerator(
+             $stream,
+             $tmpStream
+           );
+         }
+       } else {
+         $chunks = [];
+         $written = false;
+
+         foreach( $streamShaFn( $baseSha, $depth + 1 ) as $chunk ) {
+           $chunks[] = $chunk;
+           $written = true;
+         }
+
+         if( $written ) {
+           $base = implode( '', $chunks );
+
+           yield from $this->decoder->applyStreamGenerator(
+             $stream,
+             $base
+           );
+         }
+       }
+     }
+   }
+ }
+
+ /**
+  * Spills the entry at $baseOffset into a temporary stream and rewinds it.
+  *
+  * The entry is produced through streamEntryGenerator() at $depth + 1.
+  *
+  * @param FileHandlePool $pool         Pool used to stream the base entry.
+  * @param string         $packFile     Pack file containing the base entry.
+  * @param int            $baseOffset   Byte offset of the base entry header.
+  * @param int            $depth        Depth of the entry that needs the base.
+  * @param callable       $getSizeShaFn fn( string $sha ): int.
+  * @param callable       $streamShaFn  fn( string $sha, int $depth ): iterable of chunks.
+  *
+  * @return StreamReader|null The rewound stream, or null when a write to
+  *                           it was rejected (for example because the
+  *                           temporary file could not be created).
+  */
+ private function resolveBaseToTempFile(
+   FileHandlePool $pool,
+   string $packFile,
+   int $baseOffset,
+   int $depth,
+   callable $getSizeShaFn,
+   callable $streamShaFn
+ ): ?StreamReader {
+   $result = BufferedFileReader::createTemp();
+   $chunks = $this->streamEntryGenerator(
+     $pool,
+     $packFile,
+     $baseOffset,
+     $depth + 1,
+     $getSizeShaFn,
+     $streamShaFn
+   );
+
+   foreach( $chunks as $chunk ) {
+     if( !$result->write( $chunk ) ) {
+       return null;
+     }
+   }
+
+   $result->rewind();
+
+   return $result;
+ }
+
+ /**
+  * Decodes the variable-length entry header at the current position.
+  *
+  * The low 4 bits of the first byte start the value; while a byte has its
+  * high bit set, another byte follows and contributes 7 more bits, least
+  * significant group first.
+  *
+  * @return array{value: int, byte: int} The decoded value and the first
+  *                                      header byte (callers take the
+  *                                      entry type from its bits 4-6).
+  */
+ private function readVarInt( StreamReader $stream ): array {
+   $first = ord( $stream->read( 1 ) );
+   $value = $first & 15;
+
+   for( $byte = $first, $shift = 4; $byte & 128; $shift += 7 ) {
+     $byte = ord( $stream->read( 1 ) );
+     $value |= ($byte & 127) << $shift;
+   }
+
+   return [ 'value' => $value, 'byte' => $first ];
+ }
+
+ /**
+  * Decodes the backwards distance stored after a type 6 entry header.
+  *
+  * Each byte carries 7 bits, most significant group first; while a byte
+  * has its high bit set another byte follows, and the running value is
+  * incremented by one before being shifted for the next group.
+  *
+  * @return int Distance that callers subtract from the entry offset to
+  *             locate the base entry.
+  */
+ private function readOffsetDelta( StreamReader $stream ): int {
+   $byte = ord( $stream->read( 1 ) );
+   $distance = $byte & 127;
+
+   while( ($byte & 128) !== 0 ) {
+     $byte = ord( $stream->read( 1 ) );
+     $distance = (($distance + 1) << 7) | ($byte & 127);
+   }
+
+   return $distance;
+ }
+
+ private function inflate( StreamReader $stream, int $cap = 0 ): string {
+ $inflater = CompressionStream::createInflater();
+ $chunks = [];
+ $len = 0;
+ $result = '';
+
+ foreach( $inflater->stream( $stream ) as $data ) {
$chunks[] = $data;
$len += strlen( $data );
git/PackIndex.php
<?php
+require_once __DIR__ . '/StreamReader.php';
+require_once __DIR__ . '/FileHandlePool.php';
+
class PackIndex {
private string $indexFile;
$pool->computeVoid(
$this->indexFile,
- function( mixed $handle ) use ( $sha, $onFound ): void {
- $this->ensureFanout( $handle );
+ function( StreamReader $stream ) use ( $sha, $onFound ): void {
+ $this->ensureFanout( $stream );
if( !empty( $this->fanoutCache ) ) {
- $this->binarySearch( $handle, $sha, $onFound );
+ $this->binarySearch( $stream, $sha, $onFound );
}
}
);
}
- private function ensureFanout( mixed $handle ): void {
+ private function ensureFanout( StreamReader $stream ): void {
if( empty( $this->fanoutCache ) ) {
- fseek( $handle, 0 );
+ $stream->seek( 0 );
- $head = fread( $handle, 8 );
+ $head = $stream->read( 8 );
if( $head === "\377tOc\0\0\0\2" ) {
- $this->fanoutCache = array_values(
- unpack( 'N*', fread( $handle, 1024 ) )
- );
+ $data = $stream->read( 1024 );
+
+ $this->fanoutCache = array_values( unpack( 'N*', $data ) );
}
}
}
private function binarySearch(
- mixed $handle,
+ StreamReader $stream,
string $sha,
callable $onFound
$mid = ($low + $high) >> 1;
$pos = 1032 + $mid * 20;
- $cmp = $this->readShaBytes( $handle, $pos );
+ $cmp = $this->readShaBytes( $stream, $pos );
if( $cmp < $sha ) {
$low = $mid + 1;
} elseif( $cmp > $sha ) {
$high = $mid - 1;
} else {
- $result = $this->readOffset( $handle, $mid );
+ $result = $this->readOffset( $stream, $mid );
}
}
if( $result !== 0 ) {
$onFound( $this->packFile, $result );
}
}
}
-
- private function readShaBytes( mixed $handle, int $pos ): string {
- $result = '';
+ private function readShaBytes( StreamReader $stream, int $pos ): string {
if(
$this->bufferOffset === -1 ||
$pos < $this->bufferOffset ||
$pos + 20 > $this->bufferOffset + 8192
) {
- fseek( $handle, $pos );
+ $stream->seek( $pos );
$this->bufferOffset = $pos;
- $this->buffer = fread( $handle, 8192 );
+ $this->buffer = $stream->read( 8192 );
}
$offset = $pos - $this->bufferOffset;
return substr( $this->buffer, $offset, 20 );
}
- private function readOffset( mixed $handle, int $mid ): int {
- $total = $this->fanoutCache[255];
- $pos = 1032 + $total * 24 + $mid * 4;
+ private function readOffset( StreamReader $stream, int $mid ): int {
+ $total = $this->fanoutCache[255];
+ $pos = 1032 + $total * 24 + $mid * 4;
- fseek( $handle, $pos );
+ $stream->seek( $pos );
- $packed = fread( $handle, 4 );
+ $packed = $stream->read( 4 );
$offset = unpack( 'N', $packed )[1];
if( $offset & 0x80000000 ) {
$pos64 = 1032 + $total * 28 + ($offset & 0x7FFFFFFF) * 8;
- fseek( $handle, $pos64 );
+ $stream->seek( $pos64 );
- $offset = unpack( 'J', fread( $handle, 8 ) )[1];
+ $packed64 = $stream->read( 8 );
+ $offset = unpack( 'J', $packed64 )[1];
}
git/PackLocator.php
$this->indexes = [];
$this->cache = [];
- $packFiles = glob( "{$objectsPath}/pack/*.idx" ) ?: [];
+ $packFiles = glob( "{$objectsPath}/pack/*.idx" );
- foreach( $packFiles as $indexFile ) {
- $this->indexes[] = new PackIndex( $indexFile );
+ if( $packFiles !== false ) {
+ foreach( $packFiles as $indexFile ) {
+ $this->indexes[] = new PackIndex( $indexFile );
+ }
}
}
git/StreamReader.php
+<?php
+/**
+ * Seekable byte stream used by the pack and index readers in place of a
+ * raw file handle.
+ *
+ * NOTE(review): the failure semantics noted below are those of the
+ * BufferedFileReader implementation introduced in the same commit; confirm
+ * that other implementations are expected to match them.
+ */
+interface StreamReader {
+ /**
+  * Reads up to $length bytes from the current position.
+  * BufferedFileReader returns '' at end-of-file or when the stream is closed.
+  */
+ public function read( int $length ): string;
+
+ /**
+  * Writes $data at the current position and reports success.
+  * BufferedFileReader accepts writes only on temporary streams.
+  */
+ public function write( string $data ): bool;
+
+ /**
+  * Moves the position; $whence defaults to SEEK_SET (absolute offset).
+  * Returns whether the move succeeded.
+  */
+ public function seek( int $offset, int $whence = SEEK_SET ): bool;
+
+ /** Returns the current position in the stream. */
+ public function tell(): int;
+
+ /** Reports whether the stream is at end-of-file. */
+ public function eof(): bool;
+
+ /** Moves the position back to the start of the stream. */
+ public function rewind(): void;
+}
Delta: 450 lines added, 364 lines removed, 86-line increase