Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/treetrek.git
<?php
require_once __DIR__ . '/PackStreamManager.php';
require_once __DIR__ . '/DeltaDecoder.php';
require_once __DIR__ . '/CompressionStream.php';
require_once __DIR__ . '/BufferedReader.php';
require_once __DIR__ . '/PackContext.php';
require_once __DIR__ . '/GitPackStream.php';

/**
 * Reads individual entries out of a Git pack, either materialised as a
 * string or streamed chunk by chunk, resolving offset deltas and reference
 * deltas against their base entries.
 *
 * Uncapped results of in-memory reads are remembered in a bounded cache
 * keyed by the entry's pack offset.
 */
class PackEntryReader {
  /** Depth passed to PackContext::isWithinDepth() before streaming a delta. */
  private const MAX_DEPTH    = 200;

  /** Largest base size (bytes, 8 MiB) read via the in-memory path. */
  private const MAX_BASE_RAM = 8388608;

  /** Maximum number of entries kept in the offset cache. */
  private const MAX_CACHE    = 1024;

  /** Pack entry type whose base is located by a backwards offset. */
  private const TYPE_OFS_DELTA = 6;

  /** Pack entry type whose base is named by a 20-byte object id. */
  private const TYPE_REF_DELTA = 7;

  private DeltaDecoder       $decoder;
  private ZlibDeflatorStream $deflator;
  private ZlibInflaterStream $inflater;

  /** @var array<int, string> Entry content keyed by pack offset. */
  private array              $cache;

  /** Number of entries currently held in $cache. */
  private int                $cacheSize;

  /**
   * @param DeltaDecoder $decoder Applies delta data to base content.
   */
  public function __construct( DeltaDecoder $decoder ) {
    $this->decoder   = $decoder;
    $this->deflator  = new ZlibDeflatorStream();
    $this->inflater  = new ZlibInflaterStream();
    $this->cache     = [];
    $this->cacheSize = 0;
  }

  /**
   * Reads the header of the entry the context points at.
   *
   * The second argument handed to computeArray() carries only the 'type'
   * and 'size' keys; presumably it is the fallback returned when the
   * entry cannot be read (verify against PackContext).
   *
   * @return array Keys 'type' and 'size' from the entry header, plus
   *               'baseOffset' (absolute offset of the base for offset
   *               deltas, otherwise 0) and 'baseSha' (hex id of the base
   *               for reference deltas, otherwise '').
   */
  public function getEntryMeta( PackContext $context ): array {
    return $context->computeArray(
      function( StreamReader $stream, int $offset ): array {
        $packStream = new GitPackStream( $stream );
        $packStream->seek( $offset );
        $hdr  = $packStream->readVarInt();
        $type = $hdr['type'];

        return [
          'type'       => $type,
          'size'       => $hdr['size'],
          'baseOffset' => $type === self::TYPE_OFS_DELTA
            ? $offset - $packStream->readOffsetDelta()
            : 0,
          'baseSha'    => $type === self::TYPE_REF_DELTA
            ? \bin2hex( $packStream->read( 20 ) )
            : ''
        ];
      },
      [ 'type' => 0, 'size' => 0 ]
    );
  }

  /**
   * Returns the size of the entry the context points at.
   *
   * For delta entries the value comes from the decoder's
   * readDeltaTargetSize(); for all other entries it is the size recorded
   * in the entry header.
   */
  public function getSize( PackContext $context ): int {
    return $context->computeIntDedicated(
      function( StreamReader $stream, int $offset ): int {
        $packStream = new GitPackStream( $stream );
        $packStream->seek( $offset );
        $hdr = $packStream->readVarInt();

        return $this->isDelta( $hdr['type'] )
          ? $this->decoder->readDeltaTargetSize(
              $stream, $hdr['type']
            )
          : $hdr['size'];
      },
      0
    );
  }

  /**
   * Reads the content of the entry the context points at into a string.
   *
   * @param int      $cap           Maximum number of bytes to return;
   *                                0 means no limit.
   * @param callable $readShaBaseFn Invoked as fn( string $sha, int $cap )
   *                                to obtain the content of a reference
   *                                delta's base; must return a string.
   */
  public function read(
    PackContext $context,
    int $cap,
    callable $readShaBaseFn
  ): string {
    return $context->computeStringDedicated(
      function(
        StreamReader $s,
        int $o
      ) use ( $cap, $readShaBaseFn ): string {
        return $this->readWithStream(
          new GitPackStream( $s ), $o, $cap, $readShaBaseFn
        );
      },
      ''
    );
  }

  /**
   * Reads the entry at $offset, serving it from the cache when possible.
   *
   * The cache is consulted before the stream is touched, so a hit costs
   * no seek and no header parse. Callers must not rely on the stream
   * position afterwards. Only uncapped results are cached, because a
   * capped result may be a truncated prefix.
   */
  private function readWithStream(
    GitPackStream $stream,
    int $offset,
    int $cap,
    callable $readShaBaseFn
  ): string {
    if( isset( $this->cache[$offset] ) ) {
      return $this->truncate( $this->cache[$offset], $cap );
    }

    $stream->seek( $offset );
    $type = $stream->readVarInt()['type'];

    $result = match( $type ) {
      self::TYPE_OFS_DELTA => $this->readOffsetDeltaContent(
        $stream, $offset, $cap, $readShaBaseFn
      ),
      self::TYPE_REF_DELTA => $this->readRefDeltaContent(
        $stream, $cap, $readShaBaseFn
      ),
      default => $this->inflate( $stream, $cap ),
    };

    // Resolving the base may call back into this reader, hence the
    // second isset() check before counting a new cache entry.
    if( $cap === 0 && !isset( $this->cache[$offset] ) ) {
      $this->remember( $offset, $result );
    }

    return $result;
  }

  /**
   * Reads an offset delta entry: resolves the base found $neg bytes
   * before $offset, then applies the inflated delta data to it.
   *
   * Returns '' when the encoded distance is not a usable backwards
   * distance (see isValidBaseDistance()).
   *
   * NOTE(review): the base is read with the same $cap, so it may be
   * truncated; confirm DeltaDecoder::apply() tolerates delta copy
   * instructions that reference base bytes beyond $cap.
   */
  private function readOffsetDeltaContent(
    GitPackStream $stream,
    int $offset,
    int $cap,
    callable $readShaBaseFn
  ): string {
    $neg = $stream->readOffsetDelta();

    if( !$this->isValidBaseDistance( $neg, $offset ) ) {
      return '';
    }

    $cur   = $stream->tell();
    $bData = $this->readWithStream(
      $stream, $offset - $neg, $cap, $readShaBaseFn
    );

    // Reading the base moved the stream; return to the delta data.
    $stream->seek( $cur );

    return $this->decoder->apply(
      $bData,
      $this->inflate( $stream ),
      $cap
    );
  }

  /**
   * Reads a reference delta entry: obtains the base named by the 20-byte
   * id via $readShaBaseFn, then applies the inflated delta data to it.
   *
   * NOTE(review): $cap is forwarded to $readShaBaseFn, so the base may be
   * truncated; confirm DeltaDecoder::apply() tolerates that.
   */
  private function readRefDeltaContent(
    GitPackStream $stream,
    int $cap,
    callable $readShaBaseFn
  ): string {
    $sha = \bin2hex( $stream->read( 20 ) );
    $cur = $stream->tell();
    $bas = $readShaBaseFn( $sha, $cap );

    // Restore the position in case the callback moved this stream.
    $stream->seek( $cur );

    return $this->decoder->apply(
      $bas,
      $this->inflate( $stream ),
      $cap
    );
  }

  /**
   * Streams what the deflator produces for a non-delta entry, starting
   * right after the entry header. Delta entries yield nothing.
   */
  public function streamRawCompressed(
    PackContext $context
  ): Generator {
    yield from $context->streamGenerator(
      function( StreamReader $stream, int $offset ): Generator {
        $packStream = new GitPackStream( $stream );
        $packStream->seek( $offset );
        $hdr = $packStream->readVarInt();

        yield from $this->isDelta( $hdr['type'] )
          ? []
          : $this->deflator->stream( $stream );
      }
    );
  }

  /**
   * Streams what the deflator produces for an entry after skipping the
   * entry header and, for delta entries, the base reference (the encoded
   * offset or the 20-byte id).
   */
  public function streamRawDelta( PackContext $context ): Generator {
    yield from $context->streamGenerator(
      function( StreamReader $stream, int $offset ): Generator {
        $packStream = new GitPackStream( $stream );
        $packStream->seek( $offset );
        $hdr = $packStream->readVarInt();

        if( $hdr['type'] === self::TYPE_OFS_DELTA ) {
          $packStream->readOffsetDelta();
        } elseif( $hdr['type'] === self::TYPE_REF_DELTA ) {
          $packStream->read( 20 );
        }

        yield from $this->deflator->stream( $stream );
      }
    );
  }

  /**
   * Streams the content of the entry the context points at: inflated
   * data for non-delta entries, reconstructed data for delta entries.
   */
  public function streamEntryGenerator(
    PackContext $context
  ): Generator {
    yield from $context->streamGeneratorDedicated(
      function(
        StreamReader $stream,
        int $offset
      ) use ( $context ): Generator {
        $packStream = new GitPackStream( $stream );
        $packStream->seek( $offset );
        $hdr = $packStream->readVarInt();

        yield from $this->isDelta( $hdr['type'] )
          ? $this->streamDeltaObjectGenerator(
              $packStream, $context, $hdr['type'], $offset
            )
          : $this->inflater->stream( $stream );
      }
    );
  }

  /**
   * Dispatches a delta entry to the matching resolver, or yields nothing
   * when the context reports it is beyond MAX_DEPTH.
   */
  private function streamDeltaObjectGenerator(
    GitPackStream $stream,
    PackContext $context,
    int $type,
    int $offset
  ): Generator {
    yield from $context->isWithinDepth( self::MAX_DEPTH )
      ? ( $type === self::TYPE_OFS_DELTA
          ? $this->processOffsetDelta( $stream, $context, $offset )
          : $this->processRefDelta( $stream, $context ) )
      : [];
  }

  /**
   * Returns the size of the entry at $offset without disturbing the
   * stream: cached entries are answered from the cache without touching
   * the stream, and otherwise the original position is restored before
   * returning.
   */
  private function readSizeWithStream(
    GitPackStream $stream,
    int $offset
  ): int {
    if( isset( $this->cache[$offset] ) ) {
      return \strlen( $this->cache[$offset] );
    }

    $cur = $stream->tell();
    $stream->seek( $offset );
    $hdr = $stream->readVarInt();

    $result = $this->isDelta( $hdr['type'] )
      ? $this->decoder->readDeltaTargetSize(
          $stream, $hdr['type']
        )
      : $hdr['size'];

    $stream->seek( $cur );

    return $result;
  }

  /**
   * Streams an offset delta entry.
   *
   * The base comes from the cache when present; otherwise it is read into
   * memory when its size is at most MAX_BASE_RAM, or gathered through
   * collectBase() when larger. Yields nothing when the encoded distance
   * is not a usable backwards distance (see isValidBaseDistance()).
   */
  private function processOffsetDelta(
    GitPackStream $stream,
    PackContext $context,
    int $offset
  ): Generator {
    $neg = $stream->readOffsetDelta();

    if( !$this->isValidBaseDistance( $neg, $offset ) ) {
      return;
    }

    $cur     = $stream->tell();
    $baseOff = $offset - $neg;

    $baseSrc = isset( $this->cache[$baseOff] )
      ? $this->cache[$baseOff]
      : ( $this->readSizeWithStream( $stream, $baseOff )
          <= self::MAX_BASE_RAM
          ? $this->readWithStream(
              $stream,
              $baseOff,
              0,
              function( string $sha, int $cap ) use ( $context ): string {
                return $this->resolveBaseSha( $sha, $cap, $context );
              }
            )
          : $this->collectBase(
              $this->streamEntryGenerator(
                $context->deriveOffsetContext( $neg )
              )
            ) );

    // Reading the base moved the stream; return to the delta data.
    $stream->seek( $cur );

    yield from $this->decoder->applyStreamGenerator(
      $stream, $baseSrc
    );
  }

  /**
   * Streams a reference delta entry, holding the base in memory when the
   * context reports a size of at most MAX_BASE_RAM and gathering it
   * through collectBase() otherwise.
   */
  private function processRefDelta(
    GitPackStream $stream,
    PackContext $context
  ): Generator {
    $baseSha = \bin2hex( $stream->read( 20 ) );
    $cur     = $stream->tell();
    $size    = $context->resolveBaseSize( $baseSha );

    $baseSrc = $size <= self::MAX_BASE_RAM
      ? $this->resolveBaseSha( $baseSha, 0, $context )
      : $this->collectBase(
          $context->resolveBaseStream( $baseSha )
        );

    // Restore the position in case resolving the base moved this stream.
    $stream->seek( $cur );

    yield from $this->decoder->applyStreamGenerator(
      $stream, $baseSrc
    );
  }

  /**
   * Gathers chunks into a single base source.
   *
   * Chunks are kept in memory until their combined length exceeds
   * MAX_BASE_RAM; from then on everything is written to a BufferedReader
   * over php://temp, which is rewound before being returned.
   *
   * @param iterable $chunks String chunks of the base content.
   *
   * @return BufferedReader|string The joined string, or the rewound
   *                               reader once the limit was exceeded.
   */
  private function collectBase(
    iterable $chunks
  ): BufferedReader|string {
    $parts = [];
    $total = 0;
    $tmp   = false;

    foreach( $chunks as $chunk ) {
      $total += \strlen( $chunk );

      if( $tmp instanceof BufferedReader ) {
        $tmp->write( $chunk );
      } elseif( $total > self::MAX_BASE_RAM ) {
        $tmp = new BufferedReader(
          'php://temp/maxmemory:65536', 'w+b'
        );

        // Move what was buffered so far into the temporary stream.
        foreach( $parts as $part ) {
          $tmp->write( $part );
        }

        $tmp->write( $chunk );
        $parts = [];
      } else {
        $parts[] = $chunk;
      }
    }

    if( $tmp instanceof BufferedReader ) {
      $tmp->rewind();
    }

    return $tmp === false ? \implode( '', $parts ) : $tmp;
  }

  /**
   * Joins every chunk of the base stream the context resolves for $sha
   * and returns at most $cap bytes of it (all of it when $cap is 0).
   */
  private function resolveBaseSha(
    string $sha,
    int $cap,
    PackContext $context
  ): string {
    $chunks = [];

    foreach( $context->resolveBaseStream( $sha ) as $chunk ) {
      $chunks[] = $chunk;
    }

    return $this->truncate( \implode( '', $chunks ), $cap );
  }

  /**
   * Inflates from the stream's current position into a string, stopping
   * early once $cap bytes are available (when $cap is positive).
   */
  private function inflate(
    StreamReader $stream,
    int $cap = 0
  ): string {
    $chunks = [];
    $len    = 0;

    foreach( $this->inflater->stream( $stream ) as $data ) {
      $chunks[]  = $data;
      $len      += \strlen( $data );

      if( $cap > 0 && $len >= $cap ) {
        break;
      }
    }

    return $this->truncate( \implode( '', $chunks ), $cap );
  }

  /** Tells whether the entry type is an offset or reference delta. */
  private function isDelta( int $type ): bool {
    return $type === self::TYPE_OFS_DELTA
      || $type === self::TYPE_REF_DELTA;
  }

  /**
   * Tells whether $neg is a usable distance back from $offset to a base.
   *
   * A distance of zero or less would make the entry its own base (and
   * the in-memory reader recurse without end); a distance greater than
   * $offset would place the base before the start of the pack. Either
   * can only come from corrupt data.
   */
  private function isValidBaseDistance( int $neg, int $offset ): bool {
    return $neg > 0 && $neg <= $offset;
  }

  /** Returns the first $cap bytes of $data, or all of it if $cap <= 0. */
  private function truncate( string $data, int $cap ): string {
    return $cap > 0 && \strlen( $data ) > $cap
      ? \substr( $data, 0, $cap )
      : $data;
  }

  /**
   * Stores content in the cache and, once more than MAX_CACHE entries
   * are held, drops the first key of the cache array.
   */
  private function remember( int $offset, string $content ): void {
    $this->cache[$offset] = $content;
    $this->cacheSize++;

    if( $this->cacheSize > self::MAX_CACHE ) {
      unset( $this->cache[\array_key_first( $this->cache )] );
      $this->cacheSize--;
    }
  }
}