Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/treetrek.git
<?php
/**
 * Reads objects stored in the pack files found under a Git repository's
 * "objects/pack" directory.
 *
 * Objects are located through version 2 ".idx" files and then inflated from
 * the matching ".pack" file; OFS_DELTA (type 6) and REF_DELTA (type 7)
 * entries are resolved against their base objects. File handles, fan-out
 * tables and per-first-byte index buckets are cached for the lifetime of the
 * instance.
 *
 * Note that a single handle per file is shared by every method, so any call
 * that looks up another object may move the read position of the handle the
 * caller is currently using.
 */
class GitPacks {
  /** Largest object size (in bytes) that read() will load into memory. */
  private const MAX_RAM = 1048576;

  /** Number of bytes pulled from a pack file per inflate step. */
  private const CHUNK_SIZE = 8192;

  /** Magic number and version that open a version 2 index file. */
  private const IDX_MAGIC = "\377tOc\0\0\0\2";

  /** Byte position of the SHA table: 8-byte header plus 1024-byte fan-out. */
  private const IDX_TABLES = 1032;

  private string $objectsPath;
  private array $packFiles;
  private ?string $lastPack = null;

  private array $fileHandles       = [];
  private array $fanoutCache       = [];
  private array $shaBucketCache    = [];
  private array $offsetBucketCache = [];

  /**
   * @param string $objectsPath Path to the repository's "objects" directory.
   */
  public function __construct( string $objectsPath ) {
    $this->objectsPath = $objectsPath;
    $this->packFiles   = glob( "{$this->objectsPath}/pack/*.idx" ) ?: [];
  }

  /**
   * Closes every file handle opened through getHandle().
   */
  public function __destruct() {
    foreach( $this->fileHandles as $handle ) {
      if( is_resource( $handle ) ) {
        fclose( $handle );
      }
    }
  }

  /**
   * Returns the leading bytes of an object without loading all of it.
   *
   * @param string $sha 40-character hexadecimal object name.
   * @param int    $len Maximum number of bytes to return.
   *
   * @return ?string At most $len bytes, or null when the object is not in
   *                 any pack or its pack file cannot be opened.
   */
  public function peek( string $sha, int $len = 12 ): ?string {
    $info = $this->findPackInfo( $sha );

    if( $info['offset'] === -1 ) {
      return null;
    }

    $handle = $this->getHandle( $info['file'] );

    if( !$handle ) {
      return null;
    }

    return $this->readPackEntry( $handle, $info['offset'], $len, $len );
  }

  /**
   * Returns the full content of an object.
   *
   * @param string $sha 40-character hexadecimal object name.
   *
   * @return ?string The content, or null when the object is not packed, is
   *                 larger than MAX_RAM, or its pack cannot be opened.
   */
  public function read( string $sha ): ?string {
    $info = $this->findPackInfo( $sha );

    if( $info['offset'] === -1 ) {
      return null;
    }

    $size = $this->extractPackedSize( $info['file'], $info['offset'] );

    if( $size > self::MAX_RAM ) {
      return null;
    }

    $handle = $this->getHandle( $info['file'] );

    return $handle
      ? $this->readPackEntry( $handle, $info['offset'], $size )
      : null;
  }

  /**
   * Passes an object's content to a callback piece by piece.
   *
   * @param string   $sha      40-character hexadecimal object name.
   * @param callable $callback Invoked with each string chunk of content.
   *
   * @return bool True when the whole object was emitted; false when it is
   *              not packed, cannot be opened, or its data is corrupt
   *              (chunks may already have been emitted in that case).
   */
  public function stream( string $sha, callable $callback ): bool {
    $info = $this->findPackInfo( $sha );

    if( $info['offset'] === -1 ) {
      return false;
    }

    $size   = $this->extractPackedSize( $info['file'], $info['offset'] );
    $handle = $this->getHandle( $info['file'] );

    if( !$handle ) {
      return false;
    }

    return $this->streamPackEntry(
      $handle,
      $info['offset'],
      $size,
      $callback
    );
  }

  /**
   * Returns the inflated size of an object as recorded in its pack entry
   * (for deltas, the target size stored in the delta header).
   *
   * @param string $sha 40-character hexadecimal object name.
   *
   * @return ?int Size in bytes, or null when the object is not packed.
   */
  public function getSize( string $sha ): ?int {
    $info = $this->findPackInfo( $sha );

    if( $info['offset'] === -1 ) {
      return null;
    }

    return $this->extractPackedSize( $info['file'], $info['offset'] );
  }

  /**
   * Finds which pack holds an object, trying the most recently matched
   * index first.
   *
   * @return array ['file' => pack path, 'offset' => int] on success, or
   *               ['offset' => -1] when the name is invalid or not found.
   */
  private function findPackInfo( string $sha ): array {
    if( !ctype_xdigit( $sha ) || strlen( $sha ) !== 40 ) {
      return ['offset' => -1];
    }

    $binarySha = hex2bin( $sha );

    if( $this->lastPack ) {
      $offset = $this->findInIdx( $this->lastPack, $binarySha );

      if( $offset !== -1 ) {
        return $this->makeResult( $this->lastPack, $offset );
      }
    }

    foreach( $this->packFiles as $indexFile ) {
      if( $indexFile === $this->lastPack ) {
        continue;
      }

      $offset = $this->findInIdx( $indexFile, $binarySha );

      if( $offset !== -1 ) {
        $this->lastPack = $indexFile;

        return $this->makeResult( $indexFile, $offset );
      }
    }

    return ['offset' => -1];
  }

  /**
   * Builds a lookup result, mapping the index path to its pack path.
   */
  private function makeResult( string $indexPath, int $offset ): array {
    // Swap only the trailing extension; ".idx" may also occur earlier in
    // the path (for example in a directory name) and must be left alone.
    $packPath = substr( $indexPath, -4 ) === '.idx'
      ? substr( $indexPath, 0, -4 ) . '.pack'
      : $indexPath;

    return [
      'file'   => $packPath,
      'offset' => $offset
    ];
  }

  /**
   * Returns the cached fan-out table of a version 2 index, loading it on
   * first use.
   *
   * @return ?array 256 cumulative counts, or null when the header does not
   *                match IDX_MAGIC or the table is truncated.
   */
  private function loadFanout( string $indexFile, $fileHandle ): ?array {
    if( isset( $this->fanoutCache[$indexFile] ) ) {
      return $this->fanoutCache[$indexFile];
    }

    fseek( $fileHandle, 0 );

    if( fread( $fileHandle, 8 ) !== self::IDX_MAGIC ) {
      return null;
    }

    $table = fread( $fileHandle, 1024 );

    if( $table === false || strlen( $table ) !== 1024 ) {
      return null;
    }

    $this->fanoutCache[$indexFile] = array_values( unpack( 'N*', $table ) );

    return $this->fanoutCache[$indexFile];
  }

  /**
   * Looks up an object's pack offset in one index file.
   *
   * @param string $indexFile Path to the ".idx" file.
   * @param string $binarySha 20-byte binary object name.
   *
   * @return int Offset into the pack file, or -1 when absent or unreadable.
   */
  private function findInIdx( string $indexFile, string $binarySha ): int {
    $fileHandle = $this->getHandle( $indexFile );

    if( !$fileHandle ) {
      return -1;
    }

    $fanout = $this->loadFanout( $indexFile, $fileHandle );

    if( $fanout === null ) {
      return -1;
    }

    // fanout[b] counts the objects whose first byte is <= b, so the bucket
    // for this first byte spans entries [start, end).
    $firstByte = ord( $binarySha[0] );
    $start     = $firstByte === 0 ? 0 : $fanout[$firstByte - 1];
    $end       = $fanout[$firstByte];

    if( $end <= $start ) {
      return -1;
    }

    $cacheKey = "$indexFile:$firstByte";

    if( !isset( $this->shaBucketCache[$cacheKey] ) ) {
      $count = $end - $start;

      fseek( $fileHandle, self::IDX_TABLES + ($start * 20) );

      $this->shaBucketCache[$cacheKey] =
        (string)fread( $fileHandle, $count * 20 );

      // The 4-byte offsets follow the SHA table (20 bytes per object) and
      // the CRC table (4 bytes per object), hence 24 bytes per object.
      fseek(
        $fileHandle,
        self::IDX_TABLES + ($fanout[255] * 24) + ($start * 4)
      );

      $this->offsetBucketCache[$cacheKey] =
        (string)fread( $fileHandle, $count * 4 );
    }

    $shaBlock = $this->shaBucketCache[$cacheKey];
    $low      = 0;
    $high     = intdiv( strlen( $shaBlock ), 20 ) - 1;
    $foundIdx = -1;

    while( $low <= $high ) {
      $mid = ($low + $high) >> 1;

      // strcmp compares bytes; "<" and ">" would compare numerically when
      // both strings happen to look like numbers.
      $order = strcmp( substr( $shaBlock, $mid * 20, 20 ), $binarySha );

      if( $order < 0 ) {
        $low = $mid + 1;
      } elseif( $order > 0 ) {
        $high = $mid - 1;
      } else {
        $foundIdx = $mid;
        break;
      }
    }

    if( $foundIdx === -1 ) {
      return -1;
    }

    $offsetData = (string)substr(
      $this->offsetBucketCache[$cacheKey],
      $foundIdx * 4,
      4
    );

    if( strlen( $offsetData ) !== 4 ) {
      return -1;
    }

    $offset = unpack( 'N', $offsetData )[1];

    // A set high bit means the low 31 bits index the 64-bit offset table,
    // which starts after the 4-byte offset table (28 bytes per object).
    if( $offset & 0x80000000 ) {
      $pos64 = self::IDX_TABLES + ($fanout[255] * 28) +
               (($offset & 0x7FFFFFFF) * 8);

      fseek( $fileHandle, $pos64 );

      $wide = fread( $fileHandle, 8 );

      if( $wide === false || strlen( $wide ) !== 8 ) {
        return -1;
      }

      $offset = unpack( 'J', $wide )[1];
    }

    return (int)$offset;
  }

  /**
   * Reads the pack entry at an offset into a string, resolving deltas.
   *
   * @param resource $fileHandle   Open pack file.
   * @param int      $offset       Position of the entry header.
   * @param int      $expectedSize Advisory size; passed through to helpers.
   * @param int      $cap          When > 0, at most this many leading bytes
   *                               are returned.
   */
  private function readPackEntry(
    $fileHandle,
    int $offset,
    int $expectedSize,
    int $cap = 0
  ): string {
    fseek( $fileHandle, $offset );

    $header = $this->readVarInt( $fileHandle );
    $type   = ($header['byte'] >> 4) & 7;

    if( $type === 6 ) {
      return $this->handleOfsDelta(
        $fileHandle,
        $offset,
        $expectedSize,
        $cap
      );
    }

    if( $type === 7 ) {
      return $this->handleRefDelta( $fileHandle, $expectedSize, $cap );
    }

    return $this->decompressToString( $fileHandle, $expectedSize, $cap );
  }

  /**
   * Streams the pack entry at an offset to a callback, resolving deltas.
   *
   * @return bool True when the entry was emitted completely.
   */
  private function streamPackEntry(
    $fileHandle,
    int $offset,
    int $expectedSize,
    callable $callback
  ): bool {
    fseek( $fileHandle, $offset );

    $header = $this->readVarInt( $fileHandle );
    $type   = ($header['byte'] >> 4) & 7;

    if( $type === 6 || $type === 7 ) {
      return $this->streamDeltaObject(
        $fileHandle,
        $offset,
        $type,
        $expectedSize,
        $callback
      );
    }

    return $this->streamDecompression( $fileHandle, $callback );
  }

  /**
   * Streams a deltified entry: the base object is collected in memory, then
   * the delta is inflated chunk by chunk and each instruction's output is
   * handed to the callback as soon as it is complete.
   *
   * @param int $type 6 for OFS_DELTA, 7 for REF_DELTA.
   *
   * @return bool True when the delta stream ended cleanly; false when the
   *              base could not be loaded or the delta data is corrupt or
   *              truncated.
   */
  private function streamDeltaObject(
    $fileHandle,
    int $offset,
    int $type,
    int $expectedSize,
    callable $callback
  ): bool {
    fseek( $fileHandle, $offset );
    $this->readVarInt( $fileHandle );

    $base    = '';
    $collect = function( $chunk ) use ( &$base ) { $base .= $chunk; };

    if( $type === 6 ) {
      $baseOffset = $offset - $this->readOfsDistance( $fileHandle );
      $deltaPos   = ftell( $fileHandle );

      // A base must precede its delta; anything else is corrupt and would
      // otherwise recurse without end.
      if( $baseOffset < 0 || $baseOffset >= $offset ) {
        return false;
      }

      $loaded = $this->streamPackEntry( $fileHandle, $baseOffset, 0, $collect );
    } else {
      $baseSha  = bin2hex( (string)fread( $fileHandle, 20 ) );
      $deltaPos = ftell( $fileHandle );
      $loaded   = $this->stream( $baseSha, $collect );
    }

    if( !$loaded ) {
      return false;
    }

    // Loading the base may have moved this (shared) handle.
    fseek( $fileHandle, $deltaPos );

    $inflator = inflate_init( ZLIB_ENCODING_DEFLATE );

    if( $inflator === false ) {
      return false;
    }

    // The delta starts with two size varints (source, target) to skip.
    $headersLeft = 2;
    $buffer      = '';

    while( !feof( $fileHandle ) ) {
      // Read small chunks to prevent memory spikes
      $chunk = fread( $fileHandle, self::CHUNK_SIZE );

      if( $chunk === false || $chunk === '' ) {
        break;
      }

      $data = @inflate_add( $inflator, $chunk );

      if( $data === false ) {
        return false;
      }

      $buffer  .= $data;
      $position = 0;

      while( $headersLeft > 0 ) {
        $next = $this->varIntEnd( $buffer, $position );

        if( $next === null ) {
          break;
        }

        $position = $next;
        $headersLeft--;
      }

      if( $headersLeft === 0 ) {
        // nextDeltaOp() yields null for an instruction that is not fully
        // buffered yet; it is retried once more data has been inflated.
        while( ($op = $this->nextDeltaOp( $buffer, $position )) !== null ) {
          $source = $op['copy'] ? $base : $buffer;

          $callback( (string)substr( $source, $op['offset'], $op['length'] ) );
        }
      }

      $buffer = (string)substr( $buffer, $position );

      if( inflate_get_status( $inflator ) === ZLIB_STREAM_END ) {
        return true;
      }
    }

    return false;
  }

  /**
   * Inflates the zlib stream at the handle's current position, passing each
   * non-empty piece of output to the callback.
   *
   * @return bool True when the stream ended cleanly; false on corrupt or
   *              truncated data.
   */
  private function streamDecompression(
    $fileHandle,
    callable $callback
  ): bool {
    $inflator = inflate_init( ZLIB_ENCODING_DEFLATE );

    if( $inflator === false ) {
      return false;
    }

    while( !feof( $fileHandle ) ) {
      $chunk = fread( $fileHandle, self::CHUNK_SIZE );

      if( $chunk === false || $chunk === '' ) {
        break;
      }

      $data = @inflate_add( $inflator, $chunk );

      if( $data === false ) {
        return false;
      }

      if( $data !== '' ) {
        $callback( $data );
      }

      if( inflate_get_status( $inflator ) === ZLIB_STREAM_END ) {
        return true;
      }
    }

    return false;
  }

  /**
   * Inflates the zlib stream at the handle's current position into a string.
   * Best effort: on corrupt data the bytes inflated so far are returned.
   *
   * @param int $maxSize Advisory only; not used to limit the read.
   * @param int $cap     When > 0, stop early and return exactly this many
   *                     bytes once at least that much has been inflated.
   */
  private function decompressToString(
    $fileHandle,
    int $maxSize,
    int $cap = 0
  ): string {
    $inflator = inflate_init( ZLIB_ENCODING_DEFLATE );

    if( $inflator === false ) {
      return '';
    }

    $result = '';

    while( !feof( $fileHandle ) ) {
      $chunk = fread( $fileHandle, self::CHUNK_SIZE );

      if( $chunk === false || $chunk === '' ) {
        break;
      }

      $data = @inflate_add( $inflator, $chunk );

      if( $data !== false ) {
        $result .= $data;
      }

      if( $cap > 0 && strlen( $result ) >= $cap ) {
        return substr( $result, 0, $cap );
      }

      if(
        $data === false ||
        inflate_get_status( $inflator ) === ZLIB_STREAM_END
      ) {
        break;
      }
    }

    return $result;
  }

  /**
   * Returns the inflated size of the entry at an offset; for delta entries
   * this is the target size taken from the delta header.
   *
   * @return int Size in bytes, or 0 when the pack cannot be opened.
   */
  private function extractPackedSize( string $packPath, int $offset ): int {
    $fileHandle = $this->getHandle( $packPath );

    if( !$fileHandle ) {
      return 0;
    }

    fseek( $fileHandle, $offset );

    $header = $this->readVarInt( $fileHandle );
    $size   = $header['value'];
    $type   = ($header['byte'] >> 4) & 7;

    if( $type === 6 || $type === 7 ) {
      return $this->readDeltaTargetSize( $fileHandle, $type );
    }

    return $size;
  }

  /**
   * Resolves an OFS_DELTA entry. The handle must be positioned just after
   * the entry header.
   *
   * @param int $offset Position of this entry's header in the pack.
   * @param int $cap    When > 0, at most this many leading bytes are built.
   *
   * @return string The reconstructed content ('' when the data is corrupt).
   */
  private function handleOfsDelta(
    $fileHandle,
    int $offset,
    int $expectedSize,
    int $cap = 0
  ): string {
    $baseOffset = $offset - $this->readOfsDistance( $fileHandle );

    // A base must precede its delta; anything else is corrupt and would
    // otherwise recurse without end.
    if( $baseOffset < 0 || $baseOffset >= $offset ) {
      return '';
    }

    // Inflate the delta first: reading the base moves this handle.
    $delta  = $this->inflateDelta( $fileHandle, $expectedSize, $cap );
    $needed = $this->baseBytesNeeded( $delta, $cap );

    $base = $needed === 0
      ? ''
      : $this->readPackEntry( $fileHandle, $baseOffset, 0, $needed ?? 0 );

    return $this->applyDelta( $base, $delta, $cap );
  }

  /**
   * Resolves a REF_DELTA entry. The handle must be positioned just after
   * the entry header, on the 20-byte name of the base object.
   *
   * @param int $cap When > 0, at most this many leading bytes are built.
   *
   * @return string The reconstructed content ('' when the data is corrupt).
   */
  private function handleRefDelta(
    $fileHandle,
    int $expectedSize,
    int $cap = 0
  ): string {
    $baseSha = bin2hex( (string)fread( $fileHandle, 20 ) );

    // Inflate the delta first: the base may live in this same pack, and
    // looking it up would move the shared handle away from the delta.
    $delta  = $this->inflateDelta( $fileHandle, $expectedSize, $cap );
    $needed = $this->baseBytesNeeded( $delta, $cap );

    if( $needed === 0 ) {
      $base = '';
    } elseif( $needed === null ) {
      $base = $this->read( $baseSha ) ?? '';
    } else {
      $base = $this->peek( $baseSha, $needed ) ?? '';
    }

    return $this->applyDelta( $base, $delta, $cap );
  }

  /**
   * Inflates the delta stream at the handle's current position.
   *
   * @param int $cap Output cap of the object being built; when > 0 only as
   *                 much of the delta as that output can require is read.
   */
  private function inflateDelta(
    $fileHandle,
    int $expectedSize,
    int $cap
  ): string {
    // Two size varints of at most 10 bytes each, then at most $cap
    // instructions: every valid instruction emits at least one byte and
    // occupies at most 128 bytes (opcode plus 127 literal bytes).
    $limit = $cap > 0 ? 20 + ($cap * 128) : 0;

    return $this->decompressToString( $fileHandle, $expectedSize, $limit );
  }

  /**
   * Works out how much of the base object a delta needs in order to produce
   * its first $cap output bytes.
   *
   * @return ?int null when $cap imposes no limit (whole base required),
   *              otherwise the number of leading base bytes required, which
   *              is 0 when only literal data is involved.
   */
  private function baseBytesNeeded( string $delta, int $cap ): ?int {
    if( $cap <= 0 ) {
      return null;
    }

    $position = 0;

    $this->skipSize( $delta, $position );
    $this->skipSize( $delta, $position );

    $produced = 0;
    $needed   = 0;

    while( $produced < $cap ) {
      $op = $this->nextDeltaOp( $delta, $position );

      if( $op === null ) {
        break;
      }

      if( $op['copy'] ) {
        // Only the part of the copy that lands before the cap matters.
        $take   = min( $op['length'], $cap - $produced );
        $needed = max( $needed, $op['offset'] + $take );
      }

      $produced += $op['length'];
    }

    return $needed;
  }

  /**
   * Applies a delta to a base object.
   *
   * @param string $base  Base content (a prefix suffices when $cap > 0 and
   *                      it covers every copy made before the cap).
   * @param string $delta Inflated delta: two size varints, then instructions.
   * @param int    $cap   When > 0, stop once this many bytes exist and
   *                      return at most that many.
   */
  private function applyDelta( string $base, string $delta, int $cap = 0 ): string {
    $position = 0;

    $this->skipSize( $delta, $position );
    $this->skipSize( $delta, $position );

    $output = '';

    while( $cap <= 0 || strlen( $output ) < $cap ) {
      $op = $this->nextDeltaOp( $delta, $position );

      if( $op === null ) {
        break;
      }

      $source  = $op['copy'] ? $base : $delta;
      $output .= (string)substr( $source, $op['offset'], $op['length'] );
    }

    return $cap > 0 ? (string)substr( $output, 0, $cap ) : $output;
  }

  /**
   * Decodes the delta instruction at $position and advances past it.
   *
   * @return ?array null when no complete instruction is available (end of
   *                data or truncated; $position is then left untouched);
   *                otherwise ['copy' => bool, 'offset' => int,
   *                'length' => int], where offset refers to the base for a
   *                copy and to $delta itself for literal (insert) data.
   */
  private function nextDeltaOp( string $delta, int &$position ): ?array {
    $total = strlen( $delta );

    if( $position >= $total ) {
      return null;
    }

    $cursor = $position;
    $opcode = ord( $delta[$cursor++] );

    if( !($opcode & 128) ) {
      // Insert: the opcode itself is the number of literal bytes.
      if( $cursor + $opcode > $total ) {
        return null;
      }

      $position = $cursor + $opcode;

      return ['copy' => false, 'offset' => $cursor, 'length' => $opcode];
    }

    // Copy: bits 0-3 flag which offset bytes follow, bits 4-6 which length
    // bytes follow, each in little-endian order.
    $offset = 0;
    $length = 0;

    for( $bit = 0; $bit < 7; $bit++ ) {
      if( !($opcode & (1 << $bit)) ) {
        continue;
      }

      if( $cursor >= $total ) {
        return null;
      }

      $value = ord( $delta[$cursor++] );

      if( $bit < 4 ) {
        $offset |= $value << ($bit * 8);
      } else {
        $length |= $value << (($bit - 4) * 8);
      }
    }

    if( $length === 0 ) {
      $length = 0x10000;
    }

    $position = $cursor;

    return ['copy' => true, 'offset' => $offset, 'length' => $length];
  }

  /**
   * Reads one byte from a handle, yielding 0 at end of file or on error.
   */
  private function readByte( $fileHandle ): int {
    $char = fread( $fileHandle, 1 );

    return $char === false || $char === '' ? 0 : ord( $char );
  }

  /**
   * Reads a pack entry header: the low 4 bits of the first byte start the
   * size and each continuation byte contributes 7 more bits.
   *
   * @return array ['value' => decoded size, 'byte' => first header byte,
   *               whose bits 4-6 hold the entry type].
   */
  private function readVarInt( $fileHandle ): array {
    $byte  = $this->readByte( $fileHandle );
    $value = $byte & 15;
    $shift = 4;
    $first = $byte;

    while( $byte & 128 ) {
      $byte   = $this->readByte( $fileHandle );
      $value |= (($byte & 127) << $shift);
      $shift += 7;
    }

    return ['value' => $value, 'byte' => $first];
  }

  /**
   * Reads the backwards distance that follows an OFS_DELTA header. Each
   * continuation step adds one before shifting, as the encoding requires.
   */
  private function readOfsDistance( $fileHandle ): int {
    $byte     = $this->readByte( $fileHandle );
    $distance = $byte & 127;

    while( $byte & 128 ) {
      $byte     = $this->readByte( $fileHandle );
      $distance = (($distance + 1) << 7) | ($byte & 127);
    }

    return $distance;
  }

  /**
   * Reads the target size from a delta entry's header. The handle must be
   * positioned just after the entry header.
   *
   * @param int $type 6 for OFS_DELTA, 7 for REF_DELTA.
   *
   * @return int Target size, or 0 when nothing could be inflated.
   */
  private function readDeltaTargetSize( $fileHandle, int $type ): int {
    if( $type === 6 ) {
      // Skip the variable-length base distance.
      $this->readOfsDistance( $fileHandle );
    } else {
      // Skip the 20-byte base object name.
      fseek( $fileHandle, 20, SEEK_CUR );
    }

    $inflator = inflate_init( ZLIB_ENCODING_DEFLATE );

    if( $inflator === false ) {
      return 0;
    }

    $header      = '';
    $attempts    = 0;
    $maxAttempts = 64;

    // Only the first few inflated bytes (the two size varints) are needed.
    while(
      !feof( $fileHandle ) &&
      strlen( $header ) < 32 &&
      $attempts < $maxAttempts
    ) {
      $chunk = fread( $fileHandle, 512 );

      if( $chunk === false || $chunk === '' ) {
        break;
      }

      $output = @inflate_add( $inflator, $chunk, ZLIB_NO_FLUSH );

      if( $output !== false ) {
        $header .= $output;
      }

      if( inflate_get_status( $inflator ) === ZLIB_STREAM_END ) {
        break;
      }

      $attempts++;
    }

    $position = 0;

    if( strlen( $header ) > 0 ) {
      $this->skipSize( $header, $position );

      return $this->readSize( $header, $position );
    }

    return 0;
  }

  /**
   * Finds where the varint starting at $position ends.
   *
   * @return ?int Index just past the varint's final byte, or null when the
   *              data ends before a byte without the continuation bit.
   */
  private function varIntEnd( string $data, int $position ): ?int {
    $length = strlen( $data );

    while( $position < $length ) {
      if( !(ord( $data[$position++] ) & 128) ) {
        return $position;
      }
    }

    return null;
  }

  /**
   * Advances $position past one varint, stopping at the end of the data if
   * the varint is truncated.
   */
  private function skipSize( string $data, int &$position ): void {
    $length = strlen( $data );

    while( $position < $length && (ord( $data[$position++] ) & 128) ) {
    }
  }

  /**
   * Decodes the little-endian base-128 varint at $position and advances
   * past it; decoding stops at the end of the data.
   *
   * @return int The decoded value (0 when $position is already at the end).
   */
  private function readSize( string $data, int &$position ): int {
    $length = strlen( $data );
    $value  = 0;
    $shift  = 0;

    while( $position < $length ) {
      $byte   = ord( $data[$position++] );
      $value |= (($byte & 127) << $shift);
      $shift += 7;

      if( !($byte & 128) ) {
        break;
      }
    }

    return $value;
  }

  /**
   * Returns the cached read handle for a path, opening it on first use. A
   * failed open is cached as false and therefore not retried.
   *
   * @return resource|false
   */
  private function getHandle( string $path ) {
    if( !isset( $this->fileHandles[$path] ) ) {
      $this->fileHandles[$path] = @fopen( $path, 'rb' );
    }

    return $this->fileHandles[$path];
  }
}