<?php
// Dave Jarvis' Repositories
//
// git clone https://repo.autonoma.ca/repo/treetrek.git
require_once __DIR__ . '/CompressionStream.php';

/**
 * Read-only access to objects stored in the pack files of a Git objects
 * directory.
 *
 * Every `pack/*.idx` file below the objects path is treated as a version 2
 * pack index. Objects are located by SHA-1 through the index and then read
 * from the matching `.pack` file, either fully into a string (peek/read) or
 * incrementally through generators (stream/streamGenerator).
 *
 * File handles are opened lazily, shared between all methods, and closed
 * when the instance is destroyed. Because handles are shared, any method
 * that reads another object while in the middle of an entry must restore
 * the file position afterwards.
 */
class GitPacks {
  /**
   * Cap on the compressed delta bytes fetched by the non-streaming delta
   * handlers. The expression using it takes a floor of 1048576 first, so
   * this cap is always the effective read size.
   */
  private const MAX_READ     = 1040576;

  /** Largest object size, in bytes, that read() returns; larger yields ''. */
  private const MAX_RAM      = 1048576;

  /** Largest delta base buffered in memory while streaming; larger bases
   *  are spooled to a temporary file. */
  private const MAX_BASE_RAM = 2097152;

  /** Maximum delta chain depth followed while streaming. */
  private const MAX_DEPTH    = 200;

  /** Magic number plus version that opens a version 2 pack index. */
  private const IDX_SIGNATURE = "\377tOc\0\0\0\2";

  /** Byte position of the SHA table: 8 header bytes + 1024 fan-out bytes. */
  private const IDX_TABLES    = 1032;

  private string $objectsPath;
  private array  $packFiles;
  private string $lastPack = '';
  private array  $fileHandles;
  private array  $fanoutCache;
  private array  $shaBucketCache;
  private array  $offsetBucketCache;

  /**
   * @param string $objectsPath Directory containing the `pack` directory.
   */
  public function __construct( string $objectsPath ) {
    $this->objectsPath       = $objectsPath;
    $this->packFiles         = glob( "{$this->objectsPath}/pack/*.idx" ) ?: [];
    $this->fileHandles       = [];
    $this->fanoutCache       = [];
    $this->shaBucketCache    = [];
    $this->offsetBucketCache = [];
  }

  /**
   * Closes every index and pack handle opened through getHandle().
   */
  public function __destruct() {
    foreach( $this->fileHandles as $handle ) {
      if( is_resource( $handle ) ) {
        fclose( $handle );
      }
    }
  }

  /**
   * Returns up to $len leading bytes of an object's content.
   *
   * NOTE: for deltified objects the base is read with the same cap, so a
   * copy instruction that refers to base bytes beyond the cap contributes
   * nothing and the result may not be a true prefix of the object.
   *
   * @param string $sha 40-character hexadecimal object name.
   * @param int    $len Maximum number of bytes wanted.
   *
   * @return string The bytes read, or '' when the object is not packed.
   */
  public function peek( string $sha, int $len = 12 ): string {
    $info   = $this->findPackInfo( $sha );
    $result = '';

    if( $info['offset'] !== 0 ) {
      $handle = $this->getHandle( $info['file'] );

      if( $handle ) {
        $result = $this->readPackEntry(
          $handle,
          $info['offset'],
          $len,
          $len
        );
      }
    }

    return $result;
  }

  /**
   * Reads a whole object into memory.
   *
   * @param string $sha 40-character hexadecimal object name.
   *
   * @return string The object content, or '' when the object is not packed
   *                or its size exceeds MAX_RAM.
   */
  public function read( string $sha ): string {
    $info   = $this->findPackInfo( $sha );
    $result = '';

    if( $info['offset'] !== 0 ) {
      $size = $this->extractPackedSize( $info['file'], $info['offset'] );

      if( $size <= self::MAX_RAM ) {
        $handle = $this->getHandle( $info['file'] );

        if( $handle ) {
          $result = $this->readPackEntry(
            $handle,
            $info['offset'],
            $size
          );
        }
      }
    }

    return $result;
  }

  /**
   * Pushes an object's content to a callback, chunk by chunk.
   *
   * @param string   $sha      40-character hexadecimal object name.
   * @param callable $callback Invoked once per chunk with the chunk string.
   *
   * @return bool True when at least one chunk was delivered.
   */
  public function stream( string $sha, callable $callback ): bool {
    $result = false;

    foreach( $this->streamGenerator( $sha ) as $chunk ) {
      $callback( $chunk );
      $result = true;
    }

    return $result;
  }

  /**
   * Yields an object's content in chunks; yields nothing when the object
   * is not packed.
   *
   * @param string $sha 40-character hexadecimal object name.
   */
  public function streamGenerator( string $sha ): Generator {
    yield from $this->streamShaGenerator( $sha, 0 );
  }

  /**
   * Locates an object by name and streams it at the given delta depth.
   */
  private function streamShaGenerator( string $sha, int $depth ): Generator {
    $info = $this->findPackInfo( $sha );

    if( $info['offset'] !== 0 ) {
      $handle = $this->getHandle( $info['file'] );

      if( $handle ) {
        yield from $this->streamPackEntryGenerator(
          $handle,
          $info['offset'],
          $depth
        );
      }
    }
  }

  /**
   * Returns the size of an object's content without reading all of it.
   *
   * @param string $sha 40-character hexadecimal object name.
   *
   * @return int Size in bytes, or 0 when the object is not packed.
   */
  public function getSize( string $sha ): int {
    $info   = $this->findPackInfo( $sha );
    $result = 0;

    if( $info['offset'] !== 0 ) {
      $result = $this->extractPackedSize( $info['file'], $info['offset'] );
    }

    return $result;
  }

  /**
   * Finds which pack holds an object and at what offset.
   *
   * The index that satisfied the previous lookup is tried first, then the
   * remaining indexes in glob order.
   *
   * @return array `['file' => pack path, 'offset' => int]`; the offset is 0
   *               (and the file '') when the object was not found or $sha
   *               is not 40 hexadecimal characters.
   */
  private function findPackInfo( string $sha ): array {
    $result = [ 'offset' => 0, 'file' => '' ];

    if( strlen( $sha ) === 40 && ctype_xdigit( $sha ) ) {
      $binarySha = hex2bin( $sha );

      if( $this->lastPack !== '' ) {
        $offset = $this->findInIdx( $this->lastPack, $binarySha );

        if( $offset !== 0 ) {
          $result = [
            'file'   => $this->packPathFor( $this->lastPack ),
            'offset' => $offset
          ];
        }
      }

      if( $result['offset'] === 0 ) {
        foreach( $this->packFiles as $indexFile ) {
          // The most recently used index was already searched above.
          if( $indexFile !== $this->lastPack ) {
            $offset = $this->findInIdx( $indexFile, $binarySha );

            if( $offset !== 0 ) {
              $this->lastPack = $indexFile;
              $result         = [
                'file'   => $this->packPathFor( $indexFile ),
                'offset' => $offset
              ];

              break;
            }
          }
        }
      }
    }

    return $result;
  }

  /**
   * Maps an index path to its pack path by swapping only the trailing
   * four-character extension, leaving any ".idx" inside directory names
   * untouched. Index paths come from the `*.idx` glob in the constructor.
   */
  private function packPathFor( string $indexFile ): string {
    return substr( $indexFile, 0, -4 ) . '.pack';
  }

  /**
   * Looks up a binary SHA in one index file.
   *
   * The 256-entry fan-out table is read once per index and cached. Indexes
   * that lack the version 2 signature or a complete fan-out table are
   * never cached and always report "not found".
   *
   * @return int Pack offset of the object, or 0 when absent.
   */
  private function findInIdx( string $indexFile, string $binarySha ): int {
    $handle = $this->getHandle( $indexFile );
    $result = 0;

    if( $handle ) {
      if( !isset( $this->fanoutCache[$indexFile] ) ) {
        fseek( $handle, 0 );
        $head = fread( $handle, 8 );

        if( $head === self::IDX_SIGNATURE ) {
          $raw = fread( $handle, 1024 );

          // A truncated table would leave later lookups reading undefined
          // entries, so only accept the full 256 x 4 bytes.
          if( is_string( $raw ) && strlen( $raw ) === 1024 ) {
            $this->fanoutCache[$indexFile] = array_values(
              unpack( 'N*', $raw )
            );
          }
        }
      }

      if( isset( $this->fanoutCache[$indexFile] ) ) {
        $fanout = $this->fanoutCache[$indexFile];
        $byte   = ord( $binarySha[0] );

        // fanout[b] counts objects whose first byte is <= b, so the bucket
        // for this byte spans [fanout[b - 1], fanout[b]).
        $start  = $byte === 0 ? 0 : $fanout[$byte - 1];
        $end    = $fanout[$byte];

        if( $end > $start ) {
          $result = $this->binarySearchIdx(
            $indexFile,
            $handle,
            $start,
            $end,
            $binarySha,
            $fanout[255]
          );
        }
      }
    }

    return $result;
  }

  /**
   * Binary-searches one fan-out bucket of an index for a SHA.
   *
   * The bucket's SHA entries and 32-bit offsets are read once and cached
   * under "$indexFile:$start".
   *
   * @param string   $indexFile Index path; part of the cache key.
   * @param resource $handle    Open handle on the index file.
   * @param int      $start     First entry of the bucket.
   * @param int      $end       One past the last entry of the bucket.
   * @param string   $binarySha 20-byte object name.
   * @param int      $total     Number of objects in the index.
   *
   * @return int Pack offset of the object, or 0 when absent.
   */
  private function binarySearchIdx(
    string $indexFile,
    $handle,
    int $start,
    int $end,
    string $binarySha,
    int $total
  ): int {
    $key    = "$indexFile:$start";
    $count  = $end - $start;
    $result = 0;

    if( !isset( $this->shaBucketCache[$key] ) ) {
      fseek( $handle, self::IDX_TABLES + ($start * 20) );
      $this->shaBucketCache[$key] = (string)fread( $handle, $count * 20 );

      // The 32-bit offsets follow the SHA table (20 bytes per object) and
      // the CRC table (4 bytes per object).
      fseek( $handle, self::IDX_TABLES + ($total * 24) + ($start * 4) );
      $this->offsetBucketCache[$key] = (string)fread( $handle, $count * 4 );
    }

    $shaBlock = $this->shaBucketCache[$key];
    $low      = 0;

    // Never search past what was actually read from a truncated index.
    $high     = min( $count, intdiv( strlen( $shaBlock ), 20 ) ) - 1;
    $found    = -1;

    while( $found === -1 && $low <= $high ) {
      $mid   = ($low + $high) >> 1;

      // strcmp() orders by bytes. The relational operators would compare
      // two numeric-looking strings as numbers, which disagrees with the
      // byte order the index is sorted in.
      $order = strcmp( substr( $shaBlock, $mid * 20, 20 ), $binarySha );

      if( $order < 0 ) {
        $low = $mid + 1;
      } elseif( $order > 0 ) {
        $high = $mid - 1;
      } else {
        $found = $mid;
      }
    }

    if( $found !== -1 ) {
      $packed = substr( $this->offsetBucketCache[$key], $found * 4, 4 );

      if( is_string( $packed ) && strlen( $packed ) === 4 ) {
        $offset = unpack( 'N', $packed )[1];

        // A set high bit means the low 31 bits index the table of 64-bit
        // offsets that follows the 32-bit offset table.
        if( $offset & 0x80000000 ) {
          $pos64 = self::IDX_TABLES
            + ($total * 28)
            + (($offset & 0x7FFFFFFF) * 8);

          fseek( $handle, $pos64 );
          $wide   = fread( $handle, 8 );
          $offset = (is_string( $wide ) && strlen( $wide ) === 8)
            ? unpack( 'J', $wide )[1]
            : 0;
        }

        $result = (int)$offset;
      }
    }

    return $result;
  }

  /**
   * Reads the pack entry at $offset into a string, resolving deltas.
   *
   * @param resource $handle Open handle on the pack file.
   * @param int      $offset Position of the entry header.
   * @param int      $size   Expected content size; passed to the delta
   *                         handlers.
   * @param int      $cap    Maximum bytes to return; 0 means no limit.
   */
  private function readPackEntry(
    $handle,
    int $offset,
    int $size,
    int $cap = 0
  ): string {
    fseek( $handle, $offset );
    $header = $this->readVarInt( $handle );
    $type   = ($header['byte'] >> 4) & 7;
    $result = '';

    if( $type === 6 ) {
      $result = $this->handleOfsDelta( $handle, $offset, $size, $cap );
    } elseif( $type === 7 ) {
      $result = $this->handleRefDelta( $handle, $size, $cap );
    } else {
      $result = $this->decompressToString( $handle, $cap );
    }

    return $result;
  }

  /**
   * Streams the pack entry at $offset, resolving deltas (types 6 and 7).
   *
   * @param resource $handle Open handle on the pack file.
   * @param int      $offset Position of the entry header.
   * @param int      $depth  Current delta chain depth.
   */
  private function streamPackEntryGenerator(
    $handle,
    int $offset,
    int $depth
  ): Generator {
    fseek( $handle, $offset );
    $header = $this->readVarInt( $handle );
    $type   = ($header['byte'] >> 4) & 7;

    if( $type === 6 || $type === 7 ) {
      yield from $this->streamDeltaObjectGenerator(
        $handle,
        $offset,
        $type,
        $depth
      );
    } else {
      yield from $this->streamDecompressionGenerator( $handle );
    }
  }

  /**
   * Writes a fully resolved base object to a temporary file.
   *
   * @param resource $packHandle Open handle on the pack file.
   * @param int      $baseOffset Position of the base entry.
   * @param int      $depth      Depth of the delta that needs the base.
   *
   * @return resource|false Rewound temporary file, or false when one could
   *                        not be created (the failure is logged).
   */
  private function resolveBaseToTempFile(
    $packHandle,
    int $baseOffset,
    int $depth
  ) {
    $tmpHandle = tmpfile();

    if( $tmpHandle !== false ) {
      foreach( $this->streamPackEntryGenerator(
        $packHandle,
        $baseOffset,
        $depth + 1
      ) as $chunk ) {
        fwrite( $tmpHandle, $chunk );
      }

      rewind( $tmpHandle );
    } else {
      error_log(
        "[GitPacks] tmpfile failed for ofs-delta base at $baseOffset"
      );
    }

    return $tmpHandle;
  }

  /**
   * Streams a deltified entry by resolving its base and applying the delta.
   *
   * Bases no larger than MAX_BASE_RAM are held in memory; larger bases are
   * spooled to a temporary file. Nothing is yielded once the chain depth
   * reaches MAX_DEPTH (the condition is logged).
   *
   * @param resource $handle Open handle on the pack file.
   * @param int      $offset Position of the delta entry header.
   * @param int      $type   6 for an offset delta, otherwise a ref delta.
   * @param int      $depth  Current delta chain depth.
   */
  private function streamDeltaObjectGenerator(
    $handle,
    int $offset,
    int $type,
    int $depth
  ): Generator {
    if( $depth < self::MAX_DEPTH ) {
      fseek( $handle, $offset );
      $this->readVarInt( $handle );

      if( $type === 6 ) {
        $neg      = $this->readOffsetDelta( $handle );
        $deltaPos = ftell( $handle );
        $baseSize = $this->extractPackedSize( $handle, $offset - $neg );

        if( $baseSize > self::MAX_BASE_RAM ) {
          $tmpHandle = $this->resolveBaseToTempFile(
            $handle,
            $offset - $neg,
            $depth
          );

          if( $tmpHandle !== false ) {
            // finally also runs when the consumer abandons the generator.
            try {
              fseek( $handle, $deltaPos );
              yield from $this->applyDeltaStreamGenerator(
                $handle,
                $tmpHandle
              );
            } finally {
              fclose( $tmpHandle );
            }
          }
        } else {
          $base = '';

          foreach( $this->streamPackEntryGenerator(
            $handle,
            $offset - $neg,
            $depth + 1
          ) as $chunk ) {
            $base .= $chunk;
          }

          fseek( $handle, $deltaPos );
          yield from $this->applyDeltaStreamGenerator( $handle, $base );
        }
      } else {
        $baseSha  = bin2hex( fread( $handle, 20 ) );

        // The base may live in this same pack and handles are shared, so
        // resolving it moves the file position. Remember where the
        // compressed delta starts and return there before inflating it.
        $deltaPos = ftell( $handle );
        $baseSize = $this->getSize( $baseSha );

        if( $baseSize > self::MAX_BASE_RAM ) {
          $tmpHandle = tmpfile();

          if( $tmpHandle !== false ) {
            try {
              $written = false;

              foreach( $this->streamShaGenerator(
                $baseSha,
                $depth + 1
              ) as $chunk ) {
                fwrite( $tmpHandle, $chunk );
                $written = true;
              }

              if( $written ) {
                rewind( $tmpHandle );
                fseek( $handle, $deltaPos );
                yield from $this->applyDeltaStreamGenerator(
                  $handle,
                  $tmpHandle
                );
              }
            } finally {
              fclose( $tmpHandle );
            }
          } else {
            error_log(
              "[GitPacks] tmpfile() failed for ref-delta (sha=$baseSha)"
            );
          }
        } else {
          $base    = '';
          $written = false;

          foreach( $this->streamShaGenerator(
            $baseSha,
            $depth + 1
          ) as $chunk ) {
            $base    .= $chunk;
            $written  = true;
          }

          if( $written ) {
            fseek( $handle, $deltaPos );
            yield from $this->applyDeltaStreamGenerator( $handle, $base );
          }
        }
      }
    } else {
      error_log( "[GitPacks] delta depth limit exceeded at offset $offset" );
    }
  }

  /**
   * Inflates a delta from the current position of $handle and yields the
   * reconstructed object as the instructions are decoded.
   *
   * $state counts the leading size varints skipped so far (two precede the
   * instructions). Instructions split across inflated chunks stay in
   * $buffer until the rest arrives.
   *
   * @param resource        $handle Pack handle positioned at the
   *                                compressed delta.
   * @param resource|string $base   Base content, either in memory or as a
   *                                seekable file handle.
   */
  private function applyDeltaStreamGenerator(
    $handle,
    $base
  ): Generator {
    $stream = CompressionStream::createInflater();
    $state  = 0;
    $buffer = '';
    $done   = false;
    $isFile = is_resource( $base );

    while( !$done && !feof( $handle ) ) {
      $chunk = fread( $handle, 8192 );
      $done  = $chunk === false || $chunk === '';

      if( !$done ) {
        $data = $stream->pump( $chunk );

        if( $data !== '' ) {
          $buffer     .= $data;
          $doneBuffer  = false;

          while( !$doneBuffer ) {
            $len = strlen( $buffer );

            if( $len === 0 ) {
              $doneBuffer = true;
            }

            if( !$doneBuffer ) {
              if( $state < 2 ) {
                // Skip one size varint: advance over continuation bytes.
                $pos = 0;

                while( $pos < $len && (ord( $buffer[$pos] ) & 128) ) {
                  $pos++;
                }

                // Every buffered byte continues: the varint is incomplete.
                if( $pos === $len && (ord( $buffer[$pos - 1] ) & 128) ) {
                  $doneBuffer = true;
                }

                if( !$doneBuffer ) {
                  $buffer = substr( $buffer, $pos + 1 );
                  $state++;
                }
              } else {
                $op = ord( $buffer[0] );

                if( $op & 128 ) {
                  // Copy instruction: wait until all operand bytes arrive.
                  $need = $this->getCopyInstructionSize( $op );

                  if( $len < 1 + $need ) {
                    $doneBuffer = true;
                  }

                  if( !$doneBuffer ) {
                    $info = $this->parseCopyInstruction( $op, $buffer, 1 );

                    if( $isFile ) {
                      fseek( $base, $info['off'] );
                      $rem = $info['len'];

                      while( $rem > 0 ) {
                        $slc = fread( $base, min( 65536, $rem ) );

                        if( $slc === false || $slc === '' ) {
                          $rem = 0;
                        } else {
                          yield $slc;
                          $rem -= strlen( $slc );
                        }
                      }
                    } else {
                      yield substr( $base, $info['off'], $info['len'] );
                    }

                    $buffer = substr( $buffer, 1 + $need );
                  }
                } else {
                  // Insert instruction: the opcode is the literal length.
                  $ln = $op & 127;

                  if( $len < 1 + $ln ) {
                    $doneBuffer = true;
                  }

                  if( !$doneBuffer ) {
                    yield substr( $buffer, 1, $ln );
                    $buffer = substr( $buffer, 1 + $ln );
                  }
                }
              }
            }
          }
        }

        $done = $stream->finished();
      }
    }
  }

  /**
   * Inflates from the current position of $handle, yielding each non-empty
   * chunk until the inflater reports that the stream has finished.
   *
   * @param resource $handle Pack handle positioned at compressed data.
   */
  private function streamDecompressionGenerator( $handle ): Generator {
    $stream = CompressionStream::createInflater();
    $done   = false;

    while( !$done && !feof( $handle ) ) {
      $chunk = fread( $handle, 8192 );
      $done  = $chunk === false || $chunk === '';

      if( !$done ) {
        $data = $stream->pump( $chunk );

        if( $data !== '' ) {
          yield $data;
        }

        $done = $stream->finished();
      }
    }
  }

  /**
   * Inflates from the current position of $handle into a string.
   *
   * @param resource $handle Pack handle positioned at compressed data.
   * @param int      $cap    Stop after this many bytes; 0 means no limit.
   */
  private function decompressToString(
    $handle,
    int $cap = 0
  ): string {
    $stream = CompressionStream::createInflater();
    $res    = '';
    $done   = false;

    while( !$done && !feof( $handle ) ) {
      $chunk = fread( $handle, 8192 );
      $done  = $chunk === false || $chunk === '';

      if( !$done ) {
        $data = $stream->pump( $chunk );

        if( $data !== '' ) {
          $res .= $data;
        }

        if( $cap > 0 && strlen( $res ) >= $cap ) {
          $res  = substr( $res, 0, $cap );
          $done = true;
        }

        if( !$done ) {
          $done = $stream->finished();
        }
      }
    }

    return $res;
  }

  /**
   * Returns the content size of the entry at $offset.
   *
   * For non-delta entries this is the size in the entry header; for delta
   * entries it is the target size stored at the start of the delta.
   *
   * @param resource|string $packPathOrHandle Pack path or open handle.
   * @param int             $offset           Position of the entry header.
   *
   * @return int Size in bytes, or 0 when the pack cannot be opened.
   */
  private function extractPackedSize( $packPathOrHandle, int $offset ): int {
    $handle = is_resource( $packPathOrHandle )
      ? $packPathOrHandle
      : $this->getHandle( $packPathOrHandle );
    $size   = 0;

    if( $handle ) {
      fseek( $handle, $offset );
      $header = $this->readVarInt( $handle );
      $size   = $header['value'];
      $type   = ($header['byte'] >> 4) & 7;

      if( $type === 6 || $type === 7 ) {
        $size = $this->readDeltaTargetSize( $handle, $type );
      }
    }

    return $size;
  }

  /**
   * Resolves an offset delta in memory.
   *
   * Expects $handle to be positioned just after the entry header. The base
   * is read with the same $cap as the result.
   *
   * @param resource $handle Open handle on the pack file.
   * @param int      $offset Position of the delta entry header.
   * @param int      $size   Expected content size.
   * @param int      $cap    Maximum bytes to return; 0 means no limit.
   */
  private function handleOfsDelta(
    $handle,
    int $offset,
    int $size,
    int $cap
  ): string {
    $neg  = $this->readOffsetDelta( $handle );
    $cur  = ftell( $handle );
    $base = $offset - $neg;

    fseek( $handle, $base );
    $bHead = $this->readVarInt( $handle );

    // readPackEntry() seeks to $base itself before reading.
    $bData = $this->readPackEntry( $handle, $base, $bHead['value'], $cap );

    fseek( $handle, $cur );
    $rem   = min( self::MAX_READ, max( $size * 2, 1048576 ) );
    $comp  = fread( $handle, $rem );
    $delta = @gzuncompress( $comp ) ?: '';

    return $this->applyDelta( $bData, $delta, $cap );
  }

  /**
   * Resolves a ref delta in memory.
   *
   * Expects $handle to be positioned at the 20-byte base name that follows
   * the entry header.
   *
   * @param resource $handle Open handle on the pack file.
   * @param int      $size   Expected content size.
   * @param int      $cap    Maximum bytes to return; 0 means no limit.
   */
  private function handleRefDelta( $handle, int $size, int $cap ): string {
    $sha = bin2hex( fread( $handle, 20 ) );

    // Looking up the base can read through this very handle (handles are
    // shared per path), so note where the compressed delta begins and
    // return to it afterwards.
    $cur = ftell( $handle );
    $bas = $cap > 0 ? $this->peek( $sha, $cap ) : $this->read( $sha );

    fseek( $handle, $cur );
    $rem = min( self::MAX_READ, max( $size * 2, 1048576 ) );
    $cmp = fread( $handle, $rem );
    $del = @gzuncompress( $cmp ) ?: '';

    return $this->applyDelta( $bas, $del, $cap );
  }

  /**
   * Applies an inflated delta to a base held in memory.
   *
   * @param string $base  Base object content.
   * @param string $delta Inflated delta: two size varints, then copy and
   *                      insert instructions.
   * @param int    $cap   Stop before the next instruction once this many
   *                      bytes exist; 0 means no limit. The result may
   *                      exceed $cap by part of one instruction.
   */
  private function applyDelta( string $base, string $delta, int $cap ): string {
    $pos = 0;

    // Skip the base size and target size that prefix the instructions.
    $res = $this->readDeltaSize( $delta, $pos );
    $pos += $res['used'];
    $res = $this->readDeltaSize( $delta, $pos );
    $pos += $res['used'];

    $out  = '';
    $len  = strlen( $delta );
    $done = false;

    while( !$done && $pos < $len ) {
      if( $cap > 0 && strlen( $out ) >= $cap ) {
        $done = true;
      }

      if( !$done ) {
        $op = ord( $delta[$pos++] );

        if( $op & 128 ) {
          $info = $this->parseCopyInstruction( $op, $delta, $pos );
          $out  .= substr( $base, $info['off'], $info['len'] );
          $pos  += $info['used'];
        } else {
          $ln   = $op & 127;
          $out  .= substr( $delta, $pos, $ln );
          $pos  += $ln;
        }
      }
    }

    return $out;
  }

  /**
   * Decodes the operands of a delta copy instruction.
   *
   * Bits 0-3 of the opcode select which little-endian offset bytes are
   * present and bits 4-6 select which length bytes are present. A decoded
   * length of zero stands for 0x10000.
   *
   * @param int    $op   The opcode byte (high bit set).
   * @param string $data Buffer holding the operands.
   * @param int    $pos  Index of the first operand byte in $data.
   *
   * @return array `['off' => int, 'len' => int, 'used' => operand bytes]`.
   */
  private function parseCopyInstruction(
    int $op,
    string $data,
    int $pos
  ): array {
    $off = 0;
    $len = 0;
    $ptr = $pos;

    for( $bit = 0; $bit < 4; $bit++ ) {
      if( $op & (0x01 << $bit) ) {
        // A byte missing from a truncated delta is read as zero.
        $off |= ord( $data[$ptr] ?? "\0" ) << ($bit * 8);
        $ptr++;
      }
    }

    for( $bit = 0; $bit < 3; $bit++ ) {
      if( $op & (0x10 << $bit) ) {
        $len |= ord( $data[$ptr] ?? "\0" ) << ($bit * 8);
        $ptr++;
      }
    }

    return [
      'off'  => $off,
      'len'  => $len === 0 ? 0x10000 : $len,
      'used' => $ptr - $pos
    ];
  }

  /**
   * Counts the operand bytes that follow a copy opcode: the number of set
   * bits among its low seven bits.
   */
  private function getCopyInstructionSize( int $op ): int {
    $c = $op & 0x7F;
    $c = $c - (($c >> 1) & 0x55);
    $c = (($c >> 2) & 0x33) + ($c & 0x33);
    $c = (($c >> 4) + $c) & 0x0F;

    return $c;
  }

  /**
   * Reads a pack entry header from the current position.
   *
   * The first byte carries the type in bits 4-6 and the low four size
   * bits; each following byte adds seven more size bits while the high
   * bit is set.
   *
   * @param resource $handle Pack handle positioned at the entry header.
   *
   * @return array `['value' => size, 'byte' => first header byte]`.
   */
  private function readVarInt( $handle ): array {
    $byte = ord( fread( $handle, 1 ) );
    $val  = $byte & 15;
    $shft = 4;
    $fst  = $byte;

    while( $byte & 128 ) {
      $byte  = ord( fread( $handle, 1 ) );
      $val  |= (($byte & 127) << $shft);
      $shft += 7;
    }

    return [ 'value' => $val, 'byte' => $fst ];
  }

  /**
   * Reads the backwards distance to the base of an offset delta.
   *
   * Each continuation adds one before shifting, as done in the loop below.
   *
   * @param resource $handle Pack handle positioned after the entry header.
   */
  private function readOffsetDelta( $handle ): int {
    $byte = ord( fread( $handle, 1 ) );
    $neg  = $byte & 127;

    while( $byte & 128 ) {
      $byte = ord( fread( $handle, 1 ) );
      $neg  = (($neg + 1) << 7) | ($byte & 127);
    }

    return $neg;
  }

  /**
   * Reads the target size of a delta entry without inflating all of it.
   *
   * Skips the base reference (an offset varint for type 6, otherwise a
   * 20-byte name), inflates up to the first 32 bytes of the delta, and
   * returns the second of the two size varints found there.
   *
   * @param resource $handle Pack handle positioned after the entry header.
   * @param int      $type   Entry type: 6 or 7.
   *
   * @return int Target size, or 0 when nothing could be inflated.
   */
  private function readDeltaTargetSize( $handle, int $type ): int {
    if( $type === 6 ) {
      $b = ord( fread( $handle, 1 ) );

      while( $b & 128 ) {
        $b = ord( fread( $handle, 1 ) );
      }
    } else {
      fseek( $handle, 20, SEEK_CUR );
    }

    $stream = CompressionStream::createInflater();
    $head   = '';
    $try    = 0;
    $done   = false;

    while( !$done && !feof( $handle ) && strlen( $head ) < 32 && $try < 64 ) {
      $chunk = fread( $handle, 512 );
      $done  = $chunk === false || $chunk === '';

      if( !$done ) {
        $out = $stream->pump( $chunk );

        if( $out !== '' ) {
          $head .= $out;
        }

        $done = $stream->finished();
        $try++;
      }
    }

    $pos    = 0;
    $result = 0;

    if( strlen( $head ) > 0 ) {
      $res  = $this->readDeltaSize( $head, $pos );
      $pos += $res['used'];
      $res  = $this->readDeltaSize( $head, $pos );

      $result = $res['val'];
    }

    return $result;
  }

  /**
   * Decodes a little-endian base-128 varint from a string.
   *
   * @param string $data Buffer to read from.
   * @param int    $pos  Index of the first varint byte.
   *
   * @return array `['val' => decoded value, 'used' => bytes consumed]`.
   */
  private function readDeltaSize( string $data, int $pos ): array {
    $len   = strlen( $data );
    $val   = 0;
    $shift = 0;
    $start = $pos;
    $done  = false;

    while( !$done && $pos < $len ) {
      $byte  = ord( $data[$pos++] );
      $val  |= ($byte & 0x7F) << $shift;

      if( !($byte & 0x80) ) {
        $done = true;
      }

      if( !$done ) {
        $shift += 7;
      }
    }

    return [ 'val' => $val, 'used' => $pos - $start ];
  }

  /**
   * Returns the shared read handle for a path, opening it on first use.
   *
   * A failed open is cached as false, so it is not retried.
   *
   * @return resource|false
   */
  private function getHandle( string $path ) {
    if( !isset( $this->fileHandles[$path] ) ) {
      $this->fileHandles[$path] = @fopen( $path, 'rb' );
    }

    return $this->fileHandles[$path];
  }
}