Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/treetrek.git
<?php
/**
 * Read-only access to objects stored in Git pack files.
 *
 * Every "*.idx" file found under "<objectsPath>/pack" is treated as a
 * version 2 pack index whose companion "*.pack" file sits next to it.
 * Objects are located through the index (fanout table plus a binary search
 * over the sorted object names) and then inflated from the pack, resolving
 * offset deltas (entry type 6) and reference deltas (entry type 7).
 *
 * File handles and the index buckets that were read are cached for the
 * lifetime of the instance; handles are closed by the destructor.
 */
class GitPacks {
  /** Number of bytes pulled from a pack per inflate step. */
  private const CHUNK_SIZE = 8192;

  private string $objectsPath;
  private array $packFiles;
  private ?string $lastPack = null;

  private array $fileHandles       = [];
  private array $fanoutCache       = [];
  private array $shaBucketCache    = [];
  private array $offsetBucketCache = [];

  /**
   * @param string $objectsPath Directory that contains the "pack" folder.
   */
  public function __construct( string $objectsPath ) {
    $this->objectsPath = $objectsPath;
    $this->packFiles   = glob( "{$this->objectsPath}/pack/*.idx" ) ?: [];
  }

  /**
   * Closes every index and pack handle opened by this instance.
   */
  public function __destruct() {
    foreach( $this->fileHandles as $handle ) {
      // Failed opens are cached as false, so only close real resources.
      if( is_resource( $handle ) ) {
        fclose( $handle );
      }
    }
  }

  /**
   * Returns the fully resolved content of a packed object.
   *
   * @param string $sha 40-character hexadecimal object name.
   *
   * @return string|null Object content, or null when the name is malformed,
   *                     not present in any index, or the pack cannot be
   *                     opened.
   */
  public function read( string $sha ): ?string {
    $info = $this->findPackInfo( $sha );

    if( $info['offset'] === -1 ) {
      return null;
    }

    $handle = $this->getHandle( $info['file'] );

    return $handle
      ? $this->readPackEntry( $handle, $info['offset'] )
      : null;
  }

  /**
   * Returns the size of a packed object without materialising it.
   *
   * @param string $sha 40-character hexadecimal object name.
   *
   * @return int|null Size in bytes, null when the object is not found, and
   *                  0 when the pack cannot be opened or the delta header
   *                  cannot be inflated.
   */
  public function getSize( string $sha ): ?int {
    $info = $this->findPackInfo( $sha );

    if( $info['offset'] === -1 ) {
      return null;
    }

    return $this->extractPackedSize( $info['file'], $info['offset'] );
  }

  /**
   * Finds the pack file and byte offset that hold an object.
   *
   * The index that produced the previous hit is probed first; the remaining
   * indexes are then scanned in glob order.
   *
   * @return array Either ['file' => string, 'offset' => int] on success or
   *               ['offset' => -1] when nothing matched (no 'file' key).
   */
  private function findPackInfo( string $sha ): array {
    if( !ctype_xdigit( $sha ) || strlen( $sha ) !== 40 ) {
      return ['offset' => -1];
    }

    $binarySha = hex2bin( $sha );

    if( $this->lastPack ) {
      $offset = $this->findInIdx( $this->lastPack, $binarySha );

      if( $offset !== -1 ) {
        return $this->makeResult( $this->lastPack, $offset );
      }
    }

    foreach( $this->packFiles as $indexFile ) {
      // Already probed above.
      if( $indexFile === $this->lastPack ) {
        continue;
      }

      $offset = $this->findInIdx( $indexFile, $binarySha );

      if( $offset !== -1 ) {
        $this->lastPack = $indexFile;

        return $this->makeResult( $indexFile, $offset );
      }
    }

    return ['offset' => -1];
  }

  /**
   * Builds a lookup result, mapping the index path to its pack path.
   *
   * Only a trailing ".idx" extension is swapped for ".pack"; occurrences of
   * ".idx" elsewhere in the path (such as a directory name) are preserved.
   */
  private function makeResult( string $indexPath, int $offset ): array {
    $packPath = substr( $indexPath, -4 ) === '.idx'
      ? substr( $indexPath, 0, -4 ) . '.pack'
      : $indexPath;

    return [
      'file'   => $packPath,
      'offset' => $offset
    ];
  }

  /**
   * Looks up an object name in one version 2 pack index.
   *
   * Layout assumed by the arithmetic below (N = total object count, taken
   * from the last fanout entry):
   *   - bytes 0..7        signature and version
   *   - bytes 8..1031     256 big-endian 32-bit fanout entries
   *   - 1032 + i * 20     sorted 20-byte object names
   *   - 1032 + N * 24     32-bit pack offsets (after N * 4 bytes of CRCs)
   *   - 1032 + N * 28     64-bit pack offsets for flagged entries
   *
   * @param string $indexFile Path of the ".idx" file.
   * @param string $binarySha 20-byte binary object name.
   *
   * @return int Offset of the entry inside the pack, or -1 when the object
   *             is absent or the index is unreadable, truncated or not
   *             version 2.
   */
  private function findInIdx( string $indexFile, string $binarySha ): int {
    $fileHandle = $this->getHandle( $indexFile );

    if( !$fileHandle ) {
      return -1;
    }

    if( !isset( $this->fanoutCache[$indexFile] ) ) {
      fseek( $fileHandle, 0 );

      if( fread( $fileHandle, 8 ) !== "\377tOc\0\0\0\2" ) {
        return -1;
      }

      $rawFanout = fread( $fileHandle, 1024 );

      // A short read means a truncated index; indexing a partial fanout
      // table would hit undefined entries.
      if( $rawFanout === false || strlen( $rawFanout ) !== 1024 ) {
        return -1;
      }

      $this->fanoutCache[$indexFile] = array_values(
        unpack( 'N*', $rawFanout )
      );
    }

    $fanout = $this->fanoutCache[$indexFile];

    // fanout[b] is the number of names whose first byte is <= b, so the
    // names starting with $firstByte occupy the range [$start, $end).
    $firstByte = ord( $binarySha[0] );
    $start     = $firstByte === 0 ? 0 : $fanout[$firstByte - 1];
    $end       = $fanout[$firstByte];

    if( $end <= $start ) {
      return -1;
    }

    $cacheKey = "$indexFile:$firstByte";

    if( !isset( $this->shaBucketCache[$cacheKey] ) ) {
      $count = $end - $start;

      fseek( $fileHandle, 1032 + ($start * 20) );
      $this->shaBucketCache[$cacheKey] =
        (string)fread( $fileHandle, $count * 20 );

      fseek(
        $fileHandle,
        1032 + ($fanout[255] * 24) + ($start * 4)
      );
      $this->offsetBucketCache[$cacheKey] =
        (string)fread( $fileHandle, $count * 4 );
    }

    $shaBlock = $this->shaBucketCache[$cacheKey];
    $low      = 0;
    $high     = intdiv( strlen( $shaBlock ), 20 ) - 1;
    $foundIdx = -1;

    while( $low <= $high ) {
      $mid = ($low + $high) >> 1;

      // strcmp compares raw bytes; the < and > operators would compare
      // numerically whenever both 20-byte strings happen to look numeric.
      $order = strcmp( substr( $shaBlock, $mid * 20, 20 ), $binarySha );

      if( $order < 0 ) {
        $low = $mid + 1;
      } elseif( $order > 0 ) {
        $high = $mid - 1;
      } else {
        $foundIdx = $mid;
        break;
      }
    }

    if( $foundIdx === -1 ) {
      return -1;
    }

    $offsetData = (string)substr(
      $this->offsetBucketCache[$cacheKey],
      $foundIdx * 4,
      4
    );

    if( strlen( $offsetData ) !== 4 ) {
      return -1;
    }

    $offset = unpack( 'N', $offsetData )[1];

    // The high bit flags an entry whose real offset lives in the 64-bit
    // table; the low 31 bits are the position within that table.
    if( $offset & 0x80000000 ) {
      $pos64 = 1032 + ($fanout[255] * 28) +
               (($offset & 0x7FFFFFFF) * 8);
      fseek( $fileHandle, $pos64 );

      $wide = fread( $fileHandle, 8 );

      if( $wide === false || strlen( $wide ) !== 8 ) {
        return -1;
      }

      $offset = unpack( 'J', $wide )[1];
    }

    return (int)$offset;
  }

  /**
   * Reads and fully resolves the pack entry that starts at $offset.
   *
   * Entry types 6 and 7 are delta entries and are resolved against their
   * base; every other type is inflated as-is.
   *
   * @param resource $fileHandle Open pack file (no native type hint exists
   *                             for resources).
   * @param int      $offset     Byte offset of the entry header.
   */
  private function readPackEntry( $fileHandle, int $offset ): string {
    fseek( $fileHandle, $offset );

    $header = $this->readVarInt( $fileHandle );
    $type   = ($header['byte'] >> 4) & 7;

    if( $type === 6 ) {
      return $this->handleOfsDelta( $fileHandle, $offset );
    }

    if( $type === 7 ) {
      return $this->handleRefDelta( $fileHandle );
    }

    return $this->inflateStream( $fileHandle );
  }

  /**
   * Inflates one zlib stream starting at the handle's current position.
   *
   * Data is consumed in CHUNK_SIZE pieces until the stream ends, inflating
   * fails, or the file is exhausted. Bytes that follow the end of the
   * stream inside the last chunk are ignored. The handle is left positioned
   * after the last chunk read, not at the end of the stream.
   *
   * @param resource $fileHandle Open pack file.
   *
   * @return string Inflated bytes (possibly partial or empty on failure).
   */
  private function inflateStream( $fileHandle ): string {
    $inflator = inflate_init( ZLIB_ENCODING_DEFLATE );

    if( $inflator === false ) {
      return '';
    }

    $result = '';

    while( !feof( $fileHandle ) ) {
      $chunk = fread( $fileHandle, self::CHUNK_SIZE );

      if( $chunk === false ) {
        break;
      }

      $data = @inflate_add( $inflator, $chunk );

      if( $data === false ) {
        break;
      }

      $result .= $data;

      if( inflate_get_status( $inflator ) === ZLIB_STREAM_END ) {
        break;
      }
    }

    return $result;
  }

  /**
   * Returns the size of the object stored at $offset in a pack.
   *
   * For delta entries the size of the reconstructed object is taken from
   * the delta header; otherwise the size recorded in the entry header is
   * returned.
   *
   * @return int Size in bytes, or 0 when the pack cannot be opened.
   */
  private function extractPackedSize( string $packPath, int $offset ): int {
    $fileHandle = $this->getHandle( $packPath );

    if( !$fileHandle ) {
      return 0;
    }

    fseek( $fileHandle, $offset );

    $header = $this->readVarInt( $fileHandle );
    $size   = $header['value'];
    $type   = ($header['byte'] >> 4) & 7;

    if( $type === 6 || $type === 7 ) {
      return $this->readDeltaTargetSize( $fileHandle, $type );
    }

    return $size;
  }

  /**
   * Resolves an offset delta: the base is another entry of the same pack,
   * located a variable-length encoded distance before this entry.
   *
   * @param resource $fileHandle Pack handle positioned just after the entry
   *                             header.
   * @param int      $offset     Offset of this delta entry in the pack.
   *
   * @return string Reconstructed object, or '' when the encoded distance
   *                does not point at an earlier position in the pack.
   */
  private function handleOfsDelta( $fileHandle, int $offset ): string {
    $byte     = ord( (string)fread( $fileHandle, 1 ) );
    $negative = $byte & 127;

    while( $byte & 128 ) {
      $byte     = ord( (string)fread( $fileHandle, 1 ) );
      $negative = (($negative + 1) << 7) | ($byte & 127);
    }

    // A distance of zero would make the entry its own base and recurse
    // forever; a distance beyond the start of the file is equally corrupt.
    if( $negative <= 0 || $negative > $offset ) {
      return '';
    }

    // Reading the base moves the shared handle, so remember where the
    // compressed delta starts and return there afterwards.
    $deltaPos = ftell( $fileHandle );
    $base     = $this->readPackEntry( $fileHandle, $offset - $negative );

    fseek( $fileHandle, $deltaPos );

    return $this->applyDelta( $base, $this->inflateStream( $fileHandle ) );
  }

  /**
   * Resolves a reference delta: the base is named by the 20-byte object
   * name that precedes the compressed delta.
   *
   * @param resource $fileHandle Pack handle positioned just after the entry
   *                             header.
   */
  private function handleRefDelta( $fileHandle ): string {
    $baseSha  = bin2hex( (string)fread( $fileHandle, 20 ) );
    $deltaPos = ftell( $fileHandle );

    // read() may seek this very handle when the base lives in the same
    // pack (handles are cached per path), so the position of the delta
    // payload must be restored before inflating it.
    $base = $this->read( $baseSha ) ?? '';

    fseek( $fileHandle, $deltaPos );

    return $this->applyDelta( $base, $this->inflateStream( $fileHandle ) );
  }

  /**
   * Rebuilds an object from its base and an inflated delta.
   *
   * The delta starts with two variable-length sizes, which are skipped,
   * followed by instructions: a byte with the high bit set copies a range
   * from the base; any other byte inserts that many literal bytes taken
   * from the delta itself.
   */
  private function applyDelta( string $base, string $delta ): string {
    // Nothing to apply (for example, the delta failed to inflate).
    if( $delta === '' ) {
      return '';
    }

    $position = 0;
    $this->skipSize( $delta, $position );
    $this->skipSize( $delta, $position );

    $output      = '';
    $deltaLength = strlen( $delta );

    while( $position < $deltaLength ) {
      $opcode = ord( $delta[$position++] );

      if( $opcode & 128 ) {
        $offset = 0;
        $length = 0;

        // Bits 0-3 select which of the four offset bytes are present.
        for( $bit = 0; $bit < 4; $bit++ ) {
          if( $opcode & (0x01 << $bit) ) {
            $offset |= ord( $delta[$position++] ) << ($bit * 8);
          }
        }

        // Bits 4-6 select which of the three length bytes are present.
        for( $bit = 0; $bit < 3; $bit++ ) {
          if( $opcode & (0x10 << $bit) ) {
            $length |= ord( $delta[$position++] ) << ($bit * 8);
          }
        }

        // An encoded length of zero stands for 0x10000 bytes.
        if( $length === 0 ) {
          $length = 0x10000;
        }

        $output .= substr( $base, $offset, $length );
      } else {
        $length    = $opcode & 127;
        $output   .= substr( $delta, $position, $length );
        $position += $length;
      }
    }

    return $output;
  }

  /**
   * Reads a pack entry header at the current position.
   *
   * The first byte contributes its low four bits to the value; each
   * continuation byte (announced by the high bit of the previous byte)
   * contributes seven more bits.
   *
   * @param resource $fileHandle Open pack file.
   *
   * @return array ['value' => decoded size, 'byte' => first header byte].
   */
  private function readVarInt( $fileHandle ): array {
    $byte  = ord( (string)fread( $fileHandle, 1 ) );
    $value = $byte & 15;
    $shift = 4;
    $first = $byte;

    while( $byte & 128 ) {
      $byte   = ord( (string)fread( $fileHandle, 1 ) );
      $value |= (($byte & 127) << $shift);
      $shift += 7;
    }

    return ['value' => $value, 'byte' => $first];
  }

  /**
   * Reads the result size recorded at the start of a delta.
   *
   * The base reference is skipped, then just enough of the delta is
   * inflated to decode its second variable-length size.
   *
   * @param resource $fileHandle Pack handle positioned just after the entry
   *                             header.
   * @param int      $type       Entry type: 6 skips a variable-length
   *                             distance, anything else skips 20 bytes.
   *
   * @return int Decoded size, or 0 when nothing could be inflated.
   */
  private function readDeltaTargetSize( $fileHandle, int $type ): int {
    if( $type === 6 ) {
      $byte = ord( (string)fread( $fileHandle, 1 ) );

      while( $byte & 128 ) {
        $byte = ord( (string)fread( $fileHandle, 1 ) );
      }
    } else {
      fseek( $fileHandle, 20, SEEK_CUR );
    }

    $inflator = inflate_init( ZLIB_ENCODING_DEFLATE );

    if( $inflator === false ) {
      return 0;
    }

    $header = '';

    // 32 inflated bytes are more than enough for the two leading sizes.
    while( !feof( $fileHandle ) && strlen( $header ) < 32 ) {
      $chunk  = fread( $fileHandle, 512 );
      $output = @inflate_add( $inflator, (string)$chunk, ZLIB_NO_FLUSH );

      // A failed stream will not recover; stop instead of scanning the
      // remainder of the pack.
      if( $output === false ) {
        break;
      }

      $header .= $output;

      if( inflate_get_status( $inflator ) === ZLIB_STREAM_END ) {
        break;
      }
    }

    $position = 0;

    if( strlen( $header ) > 0 ) {
      $this->skipSize( $header, $position );

      return $this->readSize( $header, $position );
    }

    return 0;
  }

  /**
   * Advances $position past one variable-length size (bytes with the high
   * bit set are followed by another byte).
   */
  private function skipSize( string $data, int &$position ): void {
    while( ord( $data[$position++] ) & 128 ) {
      // Empty loop body
    }
  }

  /**
   * Decodes one variable-length size (seven bits per byte, least
   * significant group first) and advances $position past it.
   */
  private function readSize( string $data, int &$position ): int {
    $byte  = ord( $data[$position++] );
    $value = $byte & 127;
    $shift = 7;

    while( $byte & 128 ) {
      $byte   = ord( $data[$position++] );
      $value |= (($byte & 127) << $shift);
      $shift += 7;
    }

    return $value;
  }

  /**
   * Returns a cached read handle for $path, opening the file on first use.
   *
   * @return resource|false The handle, or false when the file could not be
   *                        opened (the failure is cached as well).
   */
  private function getHandle( string $path ) {
    if( !isset( $this->fileHandles[$path] ) ) {
      $this->fileHandles[$path] = @fopen( $path, 'rb' );
    }

    return $this->fileHandles[$path];
  }
}