Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/treetrek.git
<?php
require_once __DIR__ . '/../File.php';

class GitDiff {
  private Git $git;
  private const MAX_DIFF_SIZE = 262144;

  public function __construct( Git $git ) {
    $this->git = $git;
  }

  public function diff( string $oldSha, string $newSha ): Generator {
    $oldTree = $oldSha !== '' ? $this->getTreeHash( $oldSha ) : '';
    $newTree = $newSha !== '' ? $this->getTreeHash( $newSha ) : '';

    yield from $this->diffTrees( $oldTree, $newTree );
  }

  public function compare( string $commitHash ): Generator {
    $commitData = $this->git->read( $commitHash );
    $parentHash = '';

    if( preg_match( '/^parent ([0-9a-f]{40})/m', $commitData, $m ) ) {
      $parentHash = $m[1];
    }

    $newTree = $this->getTreeHash( $commitHash );
    $oldTree = $parentHash !== '' ? $this->getTreeHash( $parentHash ) : '';

    yield from $this->diffTrees( $oldTree, $newTree );
  }

  private function getTreeHash( string $commitSha ): string {
    $data   = $this->git->read( $commitSha );
    $result = '';

    if( preg_match( '/^tree ([0-9a-f]{40})/m', $data, $matches ) ) {
      $result = $matches[1];
    }

    return $result;
  }

  private function diffTrees(
    string $oldTreeSha,
    string $newTreeSha,
    string $path = ''
  ): Generator {
    if( $oldTreeSha !== $newTreeSha ) {
      $oldEntries = $oldTreeSha !== ''
        ? $this->parseTree( $oldTreeSha )
        : [];
      $newEntries = $newTreeSha !== ''
        ? $this->parseTree( $newTreeSha )
        : [];
      $allNames   = array_unique(
        array_merge( array_keys( $oldEntries ), array_keys( $newEntries ) )
      );

      sort( $allNames );

      foreach( $allNames as $name ) {
        $old         = $oldEntries[$name] ?? null;
        $new         = $newEntries[$name] ?? null;
        $currentPath = $path !== '' ? "$path/$name" : $name;

        if( !$old && $new ) {
          if( $new['is_dir'] ) {
            yield from $this->diffTrees( '', $new['sha'], $currentPath );
          } else {
            yield $this->createChange( 'A', $currentPath, '', $new['sha'] );
          }
        } elseif( !$new && $old ) {
          if( $old['is_dir'] ) {
            yield from $this->diffTrees( $old['sha'], '', $currentPath );
          } else {
            yield $this->createChange( 'D', $currentPath, $old['sha'], '' );
          }
        } elseif( $old && $new && $old['sha'] !== $new['sha'] ) {
          if( $old['is_dir'] && $new['is_dir'] ) {
            yield from $this->diffTrees(
              $old['sha'],
              $new['sha'],
              $currentPath
            );
          } elseif( !$old['is_dir'] && !$new['is_dir'] ) {
            yield $this->createChange(
              'M',
              $currentPath,
              $old['sha'],
              $new['sha']
            );
          }
        }
      }
    }
  }

  private function parseTree( string $sha ): array {
    $data    = $this->git->read( $sha );
    $entries = [];
    $len     = strlen( $data );
    $pos     = 0;

    while( $pos < $len ) {
      $space = strpos( $data, ' ', $pos );
      $null  = strpos( $data, "\0", $space );

      if( $space === false || $null === false ) {
        break;
      }

      $mode = substr( $data, $pos, $space - $pos );
      $name = substr( $data, $space + 1, $null - $space - 1 );
      $hash = bin2hex( substr( $data, $null + 1, 20 ) );

      $entries[$name] = [
        'mode'   => $mode,
        'sha'    => $hash,
        'is_dir' => $mode === '40000' || $mode === '040000'
      ];

      $pos = $null + 21;
    }

    return $entries;
  }

  private function createChange(
    string $type,
    string $path,
    string $oldSha,
    string $newSha
  ): array {
    $oldSize = $oldSha !== '' ? $this->git->getObjectSize( $oldSha ) : 0;
    $newSize = $newSha !== '' ? $this->git->getObjectSize( $newSha ) : 0;
    $result  = [];

    if( $oldSize > self::MAX_DIFF_SIZE || $newSize > self::MAX_DIFF_SIZE ) {
      $result = [
        'type'      => $type,
        'path'      => $path,
        'is_binary' => true,
        'hunks'     => []
      ];
    } else {
      $oldContent = $oldSha !== '' ? $this->git->read( $oldSha ) : '';
      $newContent = $newSha !== '' ? $this->git->read( $newSha ) : '';
      $vDiffOld   = new VirtualDiffFile( $path, $oldContent );
      $vDiffNew   = new VirtualDiffFile( $path, $newContent );

      $isBinary = ($newSha !== '' && $vDiffNew->isBinary()) ||
                  ($newSha === '' && $oldSha !== '' && $vDiffOld->isBinary());

      $result = [
        'type'      => $type,
        'path'      => $path,
        'is_binary' => $isBinary,
        'hunks'     => $isBinary
          ? null
          : $this->calculateDiff( $oldContent, $newContent )
      ];
    }

    return $result;
  }

  private function calculateDiff( string $old, string $new ): array {
    $oldLines = explode( "\n", str_replace( "\r\n", "\n", $old ) );
    $newLines = explode( "\n", str_replace( "\r\n", "\n", $new ) );
    $m        = count( $oldLines );
    $n        = count( $newLines );
    $start    = 0;
    $end      = 0;

    while( $start < $m && $start < $n &&
           $oldLines[$start] === $newLines[$start] ) {
      $start++;
    }

    while( $m - $end > $start && $n - $end > $start &&
           $oldLines[$m - 1 - $end] === $newLines[$n - 1 - $end] ) {
      $end++;
    }

    $context = 2;
    $limit   = 100000;
    $stream  = [];

    $pStart = max( 0, $start - $context );

    for( $i = $pStart; $i < $start; $i++ ) {
      $stream[] = [
        't'  => ' ',
        'l'  => $oldLines[$i],
        'no' => $i + 1,
        'nn' => $i + 1
      ];
    }

    $oldSlice = array_slice( $oldLines, $start, $m - $start - $end );
    $newSlice = array_slice( $newLines, $start, $n - $start - $end );
    $mid      = [];

    if( (count( $oldSlice ) * count( $newSlice )) > $limit ) {
      $mid = $this->buildFallbackDiff( $oldSlice, $newSlice, $start );
    } else {
      $ops = $this->computeLCS( $oldSlice, $newSlice );
      $mid = $this->buildDiffStream( $ops, $start );
    }

    foreach( $mid as $line ) {
      $stream[] = $line;
    }

    $sLimit = min( $end, $context );

    for( $i = 0; $i < $sLimit; $i++ ) {
      $idxO = $m - $end + $i;
      $idxN = $n - $end + $i;
      $stream[] = [
        't'  => ' ',
        'l'  => $oldLines[$idxO],
        'no' => $idxO + 1,
        'nn' => $idxN + 1
      ];
    }

    return $this->formatDiffOutput( $stream );
  }

  private function formatDiffOutput( array $stream ): array {
    $n       = count( $stream );
    $keep    = array_fill( 0, $n, false );
    $context = 2;

    for( $i = 0; $i < $n; $i++ ) {
      if( $stream[$i]['t'] !== ' ' ) {
        $low  = max( 0, $i - $context );
        $high = min( $n - 1, $i + $context );

        for( $j = $low; $j <= $high; $j++ ) {
          $keep[$j] = true;
        }
      }
    }

    $result = [];
    $buffer = [];

    for( $i = 0; $i < $n; $i++ ) {
      if( $keep[$i] ) {
        $cnt = count( $buffer );

        if( $cnt > 0 ) {
          if( $cnt > 5 ) {
            $result[] = [ 't' => 'gap' ];
          } else {
            foreach( $buffer as $bufLine ) {
              $result[] = $bufLine;
            }
          }
          $buffer = [];
        }

        $result[] = $stream[$i];
      } else {
        $buffer[] = $stream[$i];
      }
    }

    $cnt = count( $buffer );

    if( $cnt > 0 ) {
      if( $cnt > 5 ) {
        $result[] = [ 't' => 'gap' ];
      } else {
        foreach( $buffer as $bufLine ) {
          $result[] = $bufLine;
        }
      }
    }

    return $result;
  }

  private function buildFallbackDiff(
    array $old,
    array $new,
    int $offset
  ): array {
    $stream = [];
    $currO  = $offset + 1;
    $currN  = $offset + 1;

    foreach( $old as $line ) {
      $stream[] = [
        't'  => '-',
        'l'  => $line,
        'no' => $currO++,
        'nn' => null
      ];
    }

    foreach( $new as $line ) {
      $stream[] = [
        't'  => '+',
        'l'  => $line,
        'no' => null,
        'nn' => $currN++
      ];
    }

    return $stream;
  }

  private function buildDiffStream( array $ops, int $start ): array {
    $stream = [];
    $currO  = $start + 1;
    $currN  = $start + 1;

    foreach( $ops as $op ) {
      $stream[] = [
        't'  => $op['t'],
        'l'  => $op['l'],
        'no' => $op['t'] === '+' ? null : $currO++,
        'nn' => $op['t'] === '-' ? null : $currN++
      ];
    }

    return $stream;
  }

  private function computeLCS( array $old, array $new ): array {
    $m = count( $old );
    $n = count( $new );
    $c = array_fill( 0, $m + 1, array_fill( 0, $n + 1, 0 ) );

    for( $i = 1; $i <= $m; $i++ ) {
      for( $j = 1; $j <= $n; $j++ ) {
        $c[$i][$j] = ($old[$i - 1] === $new[$j - 1])
          ? $c[$i - 1][$j - 1] + 1
          : max( $c[$i][$j - 1], $c[$i - 1][$j] );
      }
    }

    $diff = [];
    $i    = $m;
    $j    = $n;

    while( $i > 0 || $j > 0 ) {
      if( $i > 0 && $j > 0 && $old[$i - 1] === $new[$j - 1] ) {
        $diff[] = [ 't' => ' ', 'l' => $old[$i - 1] ];
        $i--;
        $j--;
      } elseif( $j > 0 && ($i === 0 || $c[$i][$j - 1] >= $c[$i - 1][$j]) ) {
        $diff[] = [ 't' => '+', 'l' => $new[$j - 1] ];
        $j--;
      } elseif( $i > 0 && ($j === 0 || $c[$i][$j - 1] < $c[$i - 1][$j]) ) {
        $diff[] = [ 't' => '-', 'l' => $old[$i - 1] ];
        $i--;
      }
    }

    return array_reverse( $diff );
  }
}

class VirtualDiffFile extends File {
  public function __construct( string $name, string $content ) {
    parent::__construct(
      $name,
      '',
      '100644',
      0,
      strlen( $content ),
      $content
    );
  }
}