Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/treetrek.git
<?php
require_once __DIR__ . '/../File.php';

class GitDiff {
  private Git $git;
  private const MAX_DIFF_SIZE = 262144;

  public function __construct( Git $git ) {
    $this->git = $git;
  }

  public function diff( string $oldSha, string $newSha ): Generator {
    $oldTree = $oldSha !== '' ? $this->getTreeHash( $oldSha ) : '';
    $newTree = $newSha !== '' ? $this->getTreeHash( $newSha ) : '';

    yield from $this->diffTrees( $oldTree, $newTree );
  }

  public function compare( string $commitHash ): Generator {
    $commitData = $this->git->read( $commitHash );
    $parentHash = '';

    if( preg_match( '/^parent ([0-9a-f]{40})/m', $commitData, $m ) ) {
      $parentHash = $m[1];
    }

    $newTree = $this->getTreeHash( $commitHash );
    $oldTree = $parentHash !== '' ? $this->getTreeHash( $parentHash ) : '';

    yield from $this->diffTrees( $oldTree, $newTree );
  }

  private function getTreeHash( string $commitSha ): string {
    $data   = $this->git->read( $commitSha );
    $result = '';

    if( preg_match( '/^tree ([0-9a-f]{40})/m', $data, $matches ) ) {
      $result = $matches[1];
    }

    return $result;
  }

  private function diffTrees(
    string $oldTreeSha,
    string $newTreeSha,
    string $path = ''
  ): Generator {
    if( $oldTreeSha !== $newTreeSha ) {
      $oldEntries = $oldTreeSha !== ''
        ? $this->parseTree( $oldTreeSha )
        : [];
      $newEntries = $newTreeSha !== ''
        ? $this->parseTree( $newTreeSha )
        : [];
      $allNames   = array_unique(
        array_merge( array_keys( $oldEntries ), array_keys( $newEntries ) )
      );

      sort( $allNames );

      foreach( $allNames as $name ) {
        $old         = $oldEntries[$name] ?? null;
        $new         = $newEntries[$name] ?? null;
        $currentPath = $path !== '' ? "$path/$name" : $name;

        if( !$old && $new ) {
          if( $new['file']->isDir() ) {
            yield from $this->diffTrees( '', $new['sha'], $currentPath );
          } else {
            yield $this->createChange(
              'A',
              $currentPath,
              '',
              $new['sha'],
              null,
              $new['file']
            );
          }
        } elseif( !$new && $old ) {
          if( $old['file']->isDir() ) {
            yield from $this->diffTrees( $old['sha'], '', $currentPath );
          } else {
            yield $this->createChange(
              'D',
              $currentPath,
              $old['sha'],
              '',
              $old['file'],
              null
            );
          }
        } elseif( $old && $new && $old['sha'] !== $new['sha'] ) {
          if( $old['file']->isDir() && $new['file']->isDir() ) {
            yield from $this->diffTrees(
              $old['sha'],
              $new['sha'],
              $currentPath
            );
          } elseif( !$old['file']->isDir() && !$new['file']->isDir() ) {
            yield $this->createChange(
              'M',
              $currentPath,
              $old['sha'],
              $new['sha'],
              $old['file'],
              $new['file']
            );
          }
        }
      }
    }
  }

  private function parseTree( string $sha ): array {
    $data    = $this->git->read( $sha );
    $entries = [];

    $this->git->parseTreeData(
      $data,
      function( $file, $name, $hash, $mode ) use ( &$entries ) {
        $entries[$name] = [
          'file' => $file,
          'sha'  => $hash
        ];
      }
    );

    return $entries;
  }

  private function createChange(
    string $type,
    string $path,
    string $oldSha,
    string $newSha,
    ?File $oldFile = null,
    ?File $newFile = null
  ): array {
    $oldSize = $oldSha !== '' ? $this->git->getObjectSize( $oldSha ) : 0;
    $newSize = $newSha !== '' ? $this->git->getObjectSize( $newSha ) : 0;
    $result  = [];

    if( $oldSize > self::MAX_DIFF_SIZE || $newSize > self::MAX_DIFF_SIZE ) {
      $result = [
        'type'      => $type,
        'path'      => $path,
        'is_binary' => true,
        'hunks'     => []
      ];
    } else {
      $oldContent = $oldSha !== '' ? $this->git->read( $oldSha ) : '';
      $newContent = $newSha !== '' ? $this->git->read( $newSha ) : '';
      $isBinary   = false;

      if( $newFile !== null ) {
        $isBinary = $newFile->isBinary();
      } elseif( $oldFile !== null ) {
        $isBinary = $oldFile->isBinary();
      }

      $result = [
        'type'      => $type,
        'path'      => $path,
        'is_binary' => $isBinary,
        'hunks'     => $isBinary
          ? null
          : $this->calculateDiff( $oldContent, $newContent )
      ];
    }

    return $result;
  }

  private function calculateDiff( string $old, string $new ): array {
    $oldLines = explode( "\n", str_replace( "\r\n", "\n", $old ) );
    $newLines = explode( "\n", str_replace( "\r\n", "\n", $new ) );
    $m        = count( $oldLines );
    $n        = count( $newLines );
    $start    = 0;
    $end      = 0;

    while( $start < $m && $start < $n &&
           $oldLines[$start] === $newLines[$start] ) {
      $start++;
    }

    while( $m - $end > $start && $n - $end > $start &&
           $oldLines[$m - 1 - $end] === $newLines[$n - 1 - $end] ) {
      $end++;
    }

    $context = 2;
    $limit   = 100000;
    $stream  = [];

    $pStart = max( 0, $start - $context );

    for( $i = $pStart; $i < $start; $i++ ) {
      $stream[] = [
        't'  => ' ',
        'l'  => $oldLines[$i],
        'no' => $i + 1,
        'nn' => $i + 1
      ];
    }

    $oldSlice = array_slice( $oldLines, $start, $m - $start - $end );
    $newSlice = array_slice( $newLines, $start, $n - $start - $end );
    $mid      = [];

    if( (count( $oldSlice ) * count( $newSlice )) > $limit ) {
      $mid = $this->buildFallbackDiff( $oldSlice, $newSlice, $start );
    } else {
      $ops = $this->computeLCS( $oldSlice, $newSlice );
      $mid = $this->buildDiffStream( $ops, $start );
    }

    foreach( $mid as $line ) {
      $stream[] = $line;
    }

    $sLimit = min( $end, $context );

    for( $i = 0; $i < $sLimit; $i++ ) {
      $idxO = $m - $end + $i;
      $idxN = $n - $end + $i;
      $stream[] = [
        't'  => ' ',
        'l'  => $oldLines[$idxO],
        'no' => $idxO + 1,
        'nn' => $idxN + 1
      ];
    }

    return $this->formatDiffOutput( $stream );
  }

  private function formatDiffOutput( array $stream ): array {
    $n       = count( $stream );
    $keep    = array_fill( 0, $n, false );
    $context = 2;

    for( $i = 0; $i < $n; $i++ ) {
      if( $stream[$i]['t'] !== ' ' ) {
        $low  = max( 0, $i - $context );
        $high = min( $n - 1, $i + $context );

        for( $j = $low; $j <= $high; $j++ ) {
          $keep[$j] = true;
        }
      }
    }

    $result = [];
    $buffer = [];

    for( $i = 0; $i < $n; $i++ ) {
      if( $keep[$i] ) {
        $cnt = count( $buffer );

        if( $cnt > 0 ) {
          if( $cnt > 5 ) {
            $result[] = [ 't' => 'gap' ];
          } else {
            foreach( $buffer as $bufLine ) {
              $result[] = $bufLine;
            }
          }

          $buffer = [];
        }

        $result[] = $stream[$i];
      } else {
        $buffer[] = $stream[$i];
      }
    }

    $cnt = count( $buffer );

    if( $cnt > 0 ) {
      if( $cnt > 5 ) {
        $result[] = [ 't' => 'gap' ];
      } else {
        foreach( $buffer as $bufLine ) {
          $result[] = $bufLine;
        }
      }
    }

    return $result;
  }

  private function buildFallbackDiff(
    array $old,
    array $new,
    int $offset
  ): array {
    $stream = [];
    $currO  = $offset + 1;
    $currN  = $offset + 1;

    foreach( $old as $line ) {
      $stream[] = [
        't'  => '-',
        'l'  => $line,
        'no' => $currO++,
        'nn' => null
      ];
    }

    foreach( $new as $line ) {
      $stream[] = [
        't'  => '+',
        'l'  => $line,
        'no' => null,
        'nn' => $currN++
      ];
    }

    return $stream;
  }

  private function buildDiffStream( array $ops, int $start ): array {
    $stream = [];
    $currO  = $start + 1;
    $currN  = $start + 1;

    foreach( $ops as $op ) {
      $stream[] = [
        't'  => $op['t'],
        'l'  => $op['l'],
        'no' => $op['t'] === '+' ? null : $currO++,
        'nn' => $op['t'] === '-' ? null : $currN++
      ];
    }

    return $stream;
  }

  private function computeLCS( array $old, array $new ): array {
    $m = count( $old );
    $n = count( $new );
    $c = array_fill( 0, $m + 1, array_fill( 0, $n + 1, 0 ) );

    for( $i = 1; $i <= $m; $i++ ) {
      for( $j = 1; $j <= $n; $j++ ) {
        $c[$i][$j] = ($old[$i - 1] === $new[$j - 1])
          ? $c[$i - 1][$j - 1] + 1
          : max( $c[$i][$j - 1], $c[$i - 1][$j] );
      }
    }

    $diff = [];
    $i    = $m;
    $j    = $n;

    while( $i > 0 || $j > 0 ) {
      if( $i > 0 && $j > 0 && $old[$i - 1] === $new[$j - 1] ) {
        $diff[] = [ 't' => ' ', 'l' => $old[$i - 1] ];
        $i--;
        $j--;
      } elseif( $j > 0 && ($i === 0 || $c[$i][$j - 1] >= $c[$i - 1][$j]) ) {
        $diff[] = [ 't' => '+', 'l' => $new[$j - 1] ];
        $j--;
      } elseif( $i > 0 && ($j === 0 || $c[$i][$j - 1] < $c[$i - 1][$j]) ) {
        $diff[] = [ 't' => '-', 'l' => $old[$i - 1] ];
        $i--;
      }
    }

    return array_reverse( $diff );
  }
}