Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/treetrek.git
<?php
require_once 'File.php';

class GitDiff {
  private $git;

  public function __construct(Git $git) {
    $this->git = $git;
  }

  public function compare(string $commitHash) {
    $commitData = $this->git->read($commitHash);
    $parentHash = '';

    if (preg_match('/^parent ([0-9a-f]{40})/m', $commitData, $matches)) {
      $parentHash = $matches[1];
    }

    $newTree = $this->getTreeHash($commitHash);
    $oldTree = $parentHash ? $this->getTreeHash($parentHash) : null;

    return $this->diffTrees($oldTree, $newTree);
  }

  private function getTreeHash($commitSha) {
    $data = $this->git->read($commitSha);
    if (preg_match('/^tree ([0-9a-f]{40})/m', $data, $matches)) {
      return $matches[1];
    }
    return null;
  }

  private function diffTrees($oldTreeSha, $newTreeSha, $path = '') {
    $changes = [];

    if ($oldTreeSha === $newTreeSha) return [];

    $oldEntries = $oldTreeSha ? $this->parseTree($oldTreeSha) : [];
    $newEntries = $newTreeSha ? $this->parseTree($newTreeSha) : [];

    $allNames = array_unique(array_merge(array_keys($oldEntries), array_keys($newEntries)));
    sort($allNames);

    foreach ($allNames as $name) {
      $old = $oldEntries[$name] ?? null;
      $new = $newEntries[$name] ?? null;
      $currentPath = $path ? "$path/$name" : $name;

      if (!$old) {
        if ($new['is_dir']) {
           $changes = array_merge($changes, $this->diffTrees(null, $new['sha'], $currentPath));
        } else {
           $changes[] = $this->createChange('A', $currentPath, null, $new['sha']);
        }
      } elseif (!$new) {
        if ($old['is_dir']) {
           $changes = array_merge($changes, $this->diffTrees($old['sha'], null, $currentPath));
        } else {
           $changes[] = $this->createChange('D', $currentPath, $old['sha'], null);
        }
      } elseif ($old['sha'] !== $new['sha']) {
        if ($old['is_dir'] && $new['is_dir']) {
          $changes = array_merge($changes, $this->diffTrees($old['sha'], $new['sha'], $currentPath));
        } elseif (!$old['is_dir'] && !$new['is_dir']) {
          $changes[] = $this->createChange('M', $currentPath, $old['sha'], $new['sha']);
        }
      }
    }

    return $changes;
  }

  private function parseTree($sha) {
    $data = $this->git->read($sha);
    $entries = [];
    $len = strlen($data);
    $pos = 0;

    while ($pos < $len) {
      $space = strpos($data, ' ', $pos);
      $null = strpos($data, "\0", $space);

      if ($space === false || $null === false) break;

      $mode = substr($data, $pos, $space - $pos);
      $name = substr($data, $space + 1, $null - $space - 1);
      $hash = bin2hex(substr($data, $null + 1, 20));

      $entries[$name] = [
        'mode' => $mode,
        'sha' => $hash,
        'is_dir' => ($mode === '40000' || $mode === '040000')
      ];

      $pos = $null + 21;
    }
    return $entries;
  }

  private function createChange($type, $path, $oldSha, $newSha) {
    $oldContent = $oldSha ? $this->git->read($oldSha) : '';
    $newContent = $newSha ? $this->git->read($newSha) : '';

    $isBinary = false;

    if ($newSha) {
        $f = new VirtualDiffFile($path, $newContent);
        if ($f->isBinary()) $isBinary = true;
    }
    if (!$isBinary && $oldSha) {
        $f = new VirtualDiffFile($path, $oldContent);
        if ($f->isBinary()) $isBinary = true;
    }

    $diff = null;
    if (!$isBinary) {
      $diff = $this->calculateDiff($oldContent, $newContent);
    }

    return [
      'type' => $type,
      'path' => $path,
      'is_binary' => $isBinary,
      'hunks' => $diff
    ];
  }

  private function calculateDiff($old, $new) {
    // Normalize line endings
    $old = str_replace("\r\n", "\n", $old);
    $new = str_replace("\r\n", "\n", $new);

    $oldLines = explode("\n", $old);
    $newLines = explode("\n", $new);

    $m = count($oldLines);
    $n = count($newLines);

    // LCS Algorithm
    $start = 0;
    while ($start < $m && $start < $n && $oldLines[$start] === $newLines[$start]) {
      $start++;
    }

    $end = 0;
    while ($m - $end > $start && $n - $end > $start && $oldLines[$m - 1 - $end] === $newLines[$n - 1 - $end]) {
      $end++;
    }

    $oldSlice = array_slice($oldLines, $start, $m - $start - $end);
    $newSlice = array_slice($newLines, $start, $n - $start - $end);

    $ops = $this->computeLCS($oldSlice, $newSlice);

    // Grouping Optimization: Reorder interleaved +/- to be - then +
    $groupedOps = [];
    $bufferDel = [];
    $bufferAdd = [];

    foreach ($ops as $op) {
        if ($op['t'] === ' ') {
            foreach ($bufferDel as $o) $groupedOps[] = $o;
            foreach ($bufferAdd as $o) $groupedOps[] = $o;
            $bufferDel = [];
            $bufferAdd = [];
            $groupedOps[] = $op;
        } elseif ($op['t'] === '-') {
            $bufferDel[] = $op;
        } elseif ($op['t'] === '+') {
            $bufferAdd[] = $op;
        }
    }
    foreach ($bufferDel as $o) $groupedOps[] = $o;
    foreach ($bufferAdd as $o) $groupedOps[] = $o;
    $ops = $groupedOps;

    // Generate Stream with Context
    $stream = [];

    // Prefix context
    for ($i = 0; $i < $start; $i++) {
        $stream[] = ['t' => ' ', 'l' => $oldLines[$i], 'no' => $i + 1, 'nn' => $i + 1];
    }

    $currO = $start + 1;
    $currN = $start + 1;

    foreach ($ops as $op) {
        if ($op['t'] === ' ') {
            $stream[] = ['t' => ' ', 'l' => $op['l'], 'no' => $currO++, 'nn' => $currN++];
        } elseif ($op['t'] === '-') {
            $stream[] = ['t' => '-', 'l' => $op['l'], 'no' => $currO++, 'nn' => null];
        } elseif ($op['t'] === '+') {
            $stream[] = ['t' => '+', 'l' => $op['l'], 'no' => null, 'nn' => $currN++];
        }
    }

    // Suffix context
    for ($i = $m - $end; $i < $m; $i++) {
        $stream[] = ['t' => ' ', 'l' => $oldLines[$i], 'no' => $currO++, 'nn' => $currN++];
    }

    // Filter to Hunks
    $finalLines = [];
    $lastVisibleIndex = -1;
    $streamLen = count($stream);
    $contextLines = 3;

    for ($i = 0; $i < $streamLen; $i++) {
        $show = false;

        if ($stream[$i]['t'] !== ' ') {
            $show = true;
        } else {
            // Check ahead
            for ($j = 1; $j <= $contextLines; $j++) {
                if (($i + $j) < $streamLen && $stream[$i + $j]['t'] !== ' ') {
                    $show = true;
                    break;
                }
            }
            // Check behind
            if (!$show) {
                for ($j = 1; $j <= $contextLines; $j++) {
                    if (($i - $j) >= 0 && $stream[$i - $j]['t'] !== ' ') {
                        $show = true;
                        break;
                    }
                }
            }
        }

        if ($show) {
            if ($lastVisibleIndex !== -1 && $i > $lastVisibleIndex + 1) {
                $finalLines[] = ['t' => 'gap'];
            }
            $finalLines[] = $stream[$i];
            $lastVisibleIndex = $i;
        }
    }

    return $finalLines;
  }

  private function computeLCS($old, $new) {
    $m = count($old);
    $n = count($new);
    $c = array_fill(0, $m + 1, array_fill(0, $n + 1, 0));

    for ($i = 1; $i <= $m; $i++) {
      for ($j = 1; $j <= $n; $j++) {
        if ($old[$i-1] === $new[$j-1]) {
          $c[$i][$j] = $c[$i-1][$j-1] + 1;
        } else {
          $c[$i][$j] = max($c[$i][$j-1], $c[$i-1][$j]);
        }
      }
    }

    $diff = [];
    $i = $m; $j = $n;
    while ($i > 0 || $j > 0) {
      if ($i > 0 && $j > 0 && $old[$i-1] === $new[$j-1]) {
        array_unshift($diff, ['t' => ' ', 'l' => $old[$i-1]]);
        $i--; $j--;
      } elseif ($j > 0 && ($i === 0 || $c[$i][$j-1] >= $c[$i-1][$j])) {
        array_unshift($diff, ['t' => '+', 'l' => $new[$j-1]]);
        $j--;
      } elseif ($i > 0 && ($j === 0 || $c[$i][$j-1] < $c[$i-1][$j])) {
        array_unshift($diff, ['t' => '-', 'l' => $old[$i-1]]);
        $i--;
      }
    }
    return $diff;
  }
}

class VirtualDiffFile extends File {
  private $content;
  private $vName;

  public function __construct($name, $content) {
    parent::__construct($name, '', '100644', 0, strlen($content));
    $this->vName = $name;
    $this->content = $content;
  }

  public function isBinary(): bool {
    $buffer = substr($this->content, 0, 12);
    return MediaTypeSniffer::isBinary($buffer, $this->vName);
  }
}