<?php require_once __DIR__ . '/../File.php'; class GitDiff { private Git $git; private const MAX_DIFF_SIZE = 262144; public function __construct( Git $git ) { $this->git = $git; } public function diff( string $oldSha, string $newSha ): Generator { $oldTree = $oldSha !== '' ? $this->getTreeHash( $oldSha ) : ''; $newTree = $newSha !== '' ? $this->getTreeHash( $newSha ) : ''; yield from $this->diffTrees( $oldTree, $newTree ); } public function compare( string $commitHash ): Generator { $commitData = $this->git->read( $commitHash ); $parentHash = ''; if( preg_match( '/^parent ([0-9a-f]{40})/m', $commitData, $m ) ) { $parentHash = $m[1]; } $newTree = $this->getTreeHash( $commitHash ); $oldTree = $parentHash !== '' ? $this->getTreeHash( $parentHash ) : ''; yield from $this->diffTrees( $oldTree, $newTree ); } private function getTreeHash( string $commitSha ): string { $data = $this->git->read( $commitSha ); $result = ''; if( preg_match( '/^tree ([0-9a-f]{40})/m', $data, $matches ) ) { $result = $matches[1]; } return $result; } private function diffTrees( string $oldTreeSha, string $newTreeSha, string $path = '' ): Generator { if( $oldTreeSha !== $newTreeSha ) { $oldEntries = $oldTreeSha !== '' ? $this->parseTree( $oldTreeSha ) : []; $newEntries = $newTreeSha !== '' ? $this->parseTree( $newTreeSha ) : []; $allNames = array_unique( array_merge( array_keys( $oldEntries ), array_keys( $newEntries ) ) ); sort( $allNames ); foreach( $allNames as $name ) { $old = $oldEntries[$name] ?? null; $new = $newEntries[$name] ?? null; $currentPath = $path !== '' ? "$path/$name" : $name; if( !$old && $new ) { if( $new['file']->isDir() ) { yield from $this->diffTrees( '', $new['sha'], $currentPath ); } else { yield $this->createChange( 'A', $currentPath, '', $new['sha'], null, $new['file'] ); } } elseif( !$new && $old ) { if( $old['file']->isDir() ) { yield from $this->diffTrees( $old['sha'], '', $currentPath ); } else { yield $this->createChange( 'D', $currentPath, $old['sha'], '', $old['file'], null ); } } elseif( $old && $new && $old['sha'] !== $new['sha'] ) { if( $old['file']->isDir() && $new['file']->isDir() ) { yield from $this->diffTrees( $old['sha'], $new['sha'], $currentPath ); } elseif( !$old['file']->isDir() && !$new['file']->isDir() ) { yield $this->createChange( 'M', $currentPath, $old['sha'], $new['sha'], $old['file'], $new['file'] ); } } } } } private function parseTree( string $sha ): array { $data = $this->git->read( $sha ); $entries = []; $this->git->parseTreeData( $data, function( $file, $name, $hash, $mode ) use ( &$entries ) { $entries[$name] = [ 'file' => $file, 'sha' => $hash ]; } ); return $entries; } private function createChange( string $type, string $path, string $oldSha, string $newSha, ?File $oldFile = null, ?File $newFile = null ): array { $oldSize = $oldSha !== '' ? $this->git->getObjectSize( $oldSha ) : 0; $newSize = $newSha !== '' ? $this->git->getObjectSize( $newSha ) : 0; $result = []; if( $oldSize > self::MAX_DIFF_SIZE || $newSize > self::MAX_DIFF_SIZE ) { $result = [ 'type' => $type, 'path' => $path, 'is_binary' => true, 'hunks' => [] ]; } else { $oldContent = $oldSha !== '' ? $this->git->read( $oldSha ) : ''; $newContent = $newSha !== '' ? $this->git->read( $newSha ) : ''; $isBinary = false; if( $newFile !== null ) { $isBinary = $newFile->isBinary(); } elseif( $oldFile !== null ) { $isBinary = $oldFile->isBinary(); } $result = [ 'type' => $type, 'path' => $path, 'is_binary' => $isBinary, 'hunks' => $isBinary ? null : $this->calculateDiff( $oldContent, $newContent ) ]; } return $result; } private function calculateDiff( string $old, string $new ): array { $oldLines = explode( "\n", str_replace( "\r\n", "\n", $old ) ); $newLines = explode( "\n", str_replace( "\r\n", "\n", $new ) ); $m = count( $oldLines ); $n = count( $newLines ); $start = 0; $end = 0; while( $start < $m && $start < $n && $oldLines[$start] === $newLines[$start] ) { $start++; } while( $m - $end > $start && $n - $end > $start && $oldLines[$m - 1 - $end] === $newLines[$n - 1 - $end] ) { $end++; } $context = 2; $limit = 100000; $stream = []; $pStart = max( 0, $start - $context ); for( $i = $pStart; $i < $start; $i++ ) { $stream[] = [ 't' => ' ', 'l' => $oldLines[$i], 'no' => $i + 1, 'nn' => $i + 1 ]; } $oldSlice = array_slice( $oldLines, $start, $m - $start - $end ); $newSlice = array_slice( $newLines, $start, $n - $start - $end ); $mid = []; if( (count( $oldSlice ) * count( $newSlice )) > $limit ) { $mid = $this->buildFallbackDiff( $oldSlice, $newSlice, $start ); } else { $ops = $this->computeLCS( $oldSlice, $newSlice ); $mid = $this->buildDiffStream( $ops, $start ); } foreach( $mid as $line ) { $stream[] = $line; } $sLimit = min( $end, $context ); for( $i = 0; $i < $sLimit; $i++ ) { $idxO = $m - $end + $i; $idxN = $n - $end + $i; $stream[] = [ 't' => ' ', 'l' => $oldLines[$idxO], 'no' => $idxO + 1, 'nn' => $idxN + 1 ]; } return $this->formatDiffOutput( $stream ); } private function formatDiffOutput( array $stream ): array { $n = count( $stream ); $keep = array_fill( 0, $n, false ); $context = 2; for( $i = 0; $i < $n; $i++ ) { if( $stream[$i]['t'] !== ' ' ) { $low = max( 0, $i - $context ); $high = min( $n - 1, $i + $context ); for( $j = $low; $j <= $high; $j++ ) { $keep[$j] = true; } } } $result = []; $buffer = []; for( $i = 0; $i < $n; $i++ ) { if( $keep[$i] ) { $cnt = count( $buffer ); if( $cnt > 0 ) { if( $cnt > 5 ) { $result[] = [ 't' => 'gap' ]; } else { foreach( $buffer as $bufLine ) { $result[] = $bufLine; } } $buffer = []; } $result[] = $stream[$i]; } else { $buffer[] = $stream[$i]; } } $cnt = count( $buffer ); if( $cnt > 0 ) { if( $cnt > 5 ) { $result[] = [ 't' => 'gap' ]; } else { foreach( $buffer as $bufLine ) { $result[] = $bufLine; } } } return $result; } private function buildFallbackDiff( array $old, array $new, int $offset ): array { $stream = []; $currO = $offset + 1; $currN = $offset + 1; foreach( $old as $line ) { $stream[] = [ 't' => '-', 'l' => $line, 'no' => $currO++, 'nn' => null ]; } foreach( $new as $line ) { $stream[] = [ 't' => '+', 'l' => $line, 'no' => null, 'nn' => $currN++ ]; } return $stream; } private function buildDiffStream( array $ops, int $start ): array { $stream = []; $currO = $start + 1; $currN = $start + 1; foreach( $ops as $op ) { $stream[] = [ 't' => $op['t'], 'l' => $op['l'], 'no' => $op['t'] === '+' ? null : $currO++, 'nn' => $op['t'] === '-' ? null : $currN++ ]; } return $stream; } private function computeLCS( array $old, array $new ): array { $m = count( $old ); $n = count( $new ); $c = array_fill( 0, $m + 1, array_fill( 0, $n + 1, 0 ) ); for( $i = 1; $i <= $m; $i++ ) { for( $j = 1; $j <= $n; $j++ ) { $c[$i][$j] = ($old[$i - 1] === $new[$j - 1]) ? $c[$i - 1][$j - 1] + 1 : max( $c[$i][$j - 1], $c[$i - 1][$j] ); } } $diff = []; $i = $m; $j = $n; while( $i > 0 || $j > 0 ) { if( $i > 0 && $j > 0 && $old[$i - 1] === $new[$j - 1] ) { $diff[] = [ 't' => ' ', 'l' => $old[$i - 1] ]; $i--; $j--; } elseif( $j > 0 && ($i === 0 || $c[$i][$j - 1] >= $c[$i - 1][$j]) ) { $diff[] = [ 't' => '+', 'l' => $new[$j - 1] ]; $j--; } elseif( $i > 0 && ($j === 0 || $c[$i][$j - 1] < $c[$i - 1][$j]) ) { $diff[] = [ 't' => '-', 'l' => $old[$i - 1] ]; $i--; } } return array_reverse( $diff ); } }