Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/treetrek.git

Simplifies file parsing

Author: Dave Jarvis <email>
Date: 2026-02-14 16:37:21 GMT-0800
Commit: b8a8ad8dfead96e67a20033f0d434d6fba08737e
Parent: d1bc5e9
git/Git.php
class Git {
- private const CHUNK_SIZE = 128;
- private const MAX_READ_SIZE = 1048576;
-
- private string $repoPath;
- private string $objectsPath;
-
- private GitRefs $refs;
- private GitPacks $packs;
-
- public function __construct( string $repoPath ) {
- $this->setRepository( $repoPath );
- }
-
- public function setRepository( string $repoPath ): void {
- $this->repoPath = rtrim( $repoPath, '/' );
- $this->objectsPath = $this->repoPath . '/objects';
-
- $this->refs = new GitRefs( $this->repoPath );
- $this->packs = new GitPacks( $this->objectsPath );
- }
-
- public function resolve( string $reference ): string {
- return $this->refs->resolve( $reference );
- }
-
- public function getMainBranch(): array {
- return $this->refs->getMainBranch();
- }
-
- public function eachBranch( callable $callback ): void {
- $this->refs->scanRefs( 'refs/heads', $callback );
- }
-
- public function eachTag( callable $callback ): void {
- $this->refs->scanRefs( 'refs/tags', function( $name, $sha ) use (
- $callback
- ) {
- $data = $this->read( $sha );
- $tag = $this->parseTagData( $name, $sha, $data );
-
- $callback( $tag );
- } );
- }
-
- private function parseTagData(
- string $name,
- string $sha,
- string $data
- ): Tag {
- $isAnnotated = strncmp( $data, 'object ', 7 ) === 0;
-
- $targetSha = $isAnnotated
- ? $this->extractPattern(
- $data,
- '/^object ([0-9a-f]{40})$/m',
- 1,
- $sha
- )
- : $sha;
-
- $pattern = $isAnnotated
- ? '/^tagger (.*) <(.*)> (\d+) [+\-]\d{4}$/m'
- : '/^author (.*) <(.*)> (\d+) [+\-]\d{4}$/m';
-
- $identity = $this->parseIdentity( $data, $pattern );
- $message = $this->extractMessage( $data );
-
- return new Tag(
- $name,
- $sha,
- $targetSha,
- $identity['timestamp'],
- $message,
- $identity['name']
- );
- }
-
- private function extractPattern(
- string $data,
- string $pattern,
- int $group,
- string $default = ''
- ): string {
- $matches = [];
-
- $result = preg_match( $pattern, $data, $matches )
- ? $matches[$group]
- : $default;
-
- return $result;
- }
-
- private function parseIdentity( string $data, string $pattern ): array {
- $matches = [];
- $found = preg_match( $pattern, $data, $matches );
-
- return [
- 'name' => $found ? trim( $matches[1] ) : 'Unknown',
- 'email' => $found ? $matches[2] : '',
- 'timestamp' => $found ? (int)$matches[3] : 0
- ];
- }
-
- private function extractMessage( string $data ): string {
- $pos = strpos( $data, "\n\n" );
-
- return $pos !== false ? trim( substr( $data, $pos + 2 ) ) : '';
- }
-
- public function getObjectSize( string $sha ): int {
- $size = $this->packs->getSize( $sha );
-
- return $size !== null ? $size : $this->getLooseObjectSize( $sha );
- }
-
- public function peek( string $sha, int $length = 255 ): string {
- $size = $this->packs->getSize( $sha );
-
- return $size === null
- ? $this->peekLooseObject( $sha, $length )
- : $this->packs->peek( $sha, $length ) ?? '';
- }
-
- public function read( string $sha ): string {
- $size = $this->getObjectSize( $sha );
-
- if( $size > self::MAX_READ_SIZE ) {
- return '';
- }
-
- $content = '';
-
- $this->slurp( $sha, function( $chunk ) use ( &$content ) {
- $content .= $chunk;
- } );
-
- return $content;
- }
-
- public function readFile( string $hash, string $name ) {
- return new File(
- $name,
- $hash,
- '100644',
- 0,
- $this->getObjectSize( $hash ),
- $this->peek( $hash )
- );
- }
-
- public function stream( string $sha, callable $callback ): void {
- $this->slurp( $sha, $callback );
- }
-
- private function slurp( string $sha, callable $callback ): void {
- $loosePath = $this->getLoosePath( $sha );
-
- if( is_file( $loosePath ) ) {
- $this->slurpLooseObject( $loosePath, $callback );
- } else {
- $this->slurpPackedObject( $sha, $callback );
- }
- }
-
- private function slurpLooseObject(
- string $path,
- callable $callback
- ): void {
- $this->withInflatedFile(
- $path,
- function( $fileHandle, $inflator ) use ( $callback ) {
- $buffer = '';
- $headerFound = false;
-
- while( !feof( $fileHandle ) ) {
- $chunk = fread( $fileHandle, 16384 );
- $inflatedChunk = inflate_add( $inflator, $chunk );
-
- if( $inflatedChunk === false ) {
- break;
- }
-
- $headerFound = $this->processInflatedChunk(
- $inflatedChunk,
- $headerFound,
- $buffer,
- $callback
- );
- }
- }
- );
- }
-
- private function withInflatedFile( string $path, callable $callback ): void {
- $fileHandle = fopen( $path, 'rb' );
- $inflator = $fileHandle ? inflate_init( ZLIB_ENCODING_DEFLATE ) : null;
-
- if( $fileHandle && $inflator ) {
- $callback( $fileHandle, $inflator );
- fclose( $fileHandle );
- }
- }
-
- private function processInflatedChunk(
- string $chunk,
- bool $headerFound,
- string &$buffer,
- callable $callback
- ): bool {
- if( !$headerFound ) {
- $buffer .= $chunk;
- $nullPos = strpos( $buffer, "\0" );
-
- if( $nullPos !== false ) {
- $body = substr( $buffer, $nullPos + 1 );
-
- if( $body !== '' ) {
- $callback( $body );
- }
-
- $buffer = '';
- return true;
- }
- } else {
- $callback( $chunk );
- }
-
- return $headerFound;
- }
-
- private function slurpPackedObject(
- string $sha,
- callable $callback
- ): void {
- $streamed = $this->packs->stream( $sha, $callback );
-
- if( !$streamed ) {
- $data = $this->packs->read( $sha );
-
- if( $data !== null && $data !== '' ) {
- $callback( $data );
- }
- }
- }
-
- private function peekLooseObject( string $sha, int $length ): string {
- $path = $this->getLoosePath( $sha );
-
- return is_file( $path )
- ? $this->inflateLooseObjectPrefix( $path, $length )
- : '';
- }
-
- private function inflateLooseObjectPrefix(
- string $path,
- int $length
- ): string {
- $buffer = '';
-
- $this->withInflatedFile(
- $path,
- function( $fileHandle, $inflator ) use ( $length, &$buffer ) {
- $headerFound = false;
-
- while( !feof( $fileHandle ) && strlen( $buffer ) < $length ) {
- $chunk = fread( $fileHandle, 128 );
- $inflated = inflate_add( $inflator, $chunk );
-
- if( $inflated === false ) {
- break;
- }
-
- $headerFound = $this->appendPrefixChunk(
- $inflated,
- $headerFound,
- $buffer
- );
- }
-
- $buffer = substr( $buffer, 0, $length );
- }
- );
-
- return $buffer;
- }
-
- private function appendPrefixChunk(
- string $chunk,
- bool $headerFound,
- string &$buffer
- ): bool {
- if( !$headerFound ) {
- $nullPos = strpos( $chunk, "\0" );
-
- if( $nullPos !== false ) {
- $buffer .= substr( $chunk, $nullPos + 1 );
- return true;
- }
- } else {
- $buffer .= $chunk;
- }
-
- return $headerFound;
- }
-
- public function history( string $ref, int $limit, callable $callback ): void {
- $currentSha = $this->resolve( $ref );
- $count = 0;
-
- while( $currentSha !== '' && $count < $limit ) {
- $commit = $this->parseCommit( $currentSha );
-
- if( $commit === null ) {
- break;
- }
-
- $callback( $commit );
- $currentSha = $commit->parentSha;
- $count++;
- }
- }
-
- private function parseCommit( string $sha ): ?object {
- $data = $this->read( $sha );
-
- return $data === '' ? null : $this->buildCommitObject( $sha, $data );
- }
-
- private function buildCommitObject( string $sha, string $data ): object {
- $identity = $this->parseIdentity( $data, '/^author (.*) <(.*)> (\d+)/m' );
- $message = $this->extractMessage( $data );
- $parentSha = $this->extractPattern(
- $data,
- '/^parent ([0-9a-f]{40})$/m',
- 1
- );
-
- return (object)[
- 'sha' => $sha,
- 'message' => $message,
- 'author' => $identity['name'],
- 'email' => $identity['email'],
- 'date' => $identity['timestamp'],
- 'parentSha' => $parentSha
- ];
- }
-
- public function walk( string $refOrSha, callable $callback ): void {
- $sha = $this->resolve( $refOrSha );
-
- if( $sha !== '' ) {
- $this->walkTree( $sha, $callback );
- }
- }
-
- private function walkTree( string $sha, callable $callback ): void {
- $data = $this->read( $sha );
- $treeData = $data !== '' && preg_match(
- '/^tree ([0-9a-f]{40})$/m',
- $data,
- $matches
- ) ? $this->read( $matches[1] ) : $data;
-
- if( $treeData !== '' && $this->isTreeData( $treeData ) ) {
- $this->processTree( $treeData, $callback );
- }
- }
-
- private function processTree( string $data, callable $callback ): void {
- $position = 0;
- $length = strlen( $data );
-
- while( $position < $length ) {
- $result = $this->parseTreeEntry( $data, $position, $length );
-
- if( $result === null ) {
- break;
- }
-
- $callback( $result['file'] );
- $position = $result['nextPosition'];
- }
- }
-
- private function parseTreeEntry(
- string $data,
- int $position,
- int $length
- ): ?array {
- $spacePos = strpos( $data, ' ', $position );
- $nullPos = strpos( $data, "\0", $spacePos );
-
- $hasValidPositions =
- $spacePos !== false &&
- $nullPos !== false &&
- $nullPos + 21 <= $length;
-
- return $hasValidPositions
- ? $this->buildTreeEntryResult( $data, $position, $spacePos, $nullPos )
- : null;
- }
-
- private function buildTreeEntryResult(
- string $data,
- int $position,
- int $spacePos,
- int $nullPos
- ): array {
- $mode = substr( $data, $position, $spacePos - $position );
- $name = substr( $data, $spacePos + 1, $nullPos - $spacePos - 1 );
- $sha = bin2hex( substr( $data, $nullPos + 1, 20 ) );
-
- $isDirectory = $mode === '40000' || $mode === '040000';
- $size = $isDirectory ? 0 : $this->getObjectSize( $sha );
- $contents = $isDirectory ? '' : $this->peek( $sha );
-
- $file = new File( $name, $sha, $mode, 0, $size, $contents );
-
- return [
- 'file' => $file,
- 'nextPosition' => $nullPos + 21
- ];
- }
-
- private function isTreeData( string $data ): bool {
- $pattern = '/^(40000|100644|100755|120000|160000) /';
- $minLength = strlen( $data ) >= 25;
- $matchesPattern = $minLength && preg_match( $pattern, $data );
- $nullPos = $matchesPattern ? strpos( $data, "\0" ) : false;
-
- return $matchesPattern &&
- $nullPos !== false &&
- $nullPos + 21 <= strlen( $data );
- }
-
- private function getLoosePath( string $sha ): string {
- return "{$this->objectsPath}/" .
- substr( $sha, 0, 2 ) . "/" .
- substr( $sha, 2 );
- }
-
- private function getLooseObjectSize( string $sha ): int {
- $path = $this->getLoosePath( $sha );
-
- return is_file( $path ) ? $this->readLooseObjectHeader( $path ) : 0;
- }
-
- private function readLooseObjectHeader( string $path ): int {
- $size = 0;
-
- $this->withInflatedFile(
- $path,
- function( $fileHandle, $inflator ) use ( &$size ) {
- $data = '';
-
- while( !feof( $fileHandle ) ) {
- $chunk = fread( $fileHandle, self::CHUNK_SIZE );
- $output = inflate_add( $inflator, $chunk, ZLIB_NO_FLUSH );
-
- if( $output === false ) {
- break;
- }
-
- $data .= $output;
-
- if( strpos( $data, "\0" ) !== false ) {
- break;
- }
- }
-
- $size = $this->parseSizeFromHeader( $data );
- }
- );
-
- return $size;
- }
-
- private function parseSizeFromHeader( string $data ): int {
- $header = explode( "\0", $data, 2 )[0];
- $parts = explode( ' ', $header );
-
- return isset( $parts[1] ) ? (int)$parts[1] : 0;
+ /** Largest object size, in bytes, that read() will load into memory (1 MiB). */
+ private const MAX_READ_SIZE = 1024 * 1024;
+
+ /** Repository path with any trailing slashes removed. */
+ private string $repoPath;
+
+ /** Path of the "objects" directory directly beneath $repoPath. */
+ private string $objectsPath;
+
+ /** Reference helper constructed from $repoPath. */
+ private GitRefs $refs;
+
+ /** Pack helper constructed from $objectsPath. */
+ private GitPacks $packs;
+
+ /**
+  * Creates a reader bound to the given repository.
+  *
+  * @param string $repoPath Repository path, handed unchanged to
+  *                         setRepository().
+  */
+ public function __construct( string $repoPath ) {
+   $this->setRepository( $repoPath );
+ }
+
+ /**
+  * Points this instance at a repository and rebuilds its helpers.
+  *
+  * Trailing slashes are stripped from the path; the object store is taken
+  * to be the "objects" directory directly beneath it.
+  *
+  * @param string $repoPath Repository path.
+  */
+ public function setRepository( string $repoPath ): void {
+   $root    = rtrim( $repoPath, '/' );
+   $objects = $root . '/objects';
+
+   $this->repoPath    = $root;
+   $this->objectsPath = $objects;
+
+   $this->refs  = new GitRefs( $root );
+   $this->packs = new GitPacks( $objects );
+ }
+
+ /**
+  * Resolves a reference by delegating to the ref helper.
+  *
+  * @param string $reference Reference passed through to GitRefs::resolve().
+  * @return string The value returned by GitRefs::resolve(); walk() and
+  *                history() treat an empty string as "nothing to visit".
+  */
+ public function resolve( string $reference ): string {
+   $resolved = $this->refs->resolve( $reference );
+
+   return $resolved;
+ }
+
+ /**
+  * Returns the main branch as reported by the ref helper.
+  *
+  * @return array Result of GitRefs::getMainBranch(); its shape is defined
+  *               by GitRefs and is not visible from this class.
+  */
+ public function getMainBranch(): array {
+   $mainBranch = $this->refs->getMainBranch();
+
+   return $mainBranch;
+ }
+
+ /**
+  * Visits every reference found under "refs/heads".
+  *
+  * @param callable $callback Handed directly to GitRefs::scanRefs();
+  *                           presumably invoked as ( $name, $sha ), as in
+  *                           eachTag() -- confirm against GitRefs.
+  */
+ public function eachBranch( callable $callback ): void {
+   $this->refs->scanRefs( 'refs/heads', $callback );
+ }
+
+ /**
+  * Visits every reference found under "refs/tags" as a parsed Tag.
+  *
+  * For each ( name, sha ) pair reported by the ref scan, the referenced
+  * object is read and converted with parseTagData().
+  *
+  * @param callable $callback Invoked once per tag with a single Tag argument.
+  */
+ public function eachTag( callable $callback ): void {
+   $visitor = function( $name, $sha ) use ( $callback ) {
+     $callback( $this->parseTagData( $name, $sha, $this->read( $sha ) ) );
+   };
+
+   $this->refs->scanRefs( 'refs/tags', $visitor );
+ }
+
+ /**
+  * Builds a Tag from the raw object a tag reference points at.
+  *
+  * Data starting with "object " is treated as an annotated tag: the target
+  * comes from its "object" line (falling back to $sha) and the identity
+  * from its "tagger" line. Any other data is treated as the tagged object
+  * itself: the target is $sha and the identity comes from the "author" line.
+  *
+  * @param string $name Tag name.
+  * @param string $sha  Hash the tag reference resolved to.
+  * @param string $data Raw contents of that object.
+  * @return Tag
+  */
+ private function parseTagData(
+   string $name,
+   string $sha,
+   string $data
+ ): Tag {
+   $targetSha = $sha;
+   $pattern   = '/^author (.*) <(.*)> (\d+) [+\-]\d{4}$/m';
+
+   if( strncmp( $data, 'object ', 7 ) === 0 ) {
+     $targetSha = $this->extractPattern(
+       $data,
+       '/^object ([0-9a-f]{40})$/m',
+       1,
+       $sha
+     );
+     $pattern = '/^tagger (.*) <(.*)> (\d+) [+\-]\d{4}$/m';
+   }
+
+   $who  = $this->parseIdentity( $data, $pattern );
+   $text = $this->extractMessage( $data );
+
+   return new Tag(
+     $name,
+     $sha,
+     $targetSha,
+     $who['timestamp'],
+     $text,
+     $who['name']
+   );
+ }
+
+ /**
+  * Returns one capture group of the first match of a pattern.
+  *
+  * @param string $data    Text to search.
+  * @param string $pattern PCRE pattern.
+  * @param int    $group   Index of the capture group to return.
+  * @param string $default Value returned when the pattern does not match.
+  * @return string The captured text, or $default.
+  */
+ private function extractPattern(
+   string $data,
+   string $pattern,
+   int $group,
+   string $default = ''
+ ): string {
+   $captures = [];
+
+   if( preg_match( $pattern, $data, $captures ) ) {
+     return $captures[$group];
+   }
+
+   return $default;
+ }
+
+ /**
+  * Extracts a name, e-mail address and timestamp from an identity line.
+  *
+  * @param string $data    Text to search.
+  * @param string $pattern PCRE pattern whose groups 1, 2 and 3 capture the
+  *                        name, e-mail address and timestamp.
+  * @return array{name: string, email: string, timestamp: int}
+  *         'Unknown', '' and 0 when the pattern does not match.
+  */
+ private function parseIdentity( string $data, string $pattern ): array {
+   $parts = [];
+
+   if( !preg_match( $pattern, $data, $parts ) ) {
+     return [
+       'name' => 'Unknown',
+       'email' => '',
+       'timestamp' => 0
+     ];
+   }
+
+   return [
+     'name' => trim( $parts[1] ),
+     'email' => $parts[2],
+     'timestamp' => (int)$parts[3]
+   ];
+ }
+
+ /**
+  * Returns the text after the first blank line, trimmed.
+  *
+  * @param string $data Raw object contents.
+  * @return string The trimmed message, or '' when there is no blank line.
+  */
+ private function extractMessage( string $data ): string {
+   $separator = strpos( $data, "\n\n" );
+
+   if( $separator === false ) {
+     return '';
+   }
+
+   return trim( substr( $data, $separator + 2 ) );
+ }
+
+ /**
+  * Returns the size of an object.
+  *
+  * The pack helper is asked first; when it reports null the size is taken
+  * from the loose object's header instead.
+  *
+  * @param string $sha Object hash.
+  * @return int Size from the packs, otherwise from the loose object
+  *             (0 when no loose object file exists).
+  */
+ public function getObjectSize( string $sha ): int {
+   return $this->packs->getSize( $sha )
+     ?? $this->getLooseObjectSize( $sha );
+ }
+
+ /**
+  * Returns the leading bytes of an object without reading all of it.
+  *
+  * @param string $sha    Object hash.
+  * @param int    $length Maximum number of bytes wanted.
+  * @return string Prefix from the loose object when the pack helper reports
+  *                no size for $sha; otherwise the pack helper's peek result,
+  *                with null mapped to ''.
+  */
+ public function peek( string $sha, int $length = 255 ): string {
+   if( $this->packs->getSize( $sha ) === null ) {
+     return $this->peekLooseObject( $sha, $length );
+   }
+
+   return $this->packs->peek( $sha, $length ) ?? '';
+ }
+
+ /**
+  * Reads an entire object into memory.
+  *
+  * @param string $sha Object hash.
+  * @return string Object contents; '' when the reported size exceeds
+  *                MAX_READ_SIZE or when nothing was streamed.
+  */
+ public function read( string $sha ): string {
+   $content = '';
+
+   // Oversized objects are skipped rather than loaded.
+   if( $this->getObjectSize( $sha ) <= self::MAX_READ_SIZE ) {
+     $this->slurp( $sha, function( $chunk ) use ( &$content ) {
+       $content .= $chunk;
+     } );
+   }
+
+   return $content;
+ }
+
+ /**
+  * Builds a File for a single object.
+  *
+  * The File is created with the fixed mode string '100644', a literal 0 as
+  * its fourth constructor argument, the object's size and a prefix of its
+  * contents obtained from peek().
+  *
+  * @param string $hash Object hash.
+  * @param string $name Name given to the File.
+  * @return File
+  */
+ public function readFile( string $hash, string $name ) {
+   $size    = $this->getObjectSize( $hash );
+   $preview = $this->peek( $hash );
+
+   return new File( $name, $hash, '100644', 0, $size, $preview );
+ }
+
+ /**
+  * Streams an object's contents to a callback, chunk by chunk.
+  *
+  * Unlike read(), no size limit is checked here.
+  *
+  * @param string   $sha      Object hash.
+  * @param callable $callback Receives each chunk of content.
+  */
+ public function stream( string $sha, callable $callback ): void {
+   $this->slurp( $sha, $callback );
+ }
+
+ /**
+  * Feeds an object's contents to a callback from whichever store has it.
+  *
+  * A loose object file takes precedence; when none exists the pack helper
+  * is used.
+  *
+  * @param string   $sha      Object hash.
+  * @param callable $callback Receives each chunk of content.
+  */
+ private function slurp( string $sha, callable $callback ): void {
+   $loosePath = $this->getLoosePath( $sha );
+
+   if( !is_file( $loosePath ) ) {
+     $this->slurpPackedObject( $sha, $callback );
+     return;
+   }
+
+   $this->slurpLooseObject( $loosePath, $callback );
+ }
+
+ /**
+  * Inflates a loose object file and hands its contents to a processor.
+  *
+  * Inflated output is buffered until the first NUL byte, which ends the
+  * object header. The processor is then called as
+  * $processor( $body, $header ), where $body is whatever followed the NUL
+  * in the buffered data (possibly ''). Every later chunk is passed as
+  * $processor( $chunk, null ). Iteration stops at end of file, on a read
+  * or inflate failure, or as soon as the processor returns false.
+  *
+  * @param string   $path      Path of the compressed loose object.
+  * @param callable $processor Chunk consumer; return false to stop early.
+  */
+ private function iterateInflated( string $path, callable $processor ): void {
+   $this->withInflatedFile(
+     $path,
+     function( $fileHandle, $inflator ) use ( $processor ) {
+       $headerFound = false;
+       $buffer = '';
+
+       while( !feof( $fileHandle ) ) {
+         $chunk = fread( $fileHandle, 16384 );
+
+         // A failed read never sets EOF; bail out instead of looping on
+         // it and feeding a non-string to inflate_add().
+         if( $chunk === false ) {
+           break;
+         }
+
+         $inflated = inflate_add( $inflator, $chunk );
+
+         if( $inflated === false ) {
+           break;
+         }
+
+         if( $headerFound ) {
+           if( $processor( $inflated, null ) === false ) {
+             return;
+           }
+
+           continue;
+         }
+
+         $buffer .= $inflated;
+         $nullPos = strpos( $buffer, "\0" );
+
+         if( $nullPos !== false ) {
+           $headerFound = true;
+           $header = substr( $buffer, 0, $nullPos );
+           $body = substr( $buffer, $nullPos + 1 );
+
+           // The buffer is only needed while searching for the header.
+           $buffer = '';
+
+           if( $processor( $body, $header ) === false ) {
+             return;
+           }
+         }
+       }
+     }
+   );
+ }
+
+ /**
+  * Streams the body of a loose object to a callback.
+  *
+  * Empty chunks are dropped; the object header is never forwarded.
+  *
+  * @param string   $path     Path of the loose object file.
+  * @param callable $callback Receives each non-empty chunk of content.
+  */
+ private function slurpLooseObject(
+   string $path,
+   callable $callback
+ ): void {
+   $forward = function( $chunk ) use ( $callback ) {
+     if( $chunk !== '' ) {
+       $callback( $chunk );
+     }
+
+     // Always continue: the whole object is wanted.
+     return true;
+   };
+
+   $this->iterateInflated( $path, $forward );
+ }
+
+ /**
+  * Opens a file, creates a zlib inflate context and runs a callback.
+  *
+  * The callback is invoked as $callback( $fileHandle, $inflator ) only when
+  * both the file and the inflate context could be created. The file handle
+  * is always closed afterwards, including when the inflate context cannot
+  * be created or the callback throws.
+  *
+  * @param string   $path     File to open for binary reading.
+  * @param callable $callback Consumer of the handle and inflate context.
+  */
+ private function withInflatedFile( string $path, callable $callback ): void {
+   $fileHandle = fopen( $path, 'rb' );
+
+   if( $fileHandle === false ) {
+     return;
+   }
+
+   try {
+     $inflator = inflate_init( ZLIB_ENCODING_DEFLATE );
+
+     if( $inflator !== false ) {
+       $callback( $fileHandle, $inflator );
+     }
+   } finally {
+     // Release the handle on every path, not only after a clean run.
+     fclose( $fileHandle );
+   }
+ }
+
+ /**
+  * Feeds a packed object's contents to a callback.
+  *
+  * Streaming through the pack helper is tried first. When that reports
+  * failure, the object is read in one piece and, unless null or empty,
+  * passed to the callback as a single chunk.
+  *
+  * @param string   $sha      Object hash.
+  * @param callable $callback Receives each chunk of content.
+  */
+ private function slurpPackedObject(
+   string $sha,
+   callable $callback
+ ): void {
+   if( $this->packs->stream( $sha, $callback ) ) {
+     return;
+   }
+
+   $data = $this->packs->read( $sha );
+
+   if( $data === null || $data === '' ) {
+     return;
+   }
+
+   $callback( $data );
+ }
+
+ /**
+  * Returns the leading bytes of a loose object's body.
+  *
+  * @param string $sha    Object hash.
+  * @param int    $length Maximum number of bytes wanted.
+  * @return string The prefix, or '' when no loose object file exists.
+  */
+ private function peekLooseObject( string $sha, int $length ): string {
+   $path = $this->getLoosePath( $sha );
+
+   if( !is_file( $path ) ) {
+     return '';
+   }
+
+   return $this->inflateLooseObjectPrefix( $path, $length );
+ }
+
+ /**
+  * Inflates just enough of a loose object to return a prefix of its body.
+  *
+  * Chunks are accumulated until at least $length bytes are available, then
+  * the result is cut to exactly $length bytes (or fewer for short objects).
+  *
+  * @param string $path   Path of the loose object file.
+  * @param int    $length Maximum number of bytes to return.
+  * @return string Up to $length bytes of object content, header excluded.
+  */
+ private function inflateLooseObjectPrefix(
+   string $path,
+   int $length
+ ): string {
+   $prefix = '';
+
+   $collect = function( $chunk ) use ( $length, &$prefix ) {
+     $prefix .= $chunk;
+
+     // Returning false stops the iteration once enough has been read.
+     return strlen( $prefix ) < $length;
+   };
+
+   $this->iterateInflated( $path, $collect );
+
+   return substr( $prefix, 0, $length );
+ }
+
+ /**
+  * Walks commit history from a reference, following one parent per commit.
+  *
+  * The walk ends after $limit commits, when a commit has no parent, or when
+  * a commit cannot be parsed.
+  *
+  * @param string   $ref      Reference to start from.
+  * @param int      $limit    Maximum number of commits to visit.
+  * @param callable $callback Receives each commit object in turn.
+  */
+ public function history( string $ref, int $limit, callable $callback ): void {
+   $sha = $this->resolve( $ref );
+
+   for( $visited = 0; $sha !== '' && $visited < $limit; $visited++ ) {
+     $commit = $this->parseCommit( $sha );
+
+     if( $commit === null ) {
+       break;
+     }
+
+     $callback( $commit );
+     $sha = $commit->parentSha;
+   }
+ }
+
+ /**
+  * Reads and parses a commit.
+  *
+  * @param string $sha Commit hash.
+  * @return object|null The parsed commit, or null when read() returned ''.
+  */
+ private function parseCommit( string $sha ): ?object {
+   $data = $this->read( $sha );
+
+   if( $data === '' ) {
+     return null;
+   }
+
+   return $this->buildCommitObject( $sha, $data );
+ }
+
+ /**
+  * Builds a commit object from raw commit contents.
+  *
+  * Header fields are matched only in the section before the first blank
+  * line. Otherwise a message line such as "parent <40 hex digits>" in a
+  * commit without a parent header would be mistaken for a real parent.
+  *
+  * @param string $sha  Commit hash.
+  * @param string $data Raw commit contents.
+  * @return object Object with the properties sha, message, author, email,
+  *                date (timestamp) and parentSha ('' when there is no
+  *                parent header; otherwise the first one).
+  */
+ private function buildCommitObject( string $sha, string $data ): object {
+   $headerEnd = strpos( $data, "\n\n" );
+   $headers = $headerEnd === false ? $data : substr( $data, 0, $headerEnd );
+
+   $identity = $this->parseIdentity(
+     $headers,
+     '/^author (.*) <(.*)> (\d+)/m'
+   );
+   $message = $this->extractMessage( $data );
+   $parentSha = $this->extractPattern(
+     $headers,
+     '/^parent ([0-9a-f]{40})$/m',
+     1
+   );
+
+   return (object)[
+     'sha' => $sha,
+     'message' => $message,
+     'author' => $identity['name'],
+     'email' => $identity['email'],
+     'date' => $identity['timestamp'],
+     'parentSha' => $parentSha
+   ];
+ }
+
+ /**
+  * Lists the entries of the tree reachable from a reference or hash.
+  *
+  * Nothing happens when the reference resolves to ''.
+  *
+  * @param string   $refOrSha Reference or hash to resolve.
+  * @param callable $callback Receives one File per tree entry.
+  */
+ public function walk( string $refOrSha, callable $callback ): void {
+   $sha = $this->resolve( $refOrSha );
+
+   if( $sha === '' ) {
+     return;
+   }
+
+   $this->walkTree( $sha, $callback );
+ }
+
+ /**
+  * Lists the entries of a tree, given a tree hash or a commit hash.
+  *
+  * When the object contains a "tree <hash>" line, that tree is read in its
+  * place; otherwise the object itself is used. Entries are only processed
+  * when the resulting data passes isTreeData().
+  *
+  * @param string   $sha      Hash of a commit or tree.
+  * @param callable $callback Receives one File per tree entry.
+  */
+ private function walkTree( string $sha, callable $callback ): void {
+   $treeData = $this->read( $sha );
+   $matches = [];
+
+   $pointsAtTree = $treeData !== '' && preg_match(
+     '/^tree ([0-9a-f]{40})$/m',
+     $treeData,
+     $matches
+   );
+
+   if( $pointsAtTree ) {
+     $treeData = $this->read( $matches[1] );
+   }
+
+   if( $treeData === '' || !$this->isTreeData( $treeData ) ) {
+     return;
+   }
+
+   $this->processTree( $treeData, $callback );
+ }
+
+ /**
+  * Parses raw tree data entry by entry.
+  *
+  * Parsing stops at the end of the data or at the first entry that cannot
+  * be parsed.
+  *
+  * @param string   $data     Raw tree contents.
+  * @param callable $callback Receives the File built for each entry.
+  */
+ private function processTree( string $data, callable $callback ): void {
+   $length = strlen( $data );
+
+   for(
+     $position = 0;
+     $position < $length;
+     $position = $entry['nextPosition']
+   ) {
+     $entry = $this->parseTreeEntry( $data, $position, $length );
+
+     if( $entry === null ) {
+       break;
+     }
+
+     $callback( $entry['file'] );
+   }
+ }
+
+ /**
+  * Parses the tree entry that starts at a given offset.
+  *
+  * An entry needs a space after the mode, a NUL after the name and 20
+  * further bytes for the hash, i.e. "<mode> <name>\0<20 bytes>".
+  *
+  * @param string $data     Raw tree contents.
+  * @param int    $position Offset at which the entry starts.
+  * @param int    $length   Length of $data.
+  * @return array|null Array with the keys 'file' and 'nextPosition', or
+  *                    null when no complete entry starts at $position.
+  */
+ private function parseTreeEntry(
+   string $data,
+   int $position,
+   int $length
+ ): ?array {
+   $spacePos = strpos( $data, ' ', $position );
+
+   // Check before reusing the result as an offset: false is not a valid
+   // offset and would restart the NUL search from the beginning.
+   if( $spacePos === false ) {
+     return null;
+   }
+
+   $nullPos = strpos( $data, "\0", $spacePos );
+
+   if( $nullPos === false || $nullPos + 21 > $length ) {
+     return null;
+   }
+
+   return $this->buildTreeEntryResult(
+     $data,
+     $position,
+     $spacePos,
+     $nullPos
+   );
+ }
+
+ /**
+  * Builds the File for one tree entry and the offset of the next entry.
+  *
+  * Entries whose mode is '40000' or '040000' are treated as directories
+  * and get size 0 and empty contents; for all other entries the object's
+  * size and a prefix of its contents are looked up.
+  *
+  * @param string $data     Raw tree contents.
+  * @param int    $position Offset at which the entry starts.
+  * @param int    $spacePos Offset of the space after the mode.
+  * @param int    $nullPos  Offset of the NUL after the name.
+  * @return array Array with the keys 'file' (File) and 'nextPosition' (int).
+  */
+ private function buildTreeEntryResult(
+   string $data,
+   int $position,
+   int $spacePos,
+   int $nullPos
+ ): array {
+   $mode = substr( $data, $position, $spacePos - $position );
+   $name = substr( $data, $spacePos + 1, $nullPos - $spacePos - 1 );
+
+   // The 20 bytes after the NUL are the binary hash.
+   $sha = bin2hex( substr( $data, $nullPos + 1, 20 ) );
+
+   $size = 0;
+   $contents = '';
+
+   if( $mode !== '40000' && $mode !== '040000' ) {
+     $size = $this->getObjectSize( $sha );
+     $contents = $this->peek( $sha );
+   }
+
+   return [
+     'file' => new File( $name, $sha, $mode, 0, $size, $contents ),
+     'nextPosition' => $nullPos + 21
+   ];
+ }
+
+ /**
+  * Checks whether data looks like raw tree contents.
+  *
+  * The data must be at least 25 bytes long, begin with one of the modes
+  * 40000, 100644, 100755, 120000 or 160000 followed by a space, and have
+  * at least 20 bytes after its first NUL.
+  *
+  * @param string $data Data to inspect.
+  * @return bool True when all three conditions hold.
+  */
+ private function isTreeData( string $data ): bool {
+   $size = strlen( $data );
+
+   if( $size < 25 ) {
+     return false;
+   }
+
+   if( !preg_match( '/^(40000|100644|100755|120000|160000) /', $data ) ) {
+     return false;
+   }
+
+   $nullPos = strpos( $data, "\0" );
+
+   return $nullPos !== false && $nullPos + 21 <= $size;
+ }
+
+ /**
+  * Returns the path at which a loose object would be stored.
+  *
+  * The first two characters of the hash name the directory and the
+  * remainder names the file.
+  *
+  * @param string $sha Object hash.
+  * @return string Path beneath the objects directory.
+  */
+ private function getLoosePath( string $sha ): string {
+   $directory = substr( $sha, 0, 2 );
+   $fileName = substr( $sha, 2 );
+
+   return "{$this->objectsPath}/{$directory}/{$fileName}";
+ }
+
+ /**
+  * Returns the size recorded in a loose object's header.
+  *
+  * @param string $sha Object hash.
+  * @return int The recorded size, or 0 when no loose object file exists.
+  */
+ private function getLooseObjectSize( string $sha ): int {
+   $path = $this->getLoosePath( $sha );
+
+   if( !is_file( $path ) ) {
+     return 0;
+   }
+
+   return $this->readLooseObjectHeader( $path );
+ }
+
+ /**
+  * Reads the size field from a loose object's header.
+  *
+  * The header is split on spaces and its second field is taken as the
+  * size. Iteration stops as soon as the header has been seen.
+  *
+  * @param string $path Path of the loose object file.
+  * @return int The size field, or 0 when the header is missing or has no
+  *             second field.
+  */
+ private function readLooseObjectHeader( string $path ): int {
+   $size = 0;
+
+   $readSize = function( $chunk, $header ) use ( &$size ) {
+     if( $header !== null ) {
+       $fields = explode( ' ', $header );
+       $size = (int)( $fields[1] ?? 0 );
+     }
+
+     // Only the header is needed, so never ask for more data.
+     return false;
+   };
+
+   $this->iterateInflated( $path, $readSize );
+
+   return $size;
}
Delta: 423 lines added, 482 lines removed, 59-line decrease