Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/treetrek.git

Fixes file size determination

Author Dave Jarvis <email>
Date 2026-02-08 20:46:45 GMT-0800
Commit 0cf461a609dfb22f6efcf633009b02c204bd8f16
Parent db3d54f
new/Git.php
/**
- * Fix: Extracts size from the Git header without inflating the entire blob.
+ * Fix: Uses incremental inflation to safely read headers for both loose and packed objects.
+ * Resolves the true uncompressed size for Delta objects in packfiles.
*/
public function getObjectSize(string $sha): int {
$loose = "{$this->objPath}/" . substr($sha, 0, 2) . "/" . substr($sha, 2);
if (file_exists($loose)) {
$f = @fopen($loose, 'rb');
if (!$f) return 0;
- $data = fread($f, 128);
+
+ // Use inflate_init for safe partial decompression of loose headers
+ $ctx = inflate_init(ZLIB_ENCODING_DEFLATE);
+ $data = '';
+ while (!feof($f)) {
+ $chunk = fread($f, 128); // Read small chunks for the header
+ $data .= inflate_add($ctx, $chunk, ZLIB_NO_FLUSH);
+ if (strpos($data, "\0") !== false) break; // Stop once we have the header
+ }
fclose($f);
- $inflated = @gzuncompress($data);
- if (!$inflated) return 0;
- $header = explode("\0", $inflated, 2)[0];
+
+ $header = explode("\0", $data, 2)[0];
$parts = explode(' ', $header);
return isset($parts[1]) ? (int)$parts[1] : 0;
}
return $this->getPackedObjectSize($sha);
}
/**
- * Reads the Variable Length Quantity (VLQ) size from the packfile header.
+ * Reads size from packfile. Handles OBJ_OFS_DELTA and OBJ_REF_DELTA to return full file size.
*/
private function getPackedObjectSize(string $sha): int {
$info = $this->getPackOffset($sha);
if (!$info) return 0;
$pf = @fopen($info['file'], 'rb');
if (!$pf) return 0;
fseek($pf, $info['offset']);
+
+ // Read Pack Object Header
$byte = ord(fread($pf, 1));
+ $type = ($byte >> 4) & 7;
$size = $byte & 15;
$shift = 4;
while ($byte & 128) {
$byte = ord(fread($pf, 1));
$size |= (($byte & 127) << $shift);
$shift += 7;
+ }
+
+ // If it's a Delta, we must read the delta header to get the target (actual) size.
+ // Type 6 = OBJ_OFS_DELTA, Type 7 = OBJ_REF_DELTA
+ if ($type === 6 || $type === 7) {
+ if ($type === 6) {
+ // OFS_DELTA: Skip the variable-length offset
+ $byte = ord(fread($pf, 1));
+ while ($byte & 128) { $byte = ord(fread($pf, 1)); }
+ } else {
+ // REF_DELTA: Skip the 20-byte base SHA
+ fread($pf, 20);
+ }
+
+ // Inflate the start of the delta stream to get the target size
+ $ctx = inflate_init(ZLIB_ENCODING_DEFLATE);
+ $buffer = '';
+ $found = false;
+
+ // We only need the first two VLQ integers from the stream
+ while (!$found && !feof($pf)) {
+ $chunk = fread($pf, 512);
+ $buffer .= inflate_add($ctx, $chunk, ZLIB_NO_FLUSH);
+
+ // Check if we have enough bytes to decode two VLQs
+ // (Just a heuristic check, the decoding loop below handles the actual logic)
+ if (strlen($buffer) > 20) $found = true;
+ }
+
+ // Decode Delta Header: [Source Size VLQ] [Target Size VLQ]
+ $pos = 0;
+
+ // Skip Source Size
+ $byte = ord($buffer[$pos++]);
+ while ($byte & 128) { $byte = ord($buffer[$pos++]); }
+
+ // Read Target Size (The full file size)
+ $byte = ord($buffer[$pos++]);
+ $size = $byte & 127;
+ $shift = 7;
+ while ($byte & 128) {
+ $byte = ord($buffer[$pos++]);
+ $size |= (($byte & 127) << $shift);
+ $shift += 7;
+ }
}
}
- /**
- * Refactored fromPack to handle larger objects and accurate offsets.
- */
private function fromPack(string $sha): ?string {
$info = $this->getPackOffset($sha);
while ($byte & 128) { $byte = ord(fread($pf, 1)); }
- // Read a large enough chunk for decompression (up to 32MB for blobs)
$data = @gzuncompress(fread($pf, 33554432));
fclose($pf);
return $data ?: null;
}
- /**
- * Helper to find an object's location in .idx files.
- */
private function getPackOffset(string $sha): ?array {
$packs = glob("{$this->objPath}/pack/*.idx");
Delta 62 lines added, 13 lines removed, 49-line increase