Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/treetrek.git

Detects SVG as image not binary

Author: Dave Jarvis <email>
Date: 2026-02-09 12:39:13 GMT-0800
Commit: d68cc3322d5db5beafbcdd36e70792be35ba8f37
Parent: 2e816ed
MediaTypeSniffer.php
// Wildcard marker for FORMATS byte patterns: sniff() skips the comparison at any
// position holding this value. It is negative, so it cannot equal a byte (0-255).
private const ANY = -1;
- // Categories
// Category labels; each one appears as the first element of a [category, media type] pair.
public const CAT_IMAGE = 'image';
public const CAT_VIDEO = 'video';
public const CAT_AUDIO = 'audio';
public const CAT_TEXT = 'text';
public const CAT_ARCHIVE = 'archive';
public const CAT_APP = 'application';
// Fallback category that getTypeInfo() pairs with 'application/octet-stream'.
public const CAT_BINARY = 'binary';
// Signature table read by sniff(). Each row is [category, byte pattern, media type].
// Patterns are compared against the data starting at offset 0, and self::ANY matches
// any byte. Rows are tried in order and the first full match wins, so an earlier row
// shadows any later row that would also match.
private const FORMATS = [
- // Images
// "<svg " - matches only when the data begins with the bare tag followed by a space.
// NOTE(review): data starting with "<?xml " or "<!" falls through to the text rows
// further down; getTypeInfo() handles that case by checking the ".svg" extension.
[self::CAT_IMAGE, [0x3C, 0x73, 0x76, 0x67, 0x20], 'image/svg+xml'],
[self::CAT_IMAGE, [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A], 'image/png'],
[self::CAT_IMAGE, [0x23, 0x64, 0x65, 0x66], 'image/x-xbitmap'],
[self::CAT_IMAGE, [0x21, 0x20, 0x58, 0x50, 0x4D, 0x32], 'image/x-xpixmap'],
-
- // Video
[self::CAT_VIDEO, [0x8A, 0x4D, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A], 'video/x-mng'],
// "RIFF", four wildcard bytes, then "AVI ".
[self::CAT_VIDEO, [0x52, 0x49, 0x46, 0x46, self::ANY, self::ANY, self::ANY, self::ANY, 0x41, 0x56, 0x49, 0x20], 'video/x-msvideo'],
// Four wildcard bytes, then "ftyp".
[self::CAT_VIDEO, [self::ANY, self::ANY, self::ANY, self::ANY, 0x66, 0x74, 0x79, 0x70], 'video/mp4'],
[self::CAT_VIDEO, [0x1A, 0x45, 0xDF, 0xA3], 'video/x-matroska'],
[self::CAT_VIDEO, [0x00, 0x00, 0x01, 0xBA], 'video/mpeg'],
[self::CAT_VIDEO, [0x46, 0x4C, 0x56, 0x01], 'video/x-flv'],
-
- // Documents/Text
- [self::CAT_TEXT, [0x3C, 0x21], 'text/html'],
- [self::CAT_TEXT, [0x3C, 0x68, 0x74, 0x6D, 0x6C], 'text/html'],
- [self::CAT_TEXT, [0x3C, 0x68, 0x65, 0x61, 0x64], 'text/html'],
- [self::CAT_TEXT, [0x3C, 0x62, 0x6F, 0x64, 0x79], 'text/html'],
- [self::CAT_TEXT, [0x3C, 0x48, 0x54, 0x4D, 0x4C], 'text/html'],
- [self::CAT_TEXT, [0x3C, 0x48, 0x45, 0x41, 0x44], 'text/html'],
- [self::CAT_TEXT, [0x3C, 0x42, 0x4F, 0x44, 0x59], 'text/html'],
- [self::CAT_TEXT, [0x3C, 0x3F, 0x78, 0x6D, 0x6C, 0x20], 'text/xml'],
- [self::CAT_TEXT, [0xFE, 0xFF, 0x00, 0x3C, 0x00, 0x3f, 0x00, 0x78], 'text/xml'],
- [self::CAT_TEXT, [0xFF, 0xFE, 0x3C, 0x00, 0x3F, 0x00, 0x78, 0x00], 'text/xml'],
- [self::CAT_TEXT, [0x25, 0x50, 0x44, 0x46, 0x2D], 'application/pdf'],
- [self::CAT_TEXT, [0x25, 0x21, 0x50, 0x53, 0x2D, 0x41, 0x64, 0x6F, 0x62, 0x65, 0x2D], 'application/postscript'],
- [self::CAT_TEXT, [0x25, 0x21, 0x50, 0x53], 'application/postscript'],
-
- // Audio
// Text/document rows kept by this commit. The two-byte "<!" pattern matches any data
// beginning with those two characters, whatever follows them.
+ [self::CAT_TEXT, [0x3C, 0x21], 'text/html'],
+ [self::CAT_TEXT, [0x3C, 0x68, 0x74, 0x6D, 0x6C], 'text/html'],
+ [self::CAT_TEXT, [0x3C, 0x68, 0x65, 0x61, 0x64], 'text/html'],
+ [self::CAT_TEXT, [0x3C, 0x62, 0x6F, 0x64, 0x79], 'text/html'],
+ [self::CAT_TEXT, [0x3C, 0x3F, 0x78, 0x6D, 0x6C, 0x20], 'text/xml'],
// NOTE(review): "%PDF-" is filed under CAT_TEXT, so isBinary() reports PDF data as
// non-binary - confirm that is intended.
+ [self::CAT_TEXT, [0x25, 0x50, 0x44, 0x46, 0x2D], 'application/pdf'],
[self::CAT_AUDIO, [0xFF, 0xFB, self::ANY], 'audio/mpeg'],
[self::CAT_AUDIO, [0x49, 0x44, 0x33], 'audio/mpeg'],
- [self::CAT_AUDIO, [0x2E, 0x73, 0x6E, 0x64], 'audio/basic'],
- [self::CAT_AUDIO, [0x64, 0x6E, 0x73, 0x2E], 'audio/basic'],
// "RIFF", four wildcard bytes, then "WAVE".
[self::CAT_AUDIO, [0x52, 0x49, 0x46, 0x46, self::ANY, self::ANY, self::ANY, self::ANY, 0x57, 0x41, 0x56, 0x45], 'audio/wav'],
[self::CAT_AUDIO, [0x4F, 0x67, 0x67, 0x53], 'audio/ogg'],
- [self::CAT_AUDIO, [0x66, 0x4C, 0x61, 0x43], 'audio/flac'],
- [self::CAT_AUDIO, [0x4D, 0x54, 0x68, 0x64], 'audio/midi'],
- [self::CAT_AUDIO, [0x46, 0x4F, 0x52, 0x4D, self::ANY, self::ANY, self::ANY, self::ANY, 0x41, 0x49, 0x46, 0x46], 'audio/x-aiff'],
-
- // Archives
[self::CAT_ARCHIVE, [0x50, 0x4B, 0x03, 0x04], 'application/zip'],
- [self::CAT_ARCHIVE, [0x50, 0x4B, 0x05, 0x06], 'application/zip'],
- [self::CAT_ARCHIVE, [0x50, 0x4B, 0x07, 0x08], 'application/zip'],
[self::CAT_ARCHIVE, [0x1F, 0x8B, 0x08], 'application/gzip'],
- [self::CAT_ARCHIVE, [0x42, 0x5A, 0x68], 'application/x-bzip2'],
- [self::CAT_ARCHIVE, [0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00], 'application/x-xz'],
- [self::CAT_ARCHIVE, [0x52, 0x61, 0x72, 0x21, 0x1A, 0x07], 'application/vnd.rar'],
- [self::CAT_ARCHIVE, [0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C], 'application/x-7z-compressed'],
-
- // Applications/System
- [self::CAT_APP, [0x41, 0x43, self::ANY, self::ANY, self::ANY, self::ANY, 0x00, 0x00, 0x00, 0x00, 0x00], 'application/acad'],
- [self::CAT_APP, [0xCA, 0xFE, 0xBA, 0xBE], 'application/java-vm'],
- [self::CAT_APP, [0xAC, 0xED], 'application/x-java-serialized-object'],
- [self::CAT_APP, [0x4D, 0x5A], 'application/x-msdownload'],
- [self::CAT_APP, [0x7F, 0x45, 0x4C, 0x46], 'application/x-elf'],
- [self::CAT_APP, [0xCE, 0xFA, 0xED, 0xFE], 'application/x-mach-binary'],
- [self::CAT_APP, [0xCF, 0xFA, 0xED, 0xFE], 'application/x-mach-binary'],
- [self::CAT_APP, [0xFE, 0xED, 0xFA, 0xCE], 'application/x-mach-binary'],
- [self::CAT_APP, [0xFE, 0xED, 0xFA, 0xCF], 'application/x-mach-binary'],
+ [self::CAT_APP, [0x7F, 0x45, 0x4C, 0x46], 'application/x-elf']
];
// Extension lookup used by getInfoByExtension(). Keys are file extensions without the
// dot and must be lowercase, because the lookup lowercases the extension first.
// Values are [category, media type] pairs. An extension missing from this map is
// reported by getInfoByExtension() as binary / application/octet-stream.
private const EXTENSION_MAP = [
- // Web & Markup
'html' => [self::CAT_TEXT, 'text/html'],
- 'htm' => [self::CAT_TEXT, 'text/html'],
- 'xhtml' => [self::CAT_TEXT, 'application/xhtml+xml'],
- 'css' => [self::CAT_TEXT, 'text/css'],
- 'scss' => [self::CAT_TEXT, 'text/x-scss'],
- 'less' => [self::CAT_TEXT, 'text/x-less'],
- 'js' => [self::CAT_TEXT, 'application/javascript'],
- 'jsx' => [self::CAT_TEXT, 'text/javascript'],
- 'ts' => [self::CAT_TEXT, 'text/x-typescript'],
- 'tsx' => [self::CAT_TEXT, 'text/x-typescript'],
- 'vue' => [self::CAT_TEXT, 'text/x-vue'],
+ 'css' => [self::CAT_TEXT, 'text/css'],
+ 'js' => [self::CAT_TEXT, 'application/javascript'],
'json' => [self::CAT_TEXT, 'application/json'],
- 'xml' => [self::CAT_TEXT, 'application/xml'],
- 'md' => [self::CAT_TEXT, 'text/markdown'],
- 'wasm' => [self::CAT_APP, 'application/wasm'],
-
- // Documents
- 'txt' => [self::CAT_TEXT, 'text/plain'],
- 'pdf' => [self::CAT_TEXT, 'application/pdf'],
- 'csv' => [self::CAT_TEXT, 'text/csv'],
- 'tsv' => [self::CAT_TEXT, 'text/tab-separated-values'],
- 'tex' => [self::CAT_TEXT, 'text/x-tex'],
- 'rst' => [self::CAT_TEXT, 'text/x-rst'],
-
- // Archives
- 'zip' => [self::CAT_ARCHIVE, 'application/zip'],
- 'jar' => [self::CAT_ARCHIVE, 'application/java-archive'],
- 'war' => [self::CAT_ARCHIVE, 'application/java-archive'],
- 'ear' => [self::CAT_ARCHIVE, 'application/java-archive'],
- 'gz' => [self::CAT_ARCHIVE, 'application/gzip'],
- 'bz2' => [self::CAT_ARCHIVE, 'application/x-bzip2'],
- 'xz' => [self::CAT_ARCHIVE, 'application/x-xz'],
- 'tar' => [self::CAT_ARCHIVE, 'application/x-tar'],
- 'rar' => [self::CAT_ARCHIVE, 'application/vnd.rar'],
- '7z' => [self::CAT_ARCHIVE, 'application/x-7z-compressed'],
-
- // Images
- 'jpg' => [self::CAT_IMAGE, 'image/jpeg'],
+ 'xml' => [self::CAT_TEXT, 'application/xml'],
+ 'md' => [self::CAT_TEXT, 'text/markdown'],
+ 'txt' => [self::CAT_TEXT, 'text/plain'],
+ 'zip' => [self::CAT_ARCHIVE, 'application/zip'],
+ 'jpg' => [self::CAT_IMAGE, 'image/jpeg'],
'jpeg' => [self::CAT_IMAGE, 'image/jpeg'],
- 'png' => [self::CAT_IMAGE, 'image/png'],
- 'gif' => [self::CAT_IMAGE, 'image/gif'],
- 'svg' => [self::CAT_IMAGE, 'image/svg+xml'],
+ 'png' => [self::CAT_IMAGE, 'image/png'],
+ 'gif' => [self::CAT_IMAGE, 'image/gif'],
// getTypeInfo() also special-cases the 'svg' extension before any sniffing happens.
+ 'svg' => [self::CAT_IMAGE, 'image/svg+xml'],
'webp' => [self::CAT_IMAGE, 'image/webp'],
- 'bmp' => [self::CAT_IMAGE, 'image/bmp'],
- 'tiff' => [self::CAT_IMAGE, 'image/tiff'],
- 'tif' => [self::CAT_IMAGE, 'image/tiff'],
- 'ico' => [self::CAT_IMAGE, 'image/x-icon'],
-
- // Video & Audio
- 'mp4' => [self::CAT_VIDEO, 'video/mp4'],
- 'avi' => [self::CAT_VIDEO, 'video/x-msvideo'],
- 'mov' => [self::CAT_VIDEO, 'video/quicktime'],
- 'wmv' => [self::CAT_VIDEO, 'video/x-ms-wmv'],
- 'flv' => [self::CAT_VIDEO, 'video/x-flv'],
- 'webm' => [self::CAT_VIDEO, 'video/webm'],
- 'mp3' => [self::CAT_AUDIO, 'audio/mpeg'],
- 'wav' => [self::CAT_AUDIO, 'audio/wav'],
- 'ogg' => [self::CAT_AUDIO, 'audio/ogg'],
- 'flac' => [self::CAT_AUDIO, 'audio/flac'],
- 'aac' => [self::CAT_AUDIO, 'audio/aac'],
-
- // Programming Languages
- 'php' => [self::CAT_TEXT, 'application/x-php'],
- 'py' => [self::CAT_TEXT, 'text/x-python'],
- 'rb' => [self::CAT_TEXT, 'text/x-ruby'],
- 'java' => [self::CAT_TEXT, 'text/x-java'],
- 'c' => [self::CAT_TEXT, 'text/x-c'],
- 'cpp' => [self::CAT_TEXT, 'text/x-c++'],
- 'h' => [self::CAT_TEXT, 'text/x-c'],
- 'hpp' => [self::CAT_TEXT, 'text/x-c++'],
- 'cs' => [self::CAT_TEXT, 'text/x-csharp'],
- 'go' => [self::CAT_TEXT, 'text/x-go'],
- 'rs' => [self::CAT_TEXT, 'text/x-rust'],
- 'pl' => [self::CAT_TEXT, 'text/x-perl'],
- 'lua' => [self::CAT_TEXT, 'text/x-lua'],
- 'swift' => [self::CAT_TEXT, 'text/x-swift'],
- 'kt' => [self::CAT_TEXT, 'text/x-kotlin'],
- 'kts' => [self::CAT_TEXT, 'text/x-kotlin'],
- 'dart' => [self::CAT_TEXT, 'text/x-dart'],
- 'r' => [self::CAT_TEXT, 'text/x-r'],
- 'sql' => [self::CAT_TEXT, 'application/sql'],
- 'sh' => [self::CAT_TEXT, 'application/x-sh'],
- 'bat' => [self::CAT_TEXT, 'application/x-bat'],
- 'ps1' => [self::CAT_APP, 'application/x-powershell'],
- 'el' => [self::CAT_TEXT, 'text/x-script.elisp'],
- 'clj' => [self::CAT_TEXT, 'text/x-clojure'],
- 'ex' => [self::CAT_TEXT, 'text/x-elixir'],
- 'hs' => [self::CAT_TEXT, 'text/x-haskell'],
- 'erl' => [self::CAT_TEXT, 'text/x-erlang'],
- 'm' => [self::CAT_TEXT, 'text/x-objectivec'],
- 'class' => [self::CAT_APP, 'application/java-vm'],
- 'groovy' => [self::CAT_TEXT, 'text/x-groovy'],
-
- // Config & System
+ 'mp4' => [self::CAT_VIDEO, 'video/mp4'],
+ 'mp3' => [self::CAT_AUDIO, 'audio/mpeg'],
+ 'php' => [self::CAT_TEXT, 'application/x-php'],
+ 'sql' => [self::CAT_TEXT, 'application/sql'],
'yaml' => [self::CAT_TEXT, 'text/yaml'],
- 'yml' => [self::CAT_TEXT, 'text/yaml'],
- 'toml' => [self::CAT_TEXT, 'application/toml'],
- 'ini' => [self::CAT_TEXT, 'text/plain'],
- 'cfg' => [self::CAT_TEXT, 'text/plain'],
- 'conf' => [self::CAT_TEXT, 'text/plain'],
- 'env' => [self::CAT_TEXT, 'text/plain'],
- 'gitignore' => [self::CAT_TEXT, 'text/plain'],
- 'dockerfile' => [self::CAT_TEXT, 'text/plain'],
- 'version' => [self::CAT_TEXT, 'text/plain'],
- 'gradle' => [self::CAT_TEXT, 'text/plain'],
- 'properties' => [self::CAT_TEXT, 'text/plain'],
+ 'yml' => [self::CAT_TEXT, 'text/yaml']
];
- /**
- * Internal helper to resolve category and mime type.
- * Guaranteed to return a non-empty array.
- */
/**
 * Resolves the [category, media type] pair for a piece of content.
 *
 * Resolution order in the added (+) lines:
 *   1. a ".svg" extension is accepted outright as image/svg+xml;
 *   2. otherwise the content is sniffed against FORMATS;
 *   3. otherwise the extension is looked up in EXTENSION_MAP;
 *   4. otherwise binary / application/octet-stream is used.
 *
 * @param string $data     Content handed to sniff().
 * @param string $filePath Path whose extension is inspected; may be empty.
 * @return array Two-element list [category, media type]; never empty.
 */
private static function getTypeInfo( string $data, string $filePath ): array {
- $info = self::sniff( $data );
+ $info = [];
+ $ext = strtolower( pathinfo( $filePath, PATHINFO_EXTENSION ) );
- if ( empty( $info ) && !empty( $filePath ) ) {
// NOTE(review): the extension is trusted before the content is examined, so any data
// stored under a ".svg" name is reported as SVG regardless of what it contains.
+ if( $ext === 'svg' ){
+ $info = [self::CAT_IMAGE, 'image/svg+xml'];
+ }
+
+ if( empty( $info ) ){
+ $info = self::sniff( $data );
+ }
+
+ if( empty( $info ) && !empty( $filePath ) ){
$info = self::getInfoByExtension( $filePath );
}
- return !empty( $info ) ? $info : [self::CAT_BINARY, 'application/octet-stream'];
// getInfoByExtension() already falls back to binary/octet-stream, so this branch is
// reached only when sniffing found nothing and $filePath is empty in the empty()
// sense (which includes the string "0").
+ if( empty( $info ) ){
+ $info = [self::CAT_BINARY, 'application/octet-stream'];
+ }
+
+ return $info;
}
/**
 * Compares the leading bytes of $data with each FORMATS pattern, in table order.
 *
 * @param string $data Content to inspect; at most min(strlen($data), self::BUFFER)
 *                     bytes are decoded for comparison.
 * @return array [category, media type] of the first matching row, or [] when no
 *               row matches.
 */
private static function sniff( string $data ): array {
- if( empty( $data ) ) return [];
-
// With the early return removed, empty input is still handled: every pattern is
// longer than zero bytes, so each row is skipped by the length guard below.
+ $found = [];
$dataLength = strlen( $data );
$maxScan = min( $dataLength, self::BUFFER );
$sourceBytes = [];
- for( $i = 0; $i < $maxScan; $i++ ) {
+ for( $i = 0; $i < $maxScan; $i++ ){
// ord() already yields 0-255, so the mask leaves the value unchanged.
$sourceBytes[$i] = ord( $data[$i] ) & 0xFF;
}
- foreach( self::FORMATS as [$category, $pattern, $type] ) {
+ foreach( self::FORMATS as [$category, $pattern, $type] ){
$patternLength = count( $pattern );
// NOTE(review): this guard compares against $dataLength, but $sourceBytes holds only
// $maxScan entries. If self::BUFFER (not visible here) is smaller than a pattern,
// the comparison loop would read an undefined offset - confirm BUFFER covers the
// longest pattern.
- if( $patternLength > $dataLength ) continue;
+ if( $patternLength > $dataLength ){
+ continue;
+ }
$matches = true;
- for( $i = 0; $i < $patternLength; $i++ ) {
- if( $pattern[$i] !== self::ANY && $pattern[$i] !== $sourceBytes[$i] ) {
+ for( $i = 0; $i < $patternLength; $i++ ){
// Positions marked self::ANY are wildcards; any other position must match exactly.
+ if( $pattern[$i] !== self::ANY && $pattern[$i] !== $sourceBytes[$i] ){
$matches = false;
break;
}
}
- if( $matches ) return [$category, $type];
// First match wins: stop scanning the table as soon as one row matches.
+ if( $matches ){
+ $found = [$category, $type];
+ break;
+ }
}
- return [];
+ return $found;
}
/**
 * Looks up the lowercased extension of $filePath in EXTENSION_MAP.
 *
 * @param string $filePath Path whose extension is used as the lookup key.
 * @return array [category, media type]; [CAT_BINARY, 'application/octet-stream']
 *               when the extension is not in the map.
 */
private static function getInfoByExtension( string $filePath ): array {
- $extension = strtolower( pathinfo( $filePath, PATHINFO_EXTENSION ) );
- return self::EXTENSION_MAP[$extension] ?? [self::CAT_BINARY, 'application/octet-stream'];
+ $ext = strtolower( pathinfo( $filePath, PATHINFO_EXTENSION ) );
+ $info = self::EXTENSION_MAP[$ext] ?? [self::CAT_BINARY, 'application/octet-stream'];
+
+ return $info;
}
/**
 * Returns the media type string resolved for the given content and path.
 *
 * Despite the "is" prefix, this returns a string rather than a boolean.
 *
 * @param string $data     Content to classify.
 * @param string $filePath Optional path used for extension-based resolution.
 * @return string Second element of the pair produced by getTypeInfo().
 */
public static function isMediaType( string $data, string $filePath = '' ): string {
- return self::getTypeInfo( $data, $filePath )[1];
+ $info = self::getTypeInfo( $data, $filePath );
+
+ return $info[1];
}
/**
 * Returns the category label resolved for the given content and path.
 *
 * Despite the "is" prefix, this returns a string rather than a boolean.
 *
 * @param string $data     Content to classify.
 * @param string $filePath Optional path used for extension-based resolution.
 * @return string First element of the pair produced by getTypeInfo().
 */
public static function isCategory( string $data, string $filePath = '' ): string {
- return self::getTypeInfo( $data, $filePath )[0];
+ $info = self::getTypeInfo( $data, $filePath );
+
+ return $info[0];
}
/**
 * Reports whether the resolved type should be treated as binary.
 *
 * In the added (+) lines the result is true only when all three hold: the category
 * is not CAT_TEXT, the media type does not start with "text/", and the media type
 * is not "image/svg+xml".
 *
 * @param string $data     Content to classify.
 * @param string $filePath Optional path used for extension-based resolution.
 * @return bool True when the content is considered binary.
 */
public static function isBinary( string $data, string $filePath = '' ): bool {
- [$category, $type] = self::getTypeInfo( $data, $filePath );
- return $category !== self::CAT_TEXT && !str_starts_with( $type, 'text/' );
+ $info = self::getTypeInfo( $data, $filePath );
+ $category = $info[0];
+ $type = $info[1];
+
// SVG is categorised as an image but is explicitly excluded from "binary" here.
// NOTE(review): FORMATS files application/pdf under CAT_TEXT, so PDF data is also
// reported as non-binary - confirm that is intended.
+ return
+ $category !== self::CAT_TEXT &&
+ !str_starts_with( $type, 'text/' ) &&
+ $type !== 'image/svg+xml';
}
}
?>
+
Delta: 70 lines added, 169 lines removed, 99-line decrease