| | <?php |
| | class MediaTypeSniffer { |
| | - private const BUFFER = 12; |
| | - private const ANY = -1; |
| | - |
| | - public const CAT_IMAGE = 'image'; |
| | - public const CAT_VIDEO = 'video'; |
| | - public const CAT_AUDIO = 'audio'; |
| | - public const CAT_TEXT = 'text'; |
| | + // Category labels; isCategory() returns exactly one of the CAT_* values. |
| | + public const CAT_IMAGE = 'image'; |
| | + public const CAT_VIDEO = 'video'; |
| | + public const CAT_AUDIO = 'audio'; |
| | + public const CAT_TEXT = 'text'; |
| | public const CAT_ARCHIVE = 'archive'; |
| | - public const CAT_APP = 'application'; |
| | - public const CAT_BINARY = 'binary'; |
| | - |
| | - private const FORMATS = [ |
| | - [self::CAT_IMAGE, [0x3C, 0x73, 0x76, 0x67, 0x20], 'image/svg+xml'], |
| | - [self::CAT_IMAGE, [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A], 'image/png'], |
| | - [self::CAT_IMAGE, [0xFF, 0xD8, 0xFF, 0xE0], 'image/jpeg'], |
| | - [self::CAT_IMAGE, [0xFF, 0xD8, 0xFF, 0xEE], 'image/jpeg'], |
| | - [self::CAT_IMAGE, [0xFF, 0xD8, 0xFF, 0xE1, self::ANY, self::ANY, 0x45, 0x78, 0x69, 0x66, 0x00], 'image/jpeg'], |
| | - [self::CAT_IMAGE, [0x47, 0x49, 0x46, 0x38], 'image/gif'], |
| | - [self::CAT_IMAGE, [0x42, 0x4D], 'image/bmp'], |
| | - [self::CAT_IMAGE, [0x49, 0x49, 0x2A, 0x00], 'image/tiff'], |
| | - [self::CAT_IMAGE, [0x4D, 0x4D, 0x00, 0x2A], 'image/tiff'], |
| | - [self::CAT_IMAGE, [0x52, 0x49, 0x46, 0x46, self::ANY, self::ANY, self::ANY, self::ANY, 0x57, 0x45, 0x42, 0x50], 'image/webp'], |
| | - [self::CAT_IMAGE, [0x38, 0x42, 0x50, 0x53, 0x00, 0x01], 'image/vnd.adobe.photoshop'], |
| | - [self::CAT_IMAGE, [0x23, 0x64, 0x65, 0x66], 'image/x-xbitmap'], |
| | - [self::CAT_IMAGE, [0x21, 0x20, 0x58, 0x50, 0x4D, 0x32], 'image/x-xpixmap'], |
| | - [self::CAT_VIDEO, [0x8A, 0x4D, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A], 'video/x-mng'], |
| | - [self::CAT_VIDEO, [0x52, 0x49, 0x46, 0x46, self::ANY, self::ANY, self::ANY, self::ANY, 0x41, 0x56, 0x49, 0x20], 'video/x-msvideo'], |
| | - [self::CAT_VIDEO, [self::ANY, self::ANY, self::ANY, self::ANY, 0x66, 0x74, 0x79, 0x70], 'video/mp4'], |
| | - [self::CAT_VIDEO, [0x1A, 0x45, 0xDF, 0xA3], 'video/x-matroska'], |
| | - [self::CAT_VIDEO, [0x00, 0x00, 0x01, 0xBA], 'video/mpeg'], |
| | - [self::CAT_VIDEO, [0x46, 0x4C, 0x56, 0x01], 'video/x-flv'], |
| | - [self::CAT_TEXT, [0x3C, 0x21], 'text/html'], |
| | - [self::CAT_TEXT, [0x3C, 0x68, 0x74, 0x6D, 0x6C], 'text/html'], |
| | - [self::CAT_TEXT, [0x3C, 0x68, 0x65, 0x61, 0x64], 'text/html'], |
| | - [self::CAT_TEXT, [0x3C, 0x62, 0x6F, 0x64, 0x79], 'text/html'], |
| | - [self::CAT_TEXT, [0x3C, 0x3F, 0x78, 0x6D, 0x6C, 0x20], 'text/xml'], |
| | - [self::CAT_TEXT, [0x25, 0x50, 0x44, 0x46, 0x2D], 'application/pdf'], |
| | - [self::CAT_TEXT, [0xEF, 0xBB, 0xBF], 'text/plain'], |
| | - [self::CAT_TEXT, [0xFE, 0xFF], 'text/plain'], |
| | - [self::CAT_TEXT, [0xFF, 0xFE], 'text/plain'], |
| | - [self::CAT_TEXT, [0x00, 0x00, 0xFE, 0xFF], 'text/plain'], |
| | - [self::CAT_TEXT, [0xFF, 0xFE, 0x00, 0x00], 'text/plain'], |
| | - [self::CAT_AUDIO, [0xFF, 0xFB, self::ANY], 'audio/mpeg'], |
| | - [self::CAT_AUDIO, [0x49, 0x44, 0x33], 'audio/mpeg'], |
| | - [self::CAT_AUDIO, [0x52, 0x49, 0x46, 0x46, self::ANY, self::ANY, self::ANY, self::ANY, 0x57, 0x41, 0x56, 0x45], 'audio/wav'], |
| | - [self::CAT_AUDIO, [0x4F, 0x67, 0x67, 0x53], 'audio/ogg'], |
| | - [self::CAT_ARCHIVE, [0x50, 0x4B, 0x03, 0x04], 'application/zip'], |
| | - [self::CAT_ARCHIVE, [0x1F, 0x8B, 0x08], 'application/gzip'], |
| | - [self::CAT_APP, [0x7F, 0x45, 0x4C, 0x46], 'application/x-elf'] |
| | - ]; |
| | - |
| | - private const EXTENSION_MAP = [ |
| | - // Documentation / markup |
| | - 'md' => [self::CAT_TEXT, 'text/markdown'], |
| | - 'rmd' => [self::CAT_TEXT, 'text/r-markdown'], |
| | - 'txt' => [self::CAT_TEXT, 'text/plain'], |
| | - 'tex' => [self::CAT_TEXT, 'application/x-tex'], |
| | - 'lyx' => [self::CAT_TEXT, 'application/x-lyx'], |
| | - 'rst' => [self::CAT_TEXT, 'text/x-rst'], |
| | - 'asciidoc' => [self::CAT_TEXT, 'text/asciidoc'], |
| | - 'adoc' => [self::CAT_TEXT, 'text/asciidoc'], |
| | - 'org' => [self::CAT_TEXT, 'text/org'], |
| | - 'latex' => [self::CAT_TEXT, 'application/x-tex'], |
| | - 'csv' => [self::CAT_TEXT, 'text/csv'], |
| | - 'tsv' => [self::CAT_TEXT, 'text/tab-separated-values'], |
| | - 'psv' => [self::CAT_TEXT, 'text/plain'], |
| | - |
| | - 'json' => [self::CAT_TEXT, 'application/json'], |
| | - 'xml' => [self::CAT_TEXT, 'application/xml'], |
| | - 'gitignore' => [self::CAT_TEXT, 'text/plain'], |
| | - 'ts' => [self::CAT_TEXT, 'application/typescript'], |
| | - 'log' => [self::CAT_TEXT, 'text/plain'], |
| | - 'ndjson' => [self::CAT_TEXT, 'application/x-ndjson'], |
| | - 'conf' => [self::CAT_TEXT, 'text/plain'], |
| | - 'ini' => [self::CAT_TEXT, 'text/plain'], |
| | - 'yaml' => [self::CAT_TEXT, 'text/yaml'], |
| | - 'yml' => [self::CAT_TEXT, 'text/yaml'], |
| | - 'toml' => [self::CAT_TEXT, 'application/toml'], |
| | - 'env' => [self::CAT_TEXT, 'text/plain'], |
| | - 'cfg' => [self::CAT_TEXT, 'text/plain'], |
| | - 'properties'=> [self::CAT_TEXT, 'text/plain'], |
| | - 'dotenv' => [self::CAT_TEXT, 'text/plain'], |
| | - |
| | - // Programming languages |
| | - 'gradle' => [self::CAT_TEXT, 'text/plain'], |
| | - 'php' => [self::CAT_TEXT, 'application/x-php'], |
| | - 'sql' => [self::CAT_TEXT, 'application/sql'], |
| | - 'html' => [self::CAT_TEXT, 'text/html'], |
| | - 'xhtml' => [self::CAT_TEXT, 'text/xhtml'], |
| | - 'css' => [self::CAT_TEXT, 'text/css'], |
| | - 'js' => [self::CAT_TEXT, 'application/javascript'], |
| | - 'py' => [self::CAT_TEXT, 'text/x-python'], |
| | - 'rb' => [self::CAT_TEXT, 'text/x-ruby'], |
| | - 'java' => [self::CAT_TEXT, 'text/x-java-source'], |
| | - 'c' => [self::CAT_TEXT, 'text/x-csrc'], |
| | - 'cpp' => [self::CAT_TEXT, 'text/x-c++src'], |
| | - 'h' => [self::CAT_TEXT, 'text/x-chdr'], |
| | - 'cs' => [self::CAT_TEXT, 'text/x-csharp'], |
| | - 'go' => [self::CAT_TEXT, 'text/x-go'], |
| | - 'rs' => [self::CAT_TEXT, 'text/x-rust'], |
| | - 'swift' => [self::CAT_TEXT, 'text/x-swift'], |
| | - 'kt' => [self::CAT_TEXT, 'text/x-kotlin'], |
| | - 'kts' => [self::CAT_TEXT, 'text/x-kotlin'], |
| | - 'scala' => [self::CAT_TEXT, 'text/x-scala'], |
| | - 'dart' => [self::CAT_TEXT, 'text/x-dart'], |
| | - 'lua' => [self::CAT_TEXT, 'text/x-lua'], |
| | - 'pl' => [self::CAT_TEXT, 'text/x-perl'], |
| | - 'pm' => [self::CAT_TEXT, 'text/x-perl'], |
| | - 'r' => [self::CAT_TEXT, 'text/x-r'], |
| | - 'm' => [self::CAT_TEXT, 'text/x-matlab'], |
| | - 'jl' => [self::CAT_TEXT, 'text/x-julia'], |
| | + // Fallback label that isCategory() returns when no other category applies. |
| | + public const CAT_BINARY = 'binary'; |
| | |
| | - // Shell / scripting |
| | - 'sh' => [self::CAT_TEXT, 'application/x-sh'], |
| | - 'bash' => [self::CAT_TEXT, 'application/x-sh'], |
| | - 'zsh' => [self::CAT_TEXT, 'application/x-sh'], |
| | - 'fish' => [self::CAT_TEXT, 'text/plain'], |
| | - 'bat' => [self::CAT_TEXT, 'application/x-msdos-program'], |
| | - 'ps1' => [self::CAT_TEXT, 'application/x-powershell'] |
| | + // Lowercase file extensions (without the leading dot) that isArchive() |
| | + // accepts as archive or compressed formats. |
| | + private const ARCHIVE_EXTENSIONS = [ |
| | + 'zip', 'tar', 'gz', '7z', 'rar', 'jar', 'lha', 'bz', 'tgz', 'cab', |
| | + 'iso', 'dmg', 'xz', 'z', 'ar', 'war', 'ear', 'pak', 'hqx', 'arj', |
| | + 'zoo', 'rpm', 'deb', 'apk', |
| | + // Usual bzip2, xz, lzma and zstd spellings. |
| | + 'bz2', 'tbz', 'tbz2', 'txz', 'lzma', 'zst' |
| | ]; |
| | - |
| | - private static function getTypeInfo( string $data, string $filePath ): array { |
| | - $info = []; |
| | - $ext = strtolower( pathinfo( $filePath, PATHINFO_EXTENSION ) ); |
| | - |
| | - if( $ext === 'svg' ){ |
| | - $info = [self::CAT_IMAGE, 'image/svg+xml']; |
| | - } |
| | - |
| | - if( empty( $info ) ){ |
| | - $info = self::sniff( $data ); |
| | - } |
| | - |
| | - if( empty( $info ) && !empty( $filePath ) ){ |
| | - $info = self::getInfoByExtension( $filePath ); |
| | - } |
| | - |
| | - if( empty( $info ) ){ |
| | - $info = [self::CAT_BINARY, 'application/octet-stream']; |
| | - } |
| | - |
| | - return $info; |
| | - } |
| | - |
| | - private static function sniff( string $data ): array { |
| | - $found = []; |
| | - $dataLength = strlen( $data ); |
| | - $maxScan = min( $dataLength, self::BUFFER ); |
| | - $sourceBytes = []; |
| | - |
| | - for( $i = 0; $i < $maxScan; $i++ ){ |
| | - $sourceBytes[$i] = ord( $data[$i] ) & 0xFF; |
| | - } |
| | - |
| | - foreach( self::FORMATS as [$category, $pattern, $type] ){ |
| | - $patternLength = count( $pattern ); |
| | - |
| | - if( $patternLength > $dataLength ){ |
| | - continue; |
| | - } |
| | - |
| | - $matches = true; |
| | - |
| | - for( $i = 0; $i < $patternLength; $i++ ){ |
| | - if( $pattern[$i] !== self::ANY && $pattern[$i] !== $sourceBytes[$i] ){ |
| | - $matches = false; |
| | - break; |
| | - } |
| | - } |
| | - |
| | - if( $matches ){ |
| | - $found = [$category, $type]; |
| | - break; |
| | - } |
| | - } |
| | - |
| | - return $found; |
| | - } |
| | - |
| | - private static function getInfoByExtension( string $filePath ): array { |
| | - $ext = strtolower( pathinfo( $filePath, PATHINFO_EXTENSION ) ); |
| | - $info = self::EXTENSION_MAP[$ext] ?? [self::CAT_BINARY, 'application/octet-stream']; |
| | |
| | - return $info; |
| | + /** |
| | + * Detects the media (MIME) type of a byte buffer with the fileinfo extension. |
| | + * |
| | + * @param string $buffer Bytes to inspect. |
| | + * @param string $filename Accepted so all public methods share one signature; |
| | + * it is not consulted by this method. |
| | + * |
| | + * @return string The detected media type, or 'application/octet-stream' when |
| | + * detection fails or produces an empty string. |
| | + */ |
| | + public static function isMediaType( |
| | + string $buffer, |
| | + string $filename = '' |
| | + ): string { |
| | + // Creating a finfo handle loads the magic database, so the handle is |
| | + // built on the first call only and reused afterwards. |
| | + static $finfo = null; |
| | + $finfo ??= new finfo( FILEINFO_MIME_TYPE ); |
| | + |
| | + $mediaType = $finfo->buffer( $buffer ); |
| | + |
| | + // buffer() yields false on failure; ?: also replaces an empty string. |
| | + return $mediaType ?: 'application/octet-stream'; |
| | } |
| | |
| | - public static function isMediaType( string $data, string $filePath = '' ): string { |
| | - $info = self::getTypeInfo( $data, $filePath ); |
| | + /** |
| | + * Maps the detected media type of a buffer to one of the CAT_* labels. |
| | + * |
| | + * The top-level media type decides image, video, audio and text. Anything |
| | + * else is an archive when the filename extension or the media type itself |
| | + * identifies one, and binary otherwise. |
| | + * |
| | + * @param string $buffer Bytes to inspect. |
| | + * @param string $filename Optional name whose extension may mark an archive. |
| | + * |
| | + * @return string One of the CAT_* constants. |
| | + */ |
| | + public static function isCategory( |
| | + string $buffer, |
| | + string $filename = '' |
| | + ): string { |
| | + $mediaType = self::isMediaType( $buffer, $filename ); |
| | + [$topLevel] = explode( '/', $mediaType, 2 ); |
| | + |
| | + // Archive media types that lack the word "compressed"; listing them lets |
| | + // ZIP, gzip, tar and similar content be recognised without a filename. |
| | + $archiveTypes = [ |
| | + 'application/zip', |
| | + 'application/gzip', |
| | + 'application/x-gzip', |
| | + 'application/x-tar', |
| | + 'application/x-bzip2', |
| | + 'application/x-xz', |
| | + 'application/x-rar', |
| | + 'application/vnd.rar', |
| | + 'application/java-archive', |
| | + ]; |
| | |
| | - return $info[1]; |
| | + return match( true ) { |
| | + $topLevel === 'image' => self::CAT_IMAGE, |
| | + $topLevel === 'video' => self::CAT_VIDEO, |
| | + $topLevel === 'audio' => self::CAT_AUDIO, |
| | + $topLevel === 'text' => self::CAT_TEXT, |
| | + self::isArchive( $filename ), |
| | + in_array( $mediaType, $archiveTypes, true ), |
| | + str_contains( $mediaType, 'compressed' ) => self::CAT_ARCHIVE, |
| | + default => self::CAT_BINARY, |
| | + }; |
| | } |
| | - |
| | - public static function isCategory( string $data, string $filePath = '' ): string { |
| | - $info = self::getTypeInfo( $data, $filePath ); |
| | |
| | - return $info[0]; |
| | + /** |
| | + * Reports whether the buffer holds non-textual content. |
| | + * |
| | + * Every text/* media type counts as text, and so do a few textual formats |
| | + * that carry another top-level type (SVG, JSON, XML, JavaScript). |
| | + * |
| | + * @param string $buffer Bytes to inspect. |
| | + * @param string $filename Passed through to isMediaType(). |
| | + * |
| | + * @return bool False for textual content, true for everything else. |
| | + */ |
| | + public static function isBinary( |
| | + string $buffer, |
| | + string $filename = '' |
| | + ): bool { |
| | + $mediaType = self::isMediaType( $buffer, $filename ); |
| | + |
| | + if( str_starts_with( $mediaType, 'text/' ) ){ |
| | + return false; |
| | + } |
| | + |
| | + // Textual formats whose media type does not begin with "text/". |
| | + $textualTypes = [ |
| | + 'image/svg+xml', |
| | + 'application/json', |
| | + 'application/x-ndjson', |
| | + 'application/xml', |
| | + 'application/javascript', |
| | + ]; |
| | + |
| | + return !in_array( $mediaType, $textualTypes, true ); |
| | } |
| | - |
| | - public static function isBinary( string $data, string $filePath = '' ): bool { |
| | - $info = self::getTypeInfo( $data, $filePath ); |
| | - $category = $info[0]; |
| | - $type = $info[1]; |
| | |
| | - return !( |
| | - $category === self::CAT_TEXT || |
| | - str_starts_with( $type, 'text/' ) || |
| | - $type === 'image/svg+xml' |
| | + /** |
| | + * Tells whether a filename ends in one of the ARCHIVE_EXTENSIONS entries. |
| | + * |
| | + * @param string $filename Name or path; only its final extension is used. |
| | + * |
| | + * @return bool True when the lowercased extension is in the list. |
| | + */ |
| | + private static function isArchive( string $filename ): bool { |
| | + $extension = pathinfo( $filename, PATHINFO_EXTENSION ); |
| | + |
| | + return in_array( |
| | + needle: strtolower( $extension ), |
| | + haystack: self::ARCHIVE_EXTENSIONS, |
| | + strict: true |
| | ); |
| | } |
| | } |
| | -?> |
| | |