Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/treetrek.git
<?php
/**
 * Guesses a media (MIME) type and a coarse category for a blob of data.
 *
 * Detection runs in this order:
 *   1. a file name ending in ".svg" is always reported as SVG;
 *   2. the leading bytes are compared against the FORMATS signature table;
 *   3. the file extension is looked up in EXTENSION_MAP;
 *   4. otherwise the data is reported as generic binary.
 */
class MediaTypeSniffer {
  /** Maximum number of leading bytes inspected; no signature may be longer. */
  private const BUFFER = 12;

  /** Wildcard used inside a signature: matches any byte at that offset. */
  private const ANY = -1;

  public const CAT_IMAGE   = 'image';
  public const CAT_VIDEO   = 'video';
  public const CAT_AUDIO   = 'audio';
  public const CAT_TEXT    = 'text';
  public const CAT_ARCHIVE = 'archive';
  public const CAT_APP     = 'application';
  public const CAT_BINARY  = 'binary';

  /**
   * Signature table; each row is [category, leading byte pattern, media type].
   *
   * Rows are tried top to bottom and the first match wins, so the order of
   * overlapping prefixes matters.
   */
  private const FORMATS = [
    [self::CAT_IMAGE, [0x3C, 0x73, 0x76, 0x67, 0x20], 'image/svg+xml'],
    [self::CAT_IMAGE, [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A], 'image/png'],
    // Every JPEG stream opens with SOI (FF D8) followed by another marker
    // (FF xx), so the three-byte prefix covers JFIF, Exif, Adobe and raw
    // quantization-table variants alike.
    [self::CAT_IMAGE, [0xFF, 0xD8, 0xFF], 'image/jpeg'],
    [self::CAT_IMAGE, [0x47, 0x49, 0x46, 0x38], 'image/gif'],
    [self::CAT_IMAGE, [0x42, 0x4D], 'image/bmp'],
    [self::CAT_IMAGE, [0x49, 0x49, 0x2A, 0x00], 'image/tiff'],
    [self::CAT_IMAGE, [0x4D, 0x4D, 0x00, 0x2A], 'image/tiff'],
    [self::CAT_IMAGE, [0x52, 0x49, 0x46, 0x46, self::ANY, self::ANY, self::ANY, self::ANY, 0x57, 0x45, 0x42, 0x50], 'image/webp'],
    [self::CAT_IMAGE, [0x38, 0x42, 0x50, 0x53, 0x00, 0x01], 'image/vnd.adobe.photoshop'],
    [self::CAT_IMAGE, [0x23, 0x64, 0x65, 0x66], 'image/x-xbitmap'],
    [self::CAT_IMAGE, [0x21, 0x20, 0x58, 0x50, 0x4D, 0x32], 'image/x-xpixmap'],
    [self::CAT_VIDEO, [0x8A, 0x4D, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A], 'video/x-mng'],
    [self::CAT_VIDEO, [0x52, 0x49, 0x46, 0x46, self::ANY, self::ANY, self::ANY, self::ANY, 0x41, 0x56, 0x49, 0x20], 'video/x-msvideo'],
    [self::CAT_VIDEO, [self::ANY, self::ANY, self::ANY, self::ANY, 0x66, 0x74, 0x79, 0x70], 'video/mp4'],
    [self::CAT_VIDEO, [0x1A, 0x45, 0xDF, 0xA3], 'video/x-matroska'],
    [self::CAT_VIDEO, [0x00, 0x00, 0x01, 0xBA], 'video/mpeg'],
    [self::CAT_VIDEO, [0x46, 0x4C, 0x56, 0x01], 'video/x-flv'],
    [self::CAT_TEXT,  [0x3C, 0x21], 'text/html'],
    [self::CAT_TEXT,  [0x3C, 0x68, 0x74, 0x6D, 0x6C], 'text/html'],
    [self::CAT_TEXT,  [0x3C, 0x68, 0x65, 0x61, 0x64], 'text/html'],
    [self::CAT_TEXT,  [0x3C, 0x62, 0x6F, 0x64, 0x79], 'text/html'],
    [self::CAT_TEXT,  [0x3C, 0x3F, 0x78, 0x6D, 0x6C, 0x20], 'text/xml'],
    // PDF is not a text format; keeping it out of CAT_TEXT makes isBinary()
    // report it as binary.
    [self::CAT_APP,   [0x25, 0x50, 0x44, 0x46, 0x2D], 'application/pdf'],
    [self::CAT_AUDIO, [0xFF, 0xFB, self::ANY], 'audio/mpeg'],
    [self::CAT_AUDIO, [0x49, 0x44, 0x33], 'audio/mpeg'],
    [self::CAT_AUDIO, [0x52, 0x49, 0x46, 0x46, self::ANY, self::ANY, self::ANY, self::ANY, 0x57, 0x41, 0x56, 0x45], 'audio/wav'],
    [self::CAT_AUDIO, [0x4F, 0x67, 0x67, 0x53], 'audio/ogg'],
    [self::CAT_ARCHIVE, [0x50, 0x4B, 0x03, 0x04], 'application/zip'],
    [self::CAT_ARCHIVE, [0x1F, 0x8B, 0x08], 'application/gzip'],
    [self::CAT_APP,   [0x7F, 0x45, 0x4C, 0x46], 'application/x-elf']
  ];

  /** Lower-case file extension => [category, media type]. */
  private const EXTENSION_MAP = [
    'html'      => [self::CAT_TEXT, 'text/html'],
    'css'       => [self::CAT_TEXT, 'text/css'],
    'js'        => [self::CAT_TEXT, 'application/javascript'],
    'json'      => [self::CAT_TEXT, 'application/json'],
    'xml'       => [self::CAT_TEXT, 'application/xml'],
    'md'        => [self::CAT_TEXT, 'text/markdown'],
    'txt'       => [self::CAT_TEXT, 'text/plain'],
    'php'       => [self::CAT_TEXT, 'application/x-php'],
    'sql'       => [self::CAT_TEXT, 'application/sql'],
    'yaml'      => [self::CAT_TEXT, 'text/yaml'],
    'yml'       => [self::CAT_TEXT, 'text/yaml'],
    'gradle'    => [self::CAT_TEXT, 'text/plain'],
    'gitignore' => [self::CAT_TEXT, 'text/plain'],
    'sh'        => [self::CAT_TEXT, 'application/x-sh'],
    'tex'       => [self::CAT_TEXT, 'application/x-tex'],
    'bat'       => [self::CAT_TEXT, 'application/x-msdos-program'],
    'py'        => [self::CAT_TEXT, 'text/x-python'],
    'rb'        => [self::CAT_TEXT, 'text/x-ruby'],
    'java'      => [self::CAT_TEXT, 'text/x-java-source'],
    'c'         => [self::CAT_TEXT, 'text/x-csrc'],
    'cpp'       => [self::CAT_TEXT, 'text/x-c++src'],
    'h'         => [self::CAT_TEXT, 'text/x-chdr'],
    'cs'        => [self::CAT_TEXT, 'text/x-csharp'],
    'ts'        => [self::CAT_TEXT, 'application/typescript'],
    'log'       => [self::CAT_TEXT, 'text/plain'],
    'ini'       => [self::CAT_TEXT, 'text/plain'],
    'conf'      => [self::CAT_TEXT, 'text/plain'],
    'zip'       => [self::CAT_ARCHIVE, 'application/zip'],
    'jpg'       => [self::CAT_IMAGE, 'image/jpeg'],
    'jpeg'      => [self::CAT_IMAGE, 'image/jpeg'],
    'png'       => [self::CAT_IMAGE, 'image/png'],
    'gif'       => [self::CAT_IMAGE, 'image/gif'],
    'svg'       => [self::CAT_IMAGE, 'image/svg+xml'],
    'webp'      => [self::CAT_IMAGE, 'image/webp'],
    'mp4'       => [self::CAT_VIDEO, 'video/mp4'],
    'mp3'       => [self::CAT_AUDIO, 'audio/mpeg']
  ];

  /**
   * Resolves the category and media type for the given data and file name.
   *
   * @param string $data     Raw content; only the leading bytes are examined.
   * @param string $filePath File name or path; may be empty.
   *
   * @return array{0: string, 1: string} The [category, media type] pair.
   */
  private static function getTypeInfo( string $data, string $filePath ): array {
    $info = [];
    $ext = strtolower( pathinfo( $filePath, PATHINFO_EXTENSION ) );

    // An .svg extension wins over content sniffing: SVG documents often open
    // with an XML prolog or a comment that would otherwise match another row.
    if( $ext === 'svg' ){
      $info = [self::CAT_IMAGE, 'image/svg+xml'];
    }

    if( empty( $info ) ){
      $info = self::sniff( $data );
    }

    if( empty( $info ) && !empty( $filePath ) ){
      $info = self::getInfoByExtension( $filePath );
    }

    if( empty( $info ) ){
      $info = [self::CAT_BINARY, 'application/octet-stream'];
    }

    return $info;
  }

  /**
   * Matches the leading bytes of the data against the signature table.
   *
   * @param string $data Raw content to inspect.
   *
   * @return array The [category, media type] pair of the first matching
   *               signature, or an empty array when nothing matches.
   */
  private static function sniff( string $data ): array {
    $found = [];
    $available = min( strlen( $data ), self::BUFFER );
    $sourceBytes = [];

    for( $i = 0; $i < $available; $i++ ){
      $sourceBytes[$i] = ord( $data[$i] );
    }

    foreach( self::FORMATS as [$category, $pattern, $type] ){
      $patternLength = count( $pattern );

      // Compare against the number of bytes actually buffered so a signature
      // can never index past the end of $sourceBytes.
      if( $patternLength > $available ){
        continue;
      }

      $matches = true;

      for( $i = 0; $i < $patternLength; $i++ ){
        if( $pattern[$i] !== self::ANY && $pattern[$i] !== $sourceBytes[$i] ){
          $matches = false;
          break;
        }
      }

      if( $matches ){
        $found = [$category, $type];
        break;
      }
    }

    return $found;
  }

  /**
   * Looks up the type by the (case-insensitive) extension of the file name.
   *
   * @param string $filePath File name or path.
   *
   * @return array{0: string, 1: string} The mapped [category, media type]
   *                                     pair, or the generic binary pair when
   *                                     the extension is not in the map.
   */
  private static function getInfoByExtension( string $filePath ): array {
    $ext = strtolower( pathinfo( $filePath, PATHINFO_EXTENSION ) );

    return self::EXTENSION_MAP[$ext] ?? [self::CAT_BINARY, 'application/octet-stream'];
  }

  /**
   * Returns the detected media type, e.g. "image/png".
   *
   * @param string $data     Raw content.
   * @param string $filePath Optional file name used for extension hints.
   *
   * @return string The media type; "application/octet-stream" when unknown.
   */
  public static function isMediaType( string $data, string $filePath = '' ): string {
    $info = self::getTypeInfo( $data, $filePath );

    return $info[1];
  }

  /**
   * Returns the detected category (one of the CAT_* constants).
   *
   * @param string $data     Raw content.
   * @param string $filePath Optional file name used for extension hints.
   *
   * @return string The category; CAT_BINARY when unknown.
   */
  public static function isCategory( string $data, string $filePath = '' ): string {
    $info = self::getTypeInfo( $data, $filePath );

    return $info[0];
  }

  /**
   * Tells whether the content should be treated as binary.
   *
   * Content counts as non-binary when its category is CAT_TEXT, its media
   * type starts with "text/", or it is SVG.
   *
   * @param string $data     Raw content.
   * @param string $filePath Optional file name used for extension hints.
   *
   * @return bool True when the content is binary.
   */
  public static function isBinary( string $data, string $filePath = '' ): bool {
    [$category, $type] = self::getTypeInfo( $data, $filePath );

    $isTextual =
      $category === self::CAT_TEXT ||
      str_starts_with( $type, 'text/' ) ||
      $type === 'image/svg+xml';

    return !$isTextual;
  }
}
?>