Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/treetrek.git
<?php
require_once __DIR__ . '/LanguageDefinitions.php';

/**
 * Converts source text into HTML in which recognised tokens are wrapped in
 * <span class="hl-..."> elements.
 *
 * The language is detected from the file name, then the file extension, then
 * the media type. The rules for that language come from LanguageDefinitions
 * as a map of token name to a delimited regular expression. All rules are
 * merged into a single alternation of named groups; the name of the group
 * that matched decides how the matched text is rendered.
 */
class Highlighter {
  /** Closing delimiter that pairs with each bracket-style opening delimiter. */
  private const BRACKET_DELIMITERS = [
    '(' => ')',
    '[' => ']',
    '{' => '}',
    '<' => '>',
  ];

  /** Languages for which the generic punctuation rule is not added. */
  private const NO_PUNCTUATION = ['markdown', 'rmd'];

  private string $content;
  private string $language;
  private array $rules;

  /**
   * @param string $filename  Name or path of the file being highlighted.
   * @param string $content   Raw (unescaped) text of the file.
   * @param string $mediaType Media type, used only when the file name and
   *                          extension do not identify the language.
   */
  public function __construct(
    string $filename,
    string $content,
    string $mediaType
  ) {
    $this->content  = $content;
    $this->language = $this->detectLanguage( $mediaType, $filename );
    $this->rules    = LanguageDefinitions::get( $this->language );
  }

  /**
   * Renders the content as HTML.
   *
   * @return string Highlighted HTML, or the HTML-escaped content when there
   *                are no rules for the language or the combined pattern
   *                cannot be applied (for example, invalid UTF-8 input).
   */
  public function render(): string {
    $processed = null;

    if( !empty( $this->rules ) ) {
      $processed = preg_replace_callback(
        $this->buildPattern(),
        fn( array $matches ): string => $this->renderMatch( $matches ),
        $this->content
      );
    }

    // preg_replace_callback yields null on failure; the escaped text is
    // only computed when it is actually needed.
    return is_string( $processed )
      ? $processed
      : $this->escape( $this->content );
  }

  /**
   * Merges every rule into one "~"-delimited alternation of named groups,
   * followed by the generic punctuation group (unless disabled for the
   * language) and a catch-all group that consumes one character.
   *
   * @return string The combined pattern, including delimiters and modifiers.
   */
  private function buildPattern(): string {
    $patterns = [];

    foreach( $this->rules as $name => $pattern ) {
      $inner = $this->escapeTildes( $this->stripDelimiters( $pattern ) );

      // An empty group can never produce a token, so leave it out.
      if( $inner !== '' ) {
        $patterns[] = "(?P<{$name}>{$inner})";
      }
    }

    if( !in_array( $this->language, self::NO_PUNCTUATION, true ) ) {
      $patterns[] = "(?P<punctuation>[\\{\\}\\(\\)\\[\\]\\;\\,\\:])";
    }

    // Guarantees that every character is consumed, so unmatched text passes
    // through the callback and is escaped.
    $patterns[] = "(?P<any>[\s\S])";

    return '~' . implode( '|', $patterns ) . '~msu';
  }

  /**
   * Returns the body of a delimited pattern: the text between the opening
   * delimiter and the last occurrence of its closing delimiter.
   *
   * Bracket-style delimiters such as {...} close with the paired character
   * rather than the opening one. Trailing modifiers are discarded; the
   * combined pattern always runs with the "msu" modifiers instead.
   *
   * @param string $pattern Delimited regular expression, e.g. "/\d+/i".
   *
   * @return string The pattern body, or '' when no body can be extracted.
   */
  private function stripDelimiters( string $pattern ): string {
    $opening = $pattern[0] ?? '';
    $inner   = '';

    if( $opening !== '' ) {
      $closing = self::BRACKET_DELIMITERS[$opening] ?? $opening;
      $end     = strrpos( $pattern, $closing );

      if( $end !== false && $end > 1 ) {
        $inner = substr( $pattern, 1, $end - 1 );
      }
    }

    return $inner;
  }

  /**
   * Escapes each bare "~" so the text can sit inside a "~"-delimited
   * pattern. Existing escape sequences are copied through untouched, so an
   * already-escaped "\~" is not turned into "\\~" (an escaped backslash
   * followed by a bare delimiter).
   *
   * @param string $inner Pattern body without delimiters.
   *
   * @return string The body with every unescaped tilde escaped.
   */
  private function escapeTildes( string $inner ): string {
    $escaped = preg_replace_callback(
      '/\\\\.|~/s',
      static fn( array $m ): string => $m[0] === '~' ? '\~' : $m[0],
      $inner
    );

    return is_string( $escaped )
      ? $escaped
      : str_replace( '~', '\~', $inner );
  }

  /**
   * Renders one match of the combined pattern according to the first named
   * group that captured text.
   *
   * @param array $matches Match array passed by preg_replace_callback.
   *
   * @return string HTML for the matched text.
   */
  private function renderMatch( array $matches ): string {
    $output = $this->escape( $matches[0] );

    foreach( $matches as $key => $value ) {
      // Named groups have string keys; their numeric twins are skipped.
      if( is_string( $key ) && $value !== '' ) {
        $output = match( $key ) {
          'any'           => $this->escape( $value ),
          'string_interp' => $this->renderInterpolatedString( $value ),
          'math'          => $this->renderMath( $value ),
          default         => $this->wrap( $value, 'hl-' . $key )
        };

        break;
      }
    }

    return $output;
  }

  /**
   * Renders a string token, styling embedded $name and ${name} variables
   * separately from the literal text around them.
   *
   * @param string $content The matched string token, unescaped.
   *
   * @return string HTML for the token.
   */
  private function renderInterpolatedString( string $content ): string {
    $pattern = '/(\$\{[a-zA-Z0-9_]+\}|\$[a-zA-Z0-9_]+)/';

    return $this->processSegments(
      $content,
      $pattern,
      function( string $part, bool $isDelimiter ): string {
        if( !$isDelimiter ) {
          // Literal text, including text that merely begins with "$".
          $out = $this->wrap( $part, 'hl-string' );
        } else {
          // The pattern guarantees that "${" is closed by "}".
          $isComplex = str_starts_with( $part, '${' );

          $inner  = $isComplex ? substr( $part, 2, -1 ) : substr( $part, 1 );
          $prefix = $isComplex ? '${' : '$';
          $suffix = $isComplex
            ? $this->wrap( '}', 'hl-interp-punct', false )
            : '';

          $out = $this->wrap( $prefix, 'hl-interp-punct', false ) .
                 $this->wrap( $inner, 'hl-variable' ) .
                 $suffix;
        }

        return $out;
      }
    );
  }

  /**
   * Renders a math token, styling backtick-enclosed runs as functions and
   * everything else as math.
   *
   * @param string $content The matched math token, unescaped.
   *
   * @return string HTML for the token.
   */
  private function renderMath( string $content ): string {
    $pattern = '/(`[^`]+`)/';

    return $this->processSegments(
      $content,
      $pattern,
      fn( string $part, bool $isDelimiter ): string => $this->wrap(
        $part,
        $isDelimiter ? 'hl-function' : 'hl-math'
      )
    );
  }

  /**
   * Splits the content on the pattern and concatenates the callback result
   * for every non-empty piece.
   *
   * The pattern must consist of exactly one capturing group around the whole
   * expression: the split result then alternates between plain text (even
   * indexes) and text matched by the pattern (odd indexes), which is how the
   * callback is told which kind of piece it received.
   *
   * @param string   $content  Text to split.
   * @param string   $pattern  Delimited pattern with one capturing group.
   * @param callable $callback Receives (string $part, bool $isDelimiter) and
   *                           returns the HTML for that piece.
   *
   * @return string Concatenated HTML.
   */
  private function processSegments(
    string $content,
    string $pattern,
    callable $callback
  ): string {
    $parts = preg_split( $pattern, $content, -1, PREG_SPLIT_DELIM_CAPTURE );

    // On failure treat the whole content as a single plain-text piece.
    if( $parts === false ) {
      $parts = [$content];
    }

    $output = '';

    foreach( $parts as $index => $part ) {
      if( $part !== '' ) {
        $output .= $callback( $part, $index % 2 === 1 );
      }
    }

    return $output;
  }

  /**
   * Wraps content in a span that has the given class.
   *
   * @param string $content   Text to wrap.
   * @param string $className Value of the class attribute.
   * @param bool   $escape    Pass false only for content known to be safe
   *                          HTML.
   *
   * @return string The span element.
   */
  private function wrap(
    string $content,
    string $className,
    bool $escape = true
  ): string {
    $safeContent = $escape ? $this->escape( $content ) : $content;

    return '<span class="' . $className . '">' . $safeContent . '</span>';
  }

  /**
   * Escapes text for HTML with explicit flags, so the result does not depend
   * on the defaults of the running PHP version: both quote styles are
   * encoded and invalid byte sequences are substituted rather than causing
   * an empty result.
   *
   * @param string $text Raw text.
   *
   * @return string HTML-safe text.
   */
  private function escape( string $text ): string {
    return htmlspecialchars( $text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8' );
  }

  /**
   * Determines the language, trying in order: well-known file names, the
   * lower-cased file extension, then the media type.
   *
   * @param string $mediaType Media type; parameters such as "; charset=..."
   *                          and letter case are ignored.
   * @param string $filename  Name or path of the file.
   *
   * @return string Language identifier; 'text' when nothing matches.
   */
  private function detectLanguage(
    string $mediaType,
    string $filename
  ): string {
    $basename  = basename( $filename );
    $extension = strtolower( pathinfo( $filename, PATHINFO_EXTENSION ) );
    $language  = match( $basename ) {
      'Containerfile',
      'Dockerfile'  => 'containerfile',
      'GNUmakefile',
      'Makefile',
      'makefile'    => 'makefile',
      'Jenkinsfile' => 'groovy',
      // TOML lock files; the generic "lock" extension maps to JSON.
      'Cargo.lock',
      'poetry.lock' => 'toml',
      default       => ''
    };

    if( $language === '' ) {
      $language = match( $extension ) {
        'php', 'phtml', 'php8', 'php7' => 'php',
        'c', 'h'                       => 'c',
        'cpp', 'hpp', 'cc', 'cxx'      => 'cpp',
        'cs', 'csx'                    => 'csharp',
        'java'                         => 'java',
        'kt', 'kts'                    => 'kotlin',
        'scala', 'sc'                  => 'scala',
        'groovy', 'gvy'                => 'groovy',
        'js', 'jsx', 'mjs'             => 'javascript',
        'ts', 'tsx'                    => 'typescript',
        'dart'                         => 'dart',
        'swift'                        => 'swift',
        'go'                           => 'go',
        'rs'                           => 'rust',
        'py', 'pyw'                    => 'python',
        'rb', 'erb'                    => 'ruby',
        'pl', 'pm', 't'                => 'perl',
        'lua'                          => 'lua',
        'sh', 'bash', 'zsh'            => 'bash',
        'ps1', 'psm1', 'psd1'          => 'powershell',
        'bat', 'cmd'                   => 'batch',
        'md', 'markdown'               => 'markdown',
        'rmd'                          => 'rmd',
        'r'                            => 'r',
        'xml', 'svg'                   => 'xml',
        'xsl', 'xslt'                  => 'xslt',
        'html', 'htm'                  => 'html',
        'css'                          => 'css',
        'json', 'lock'                 => 'json',
        'sql'                          => 'sql',
        'yaml', 'yml'                  => 'yaml',
        'gradle'                       => 'gradle',
        'tex', 'sty', 'cls', 'ltx'     => 'tex',
        'properties', 'prop'           => 'properties',
        'ini', 'cfg', 'conf'           => 'ini',
        'toml'                         => 'toml',
        'mk', 'mak'                    => 'makefile',
        'diff', 'patch'                => 'diff',
        'for', 'f', 'f90', 'f95'       => 'fortran',
        default                        => ''
      };
    }

    if( $language === '' ) {
      // Media types are case-insensitive and may carry parameters, so only
      // the lower-cased "type/subtype" part is compared.
      $type = strtolower( trim( explode( ';', $mediaType, 2 )[0] ) );

      $language = match( $type ) {
        'text/x-php', 'application/x-php',
        'application/x-httpd-php'           => 'php',
        'text/html'                         => 'html',
        'text/css'                          => 'css',
        'application/javascript',
        'text/javascript',
        'text/x-javascript'                 => 'javascript',
        'application/json', 'text/json',
        'application/x-json'                => 'json',
        'application/xml', 'text/xml',
        'image/svg+xml'                     => 'xml',
        'application/xslt+xml'              => 'xslt',
        'text/x-shellscript',
        'application/x-sh'                  => 'bash',
        'text/x-c', 'text/x-csrc'           => 'c',
        'text/x-c++src', 'text/x-c++',
        'text/x-cpp'                        => 'cpp',
        'text/x-csharp'                     => 'csharp',
        'text/x-java',
        'text/x-java-source',
        'application/java-archive'          => 'java',
        'text/x-kotlin'                     => 'kotlin',
        'text/x-scala'                      => 'scala',
        'text/x-swift'                      => 'swift',
        'text/x-python',
        'application/x-python-code'         => 'python',
        'text/x-ruby', 'application/x-ruby' => 'ruby',
        'text/x-perl', 'application/x-perl' => 'perl',
        'text/x-go', 'text/go'              => 'go',
        'text/rust', 'text/x-rust'          => 'rust',
        'text/x-lua', 'text/lua'            => 'lua',
        'text/markdown',
        'text/x-markdown'                   => 'markdown',
        'text/x-r', 'text/x-r-source',
        'application/r'                     => 'r',
        'application/sql', 'text/sql',
        'text/x-sql'                        => 'sql',
        'text/yaml', 'text/x-yaml',
        'application/yaml'                  => 'yaml',
        'application/typescript',
        'text/typescript'                   => 'typescript',
        'text/x-gradle'                     => 'gradle',
        'text/x-tex', 'application/x-tex'   => 'tex',
        'text/x-java-properties',
        'text/properties'                   => 'properties',
        'text/ini', 'application/x-ini'     => 'ini',
        'application/toml', 'text/toml'     => 'toml',
        'text/x-diff', 'text/x-patch'       => 'diff',
        'text/x-fortran'                    => 'fortran',
        default                             => 'text'
      };
    }

    return $language;
  }
}