Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/treetrek.git
<?php
require_once __DIR__ . '/LanguageDefinitions.php';

/**
 * Renders source text as HTML, wrapping recognised tokens in
 * `<span class="hl-NAME">` elements.
 *
 * The language is chosen from the media type first and the filename
 * extension second. The token rules for that language are obtained from
 * LanguageDefinitions::get(); they are presumed to be a map of token name
 * to delimited PCRE pattern (e.g. `'keyword' => '/\b(if|else)\b/'`) --
 * confirm against LanguageDefinitions.
 */
class Highlighter {
  /** Flags passed to htmlspecialchars(); identical to the PHP 8.1+ defaults. */
  private const ESCAPE_FLAGS = ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401;

  private string $content;
  private string $lang;
  private array $rules;

  /**
   * @param string $filename  Name of the file; only its extension is used.
   * @param string $content   Raw text to highlight.
   * @param string $mediaType Media type of the content; may be empty or
   *                          unknown, in which case the extension decides.
   */
  public function __construct( string $filename, string $content, string $mediaType ) {
    $this->content = $content;

    $this->lang = $this->detectLanguage( $mediaType, $filename );
    $this->rules = LanguageDefinitions::get( $this->lang ) ?? [];
  }

  /**
   * Produces the highlighted HTML for the content.
   *
   * All rules are merged into one alternation of named groups, followed by
   * a punctuation group (skipped for markdown and rmd) and a catch-all
   * single-character group, so every byte of input is consumed and escaped
   * exactly once. When there are no rules, or the combined expression
   * fails (for example on invalid UTF-8), the content is returned escaped
   * but not highlighted.
   *
   * @return string HTML-escaped markup.
   */
  public function render(): string {
    if( $this->rules === [] ) {
      return $this->escape( $this->content );
    }

    $patterns = [];

    foreach( $this->rules as $name => $pattern ) {
      $inner = $this->stripDelimiters( $pattern );

      // Malformed rules are dropped instead of corrupting the whole regex.
      if( $inner !== null ) {
        $patterns[] = "(?P<{$name}>{$inner})";
      }
    }

    if( !in_array( $this->lang, ['markdown', 'rmd'], true ) ) {
      $patterns[] = '(?P<punctuation>[\{\}\(\)\[\]\;\,])';
    }

    $patterns[] = '(?P<any>[\s\S])';
    $combined = '~' . implode( '|', $patterns ) . '~msu';

    $result = preg_replace_callback( $combined, function( array $matches ): string {
      foreach( $matches as $key => $value ) {
        // Integer keys are the positional duplicates of the named groups;
        // empty values belong to alternatives that did not take part.
        if( is_int( $key ) || $value === '' ) {
          continue;
        }

        return match( $key ) {
          'any' => $this->escape( $value ),
          'string_interp' => $this->renderInterpolatedString( $value ),
          'math' => $this->renderMath( $value ),
          default => '<span class="hl-' . $key . '">' . $this->escape( $value ) . '</span>'
        };
      }

      return $this->escape( $matches[0] );
    }, $this->content );

    return $result ?? $this->escape( $this->content );
  }

  /**
   * Escapes text for HTML output with explicit flags, so the result does
   * not depend on the PHP version's htmlspecialchars() defaults and
   * invalid UTF-8 is substituted rather than producing an empty string.
   */
  private function escape( string $text ): string {
    return htmlspecialchars( $text, self::ESCAPE_FLAGS, 'UTF-8' );
  }

  /**
   * Extracts the body of a delimited PCRE pattern and makes it safe to
   * embed in a `~`-delimited expression.
   *
   * Anything after the closing delimiter (the rule's own modifiers) is
   * discarded; the combined expression always runs with `msu`.
   *
   * @return string|null The pattern body, or null when the rule has no
   *                     recognisable pair of delimiters.
   */
  private function stripDelimiters( string $pattern ): ?string {
    if( strlen( $pattern ) < 2 ) {
      return null;
    }

    $open = $pattern[0];

    // Bracket-style delimiters close with their counterpart.
    $close = match( $open ) {
      '(' => ')',
      '[' => ']',
      '{' => '}',
      '<' => '>',
      default => $open
    };

    $end = strrpos( $pattern, $close );

    if( $end === false || $end === 0 ) {
      return null;
    }

    $inner = substr( $pattern, 1, $end - 1 );

    // Escape only bare tildes. Escaped pairs (including an existing "\~")
    // are consumed whole, so they are never turned into "\\~", which would
    // terminate the combined pattern early.
    return preg_replace_callback(
      '/\\\\.|~/s',
      static fn( array $m ): string => $m[0] === '~' ? '\\~' : $m[0],
      $inner
    ) ?? $inner;
  }

  /**
   * Renders a string literal, marking `$name` and `${name}` references
   * inside it.
   *
   * @param string $content The matched string literal, delimiters included.
   *
   * @return string A `hl-string` span containing the escaped literal.
   */
  private function renderInterpolatedString( string $content ): string {
    $pattern = '/(\$\{[a-zA-Z0-9_]+\}|\$[a-zA-Z0-9_]+)/';
    $parts   = preg_split( $pattern, $content, -1, PREG_SPLIT_DELIM_CAPTURE ) ?: [$content];
    $output  = '<span class="hl-string">';

    foreach( $parts as $index => $part ) {
      if( $part === '' ) continue;

      // With a single capture group, captured delimiters sit at odd
      // indices; even indices are plain text even when they happen to
      // begin with "$".
      if( $index % 2 === 0 ) {
        $output .= $this->escape( $part );
      } elseif( str_starts_with( $part, '${' ) ) {
        $output .= '<span class="hl-interp-punct">${</span>';
        $output .= '<span class="hl-variable">' . $this->escape( substr( $part, 2, -1 ) ) . '</span>';
        $output .= '<span class="hl-interp-punct">}</span>';
      } else {
        $output .= '<span class="hl-interp-punct">$</span>';
        $output .= '<span class="hl-variable">' . $this->escape( substr( $part, 1 ) ) . '</span>';
      }
    }

    $output .= '</span>';

    return $output;
  }

  /**
   * Renders a math token: backtick-quoted runs become `hl-function`
   * spans, everything else becomes `hl-math` spans.
   *
   * @param string $content The matched math token.
   *
   * @return string Concatenated spans covering the whole token.
   */
  private function renderMath( string $content ): string {
    $parts = preg_split( '/(`[^`]+`)/', $content, -1, PREG_SPLIT_DELIM_CAPTURE ) ?: [$content];
    $output = '';

    foreach( $parts as $index => $part ) {
      if( $part === '' ) continue;

      // Odd indices are the captured backtick runs; see the note in
      // renderInterpolatedString() about index parity.
      $class = $index % 2 === 1 ? 'hl-function' : 'hl-math';
      $output .= '<span class="' . $class . '">' . $this->escape( $part ) . '</span>';
    }

    return $output;
  }

  /**
   * Chooses the language identifier for the content.
   *
   * The media type is consulted first, after dropping any parameters
   * (such as `; charset=...`) and lower-casing it, since media types are
   * case-insensitive. If it is not recognised, the lower-cased filename
   * extension is used.
   *
   * @return string A language identifier, or 'text' when nothing matches.
   */
  private function detectLanguage( string $mediaType, string $filename ): string {
    $type = strtolower( trim( explode( ';', $mediaType, 2 )[0] ) );

    $lang = match( $type ) {
      'text/x-php', 'application/x-php', 'application/x-httpd-php' => 'php',
      'text/html' => 'html',
      'text/css' => 'css',
      'application/javascript', 'text/javascript', 'text/x-javascript' => 'javascript',
      'application/json', 'text/json', 'application/x-json' => 'json',
      'application/xml', 'text/xml', 'image/svg+xml' => 'xml',
      'text/x-shellscript', 'application/x-sh' => 'bash',
      'text/x-c', 'text/x-csrc' => 'c',
      'text/x-c++src', 'text/x-c++', 'text/x-cpp' => 'cpp',
      'text/x-java', 'text/x-java-source', 'application/java-archive' => 'java',
      'text/x-python', 'application/x-python-code' => 'python',
      'text/x-ruby', 'application/x-ruby' => 'ruby',
      'text/x-go', 'text/go' => 'go',
      'text/rust', 'text/x-rust' => 'rust',
      'text/x-lua', 'text/lua' => 'lua',
      'text/markdown', 'text/x-markdown' => 'markdown',
      'text/x-r', 'text/x-r-source', 'application/r' => 'r',
      'application/sql', 'text/sql', 'text/x-sql' => 'sql',
      'text/yaml', 'text/x-yaml', 'application/yaml' => 'yaml',
      'application/typescript', 'text/typescript' => 'typescript',
      'text/x-gradle' => 'gradle',
      'text/x-tex', 'application/x-tex' => 'tex',
      default => null
    };

    if( $lang !== null ) {
      return $lang;
    }

    $ext = strtolower( pathinfo( $filename, PATHINFO_EXTENSION ) );

    return match( $ext ) {
      'php', 'phtml', 'php8', 'php7' => 'php',
      'c', 'h' => 'c',
      'cpp', 'hpp', 'cc', 'cxx' => 'cpp',
      'java' => 'java',
      'js', 'jsx', 'mjs' => 'javascript',
      'ts', 'tsx' => 'typescript',
      'go' => 'go',
      'rs' => 'rust',
      'py', 'pyw' => 'python',
      'rb', 'erb' => 'ruby',
      'lua' => 'lua',
      'sh', 'bash', 'zsh' => 'bash',
      'bat', 'cmd' => 'batch',
      'md', 'markdown' => 'markdown',
      'rmd' => 'rmd',
      'r' => 'r',
      'xml', 'svg' => 'xml',
      'html', 'htm' => 'html',
      'css' => 'css',
      'json', 'lock' => 'json',
      'sql' => 'sql',
      'yaml', 'yml' => 'yaml',
      'gradle' => 'gradle',
      'tex', 'sty', 'cls', 'ltx' => 'tex',
      default => 'text'
    };
  }
}