<?php

declare(strict_types=1);

require_once __DIR__ . '/LanguageDefinitions.php';

/**
 * Turns source text into HTML in which recognised tokens are wrapped in
 * `<span class="hl-…">` elements.
 *
 * The language is picked from the file name, then the extension, then the
 * media type. The highlighting rules for that language are fetched from
 * LanguageDefinitions::get(); when it yields null the content is only
 * HTML-escaped.
 */
class Highlighter
{
    /** Explicit flags so escaping does not depend on the PHP version's defaults. */
    private const ESCAPE_FLAGS = ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401;

    /** Delimiter used for the combined pattern built in buildCombinedPattern(). */
    private const DELIMITER = '~';

    /** Opening => closing characters for bracket-style PCRE delimiters. */
    private const BRACKET_DELIMITERS = ['(' => ')', '[' => ']', '{' => '}', '<' => '>'];

    /** Languages for which the generic punctuation rule is not added. */
    private const UNPUNCTUATED_LANGUAGES = ['markdown', 'rmd'];

    private string $content;

    private string $language;

    /**
     * Rules for the detected language, iterated as name => pattern.
     * NOTE(review): assumed to map a token name to a delimited PCRE pattern
     * string — confirm against LanguageDefinitions.
     */
    private array $rules;

    /**
     * @param string $filename  File name or path; only used for language detection.
     * @param string $content   Raw (unescaped) text to highlight.
     * @param string $mediaType Media type used when the file name gives no match.
     */
    public function __construct(string $filename, string $content, string $mediaType)
    {
        $this->content = $content;
        $this->language = $this->detectLanguage($mediaType, $filename);
        $this->rules = LanguageDefinitions::get($this->language) ?? [];
    }

    /**
     * Renders the content as highlighted HTML.
     *
     * @return string Highlighted HTML, or the plainly escaped content when no
     *                rules exist or the combined pattern cannot be applied.
     */
    public function render(): string
    {
        $fallback = $this->escape($this->content);

        if ($this->rules === []) {
            return $fallback;
        }

        $processed = preg_replace_callback(
            $this->buildCombinedPattern(),
            fn (array $matches): string => $this->renderMatch($matches),
            $this->content
        );

        // null signals a PCRE failure (bad pattern, invalid UTF-8, limits…).
        return $processed ?? $fallback;
    }

    /**
     * Builds one alternation with a named group per rule, followed by the
     * generic punctuation group (where applicable) and a catch-all group that
     * consumes any single character so every byte of input reaches the callback.
     */
    private function buildCombinedPattern(): string
    {
        $alternatives = [];

        foreach ($this->rules as $name => $pattern) {
            $body = is_string($pattern) ? $this->extractPatternBody($pattern) : null;

            if ($body === null) {
                continue;
            }

            $alternatives[] = "(?P<{$name}>" . $this->escapeDelimiter($body) . ')';
        }

        if (!in_array($this->language, self::UNPUNCTUATED_LANGUAGES, true)) {
            $alternatives[] = '(?P<punctuation>[\{\}\(\)\[\]\;\,])';
        }

        $alternatives[] = '(?P<any>[\s\S])';

        return self::DELIMITER . implode('|', $alternatives) . self::DELIMITER . 'msu';
    }

    /**
     * Returns the text between a rule's delimiters, dropping any trailing
     * modifiers (the combined pattern always runs with `msu`).
     *
     * @return string|null The pattern body, or null when the rule has no
     *                     closing delimiter or an empty body.
     */
    private function extractPatternBody(string $pattern): ?string
    {
        if ($pattern === '') {
            return null;
        }

        $opening = $pattern[0];
        $closing = self::BRACKET_DELIMITERS[$opening] ?? $opening;
        $end = strrpos($pattern, $closing);

        // $end < 2 means: no closing delimiter at all, or nothing between them.
        if ($end === false || $end < 2) {
            return null;
        }

        return substr($pattern, 1, $end - 1);
    }

    /**
     * Escapes bare occurrences of the combined pattern's delimiter.
     *
     * Existing escape sequences are consumed as a unit, so an already escaped
     * `\~` is left alone instead of becoming `\\~` (an escaped backslash plus
     * a bare delimiter, which would terminate the combined pattern early).
     */
    private function escapeDelimiter(string $body): string
    {
        $escaped = preg_replace_callback(
            '/\\\\.|~/s',
            static fn (array $m): string => $m[0] === self::DELIMITER ? '\\' . self::DELIMITER : $m[0],
            $body
        );

        return $escaped ?? $body;
    }

    /**
     * Renders a single match of the combined pattern.
     *
     * The first non-empty named group decides how the text is rendered.
     *
     * @param array<int|string, string> $matches Match array from preg_replace_callback().
     */
    private function renderMatch(array $matches): string
    {
        foreach ($matches as $group => $text) {
            // Numeric keys duplicate the named groups; empty groups did not match.
            if (is_int($group) || $text === '') {
                continue;
            }

            return match ($group) {
                'any' => $this->escape($text),
                'string_interp' => $this->renderInterpolatedString($text),
                'math' => $this->renderMath($text),
                default => $this->wrap($text, 'hl-' . $group),
            };
        }

        return $this->escape($matches[0]);
    }

    /**
     * Renders a string token, marking `$name` and `${name}` references inside it.
     */
    private function renderInterpolatedString(string $content): string
    {
        $pattern = '/(\$\{[a-zA-Z0-9_]+\}|\$[a-zA-Z0-9_]+)/';

        return $this->processSegments(
            $content,
            $pattern,
            function (string $part, bool $isReference): string {
                if (!$isReference) {
                    return $this->wrap($part, 'hl-string');
                }

                if (str_starts_with($part, '${')) {
                    return $this->wrap('${', 'hl-interp-punct', false)
                        . $this->wrap(substr($part, 2, -1), 'hl-variable')
                        . $this->wrap('}', 'hl-interp-punct', false);
                }

                return $this->wrap('$', 'hl-interp-punct', false)
                    . $this->wrap(substr($part, 1), 'hl-variable');
            }
        );
    }

    /**
     * Renders a math token, marking backtick-quoted runs inside it as functions.
     */
    private function renderMath(string $content): string
    {
        return $this->processSegments(
            $content,
            '/(`[^`]+`)/',
            fn (string $part, bool $isInlineCode): string => $this->wrap(
                $part,
                $isInlineCode ? 'hl-function' : 'hl-math'
            )
        );
    }

    /**
     * Splits $content on $pattern and concatenates the callback's output for
     * every non-empty segment.
     *
     * @param string   $pattern  Must consist of exactly one capturing group
     *                           around the whole expression, so that split
     *                           results alternate text / delimiter.
     * @param callable $callback Receives (string $segment, bool $isDelimiter)
     *                           and returns the HTML for that segment.
     */
    private function processSegments(string $content, string $pattern, callable $callback): string
    {
        $parts = preg_split($pattern, $content, -1, PREG_SPLIT_DELIM_CAPTURE);

        if ($parts === false) {
            // Splitting failed: render everything as one plain segment rather
            // than losing the content.
            return $callback($content, false);
        }

        $output = '';

        foreach ($parts as $index => $part) {
            if ($part !== '') {
                // With a single capture group, odd indexes hold the delimiters.
                $output .= $callback($part, $index % 2 === 1);
            }
        }

        return $output;
    }

    /**
     * Wraps text in a span carrying the given class.
     *
     * @param bool $escape Pass false only for trusted, literal markup-free text.
     */
    private function wrap(string $content, string $className, bool $escape = true): string
    {
        $safeContent = $escape ? $this->escape($content) : $content;

        return '<span class="' . $className . '">' . $safeContent . '</span>';
    }

    /**
     * HTML-escapes text; invalid byte sequences are substituted instead of
     * making the whole result an empty string.
     */
    private function escape(string $text): string
    {
        return htmlspecialchars($text, self::ESCAPE_FLAGS, 'UTF-8');
    }

    /**
     * Picks the language identifier for the content.
     *
     * Precedence: well-known file names, then the lower-cased extension, then
     * the media type; 'text' when nothing matches.
     */
    private function detectLanguage(string $mediaType, string $filename): string
    {
        return self::languageFromBasename(basename($filename))
            ?? self::languageFromExtension(strtolower(pathinfo($filename, PATHINFO_EXTENSION)))
            ?? self::languageFromMediaType($mediaType);
    }

    /** Maps well-known extension-less file names (case-sensitive) to a language. */
    private static function languageFromBasename(string $basename): ?string
    {
        return match ($basename) {
            'Containerfile', 'Dockerfile' => 'containerfile',
            'Makefile' => 'makefile',
            'Jenkinsfile' => 'groovy',
            default => null,
        };
    }

    /** Maps a lower-cased file extension (without the dot) to a language. */
    private static function languageFromExtension(string $extension): ?string
    {
        return match ($extension) {
            'php', 'phtml', 'php8', 'php7' => 'php',
            'c', 'h' => 'c',
            'cpp', 'hpp', 'cc', 'cxx' => 'cpp',
            'cs', 'csx' => 'csharp',
            'java' => 'java',
            'kt', 'kts' => 'kotlin',
            'scala', 'sc' => 'scala',
            'groovy', 'gvy' => 'groovy',
            'js', 'jsx', 'mjs' => 'javascript',
            'ts', 'tsx' => 'typescript',
            'dart' => 'dart',
            'swift' => 'swift',
            'go' => 'go',
            'rs' => 'rust',
            'py', 'pyw' => 'python',
            'rb', 'erb' => 'ruby',
            'pl', 'pm', 't' => 'perl',
            'lua' => 'lua',
            'sh', 'bash', 'zsh' => 'bash',
            'ps1', 'psm1', 'psd1' => 'powershell',
            'bat', 'cmd' => 'batch',
            'md', 'markdown' => 'markdown',
            'rmd' => 'rmd',
            'r' => 'r',
            'xml', 'svg' => 'xml',
            'html', 'htm' => 'html',
            'css' => 'css',
            'json', 'lock' => 'json',
            'sql' => 'sql',
            'yaml', 'yml' => 'yaml',
            'gradle' => 'gradle',
            'tex', 'sty', 'cls', 'ltx' => 'tex',
            'properties', 'prop' => 'properties',
            'ini', 'cfg', 'conf' => 'ini',
            'toml' => 'toml',
            'mk', 'mak' => 'makefile',
            'diff', 'patch' => 'diff',
            default => null,
        };
    }

    /**
     * Maps a media type to a language, defaulting to 'text'.
     *
     * Parameters such as `; charset=utf-8` are ignored and the comparison is
     * case-insensitive, since media type names are not case-sensitive.
     */
    private static function languageFromMediaType(string $mediaType): string
    {
        $essence = strtolower(trim(explode(';', $mediaType, 2)[0]));

        return match ($essence) {
            'text/x-php', 'application/x-php', 'application/x-httpd-php' => 'php',
            'text/html' => 'html',
            'text/css' => 'css',
            'application/javascript', 'text/javascript', 'text/x-javascript' => 'javascript',
            'application/json', 'text/json', 'application/x-json' => 'json',
            'application/xml', 'text/xml', 'image/svg+xml' => 'xml',
            'text/x-shellscript', 'application/x-sh' => 'bash',
            'text/x-c', 'text/x-csrc' => 'c',
            'text/x-c++src', 'text/x-c++', 'text/x-cpp' => 'cpp',
            'text/x-csharp' => 'csharp',
            'text/x-java', 'text/x-java-source', 'application/java-archive' => 'java',
            'text/x-kotlin' => 'kotlin',
            'text/x-scala' => 'scala',
            'text/x-swift' => 'swift',
            'text/x-python', 'application/x-python-code' => 'python',
            'text/x-ruby', 'application/x-ruby' => 'ruby',
            'text/x-perl', 'application/x-perl' => 'perl',
            'text/x-go', 'text/go' => 'go',
            'text/rust', 'text/x-rust' => 'rust',
            'text/x-lua', 'text/lua' => 'lua',
            'text/markdown', 'text/x-markdown' => 'markdown',
            'text/x-r', 'text/x-r-source', 'application/r' => 'r',
            'application/sql', 'text/sql', 'text/x-sql' => 'sql',
            'text/yaml', 'text/x-yaml', 'application/yaml' => 'yaml',
            'application/typescript', 'text/typescript' => 'typescript',
            'text/x-gradle' => 'gradle',
            'text/x-tex', 'application/x-tex' => 'tex',
            'text/x-java-properties', 'text/properties' => 'properties',
            'text/ini', 'application/x-ini' => 'ini',
            'application/toml', 'text/toml' => 'toml',
            'text/x-diff', 'text/x-patch' => 'diff',
            default => 'text',
        };
    }
}