| | <?php |
| | -require_once __DIR__ . '/LanguageDefinitions.php'; |
| | - |
| | -class Highlighter { |
| | - private string $content; |
| | - private string $lang; |
| | - private array $rules; |
| | - |
| | - /** |
| | - * Store the text to highlight and load the rule set for its language. |
| | - * |
| | - * @param string $content Text that render() will escape and highlight. |
| | - * @param string $lang Language identifier; lower-cased before the rule lookup. |
| | - */ |
| | - public function __construct(string $content, string $lang) { |
| | - $normalizedLang = strtolower($lang); |
| | - $this->rules = LanguageDefinitions::get($normalizedLang); |
| | - $this->lang = $normalizedLang; |
| | - $this->content = $content; |
| | - } |
| | - |
| | - /** |
| | - * Render the content as HTML with every recognised token wrapped in |
| | - * <span class="hl-NAME">, where NAME is the key of the rule that matched. |
| | - * |
| | - * All rules are merged into a single alternation in array order, so an |
| | - * earlier rule wins when two rules could match at the same offset. Text |
| | - * that no rule matches is emitted HTML-escaped, one character at a time. |
| | - * |
| | - * @return string Highlighted HTML; the escaped plain content when no rules |
| | - * exist for the language or when the regex engine fails. |
| | - */ |
| | - public function render(): string { |
| | - if (empty($this->rules)) { |
| | - return htmlspecialchars($this->content); |
| | - } |
| | - |
| | - $patterns = []; |
| | - foreach ($this->rules as $name => $pattern) { |
| | - $delim = $pattern[0]; |
| | - $end = strrpos($pattern, $delim); |
| | - $inner = substr($pattern, 1, $end - 1); |
| | - // Modifiers written after the closing delimiter (e.g. the "i" in "/.../i") |
| | - // would be lost once the delimiters are stripped, so re-apply them to |
| | - // this rule only through a scoped inline option group. |
| | - $flags = $end > 0 ? preg_replace('/[^imsx]/', '', substr($pattern, $end + 1)) : ''; |
| | - if ($flags !== '') { |
| | - $inner = '(?' . $flags . ':' . $inner . ')'; |
| | - } |
| | - $patterns[] = "(?P<$name>$inner)"; |
| | - } |
| | - |
| | - $patterns[] = "(?P<punctuation>[\\{\\}\\(\\)\\[\\]\\;\\,])"; |
| | - // The "any" pattern ensures NO text remains raw |
| | - $patterns[] = "(?P<any>[\s\S])"; |
| | - $combined = '/' . implode('|', $patterns) . '/msu'; |
| | - |
| | - $rendered = preg_replace_callback($combined, function ($matches) { |
| | - foreach ($matches as $key => $value) { |
| | - // Only named groups identify a rule; numeric keys are inner captures. |
| | - if (!is_numeric($key) && $value !== '') { |
| | - if ($key === 'string_interp') { |
| | - return $this->renderInterpolatedString($value); |
| | - } |
| | - if ($key === 'any') { |
| | - return htmlspecialchars($value); |
| | - } |
| | - return '<span class="hl-' . $key . '">' . htmlspecialchars($value) . '</span>'; |
| | - } |
| | - } |
| | - return htmlspecialchars($matches[0]); |
| | - }, $this->content); |
| | - |
| | - // preg_replace_callback() yields null on a PCRE error (for example content |
| | - // that is not valid UTF-8 under the "u" modifier); fall back to escaped |
| | - // plain text instead of violating the string return type. |
| | - return $rendered ?? htmlspecialchars($this->content); |
| | - } |
| | - |
| | - /** |
| | - * Render a double-quoted string token, marking "$name" and "${...}" |
| | - * interpolations inside an enclosing <span class="hl-string">. |
| | - * |
| | - * @param string $content The matched string token, quotes included. |
| | - * @return string HTML for the string with interpolated variables wrapped. |
| | - */ |
| | - private function renderInterpolatedString(string $content): string { |
| | - $pattern = '/(\$[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*|\$\{[^}]+\})/'; |
| | - $parts = preg_split($pattern, $content, -1, PREG_SPLIT_DELIM_CAPTURE); |
| | - $output = '<span class="hl-string">'; |
| | - |
| | - foreach ($parts as $part) { |
| | - if (str_starts_with($part, '${') && str_ends_with($part, '}')) { |
| | - $inner = substr($part, 2, -1); |
| | - $output .= '<span class="hl-interp-punct">${</span>'; |
| | - $output .= '<span class="hl-variable">' . htmlspecialchars($inner) . '</span>'; |
| | - $output .= '<span class="hl-interp-punct">}</span>'; |
| | - } elseif (str_starts_with($part, '$')) { |
| | - $output .= '<span class="hl-interp-punct">$</span>'; |
| | - $output .= '<span class="hl-variable">' . htmlspecialchars(substr($part, 1)) . '</span>'; |
| | - } else { |
| | - $output .= htmlspecialchars($part); |
| | - } |
| | - } |
| | +/** |
| | + * Static registry of per-language highlighting rules. |
| | + * |
| | + * Each language maps rule names (e.g. "string", "comment", "keyword") to a |
| | + * delimited PCRE pattern, optionally followed by modifiers. |
| | + */ |
| | +class LanguageDefinitions { |
| | + /** |
| | + * Look up the rule set for a language. |
| | + * |
| | + * @param string $lang Language identifier; matched case-insensitively. |
| | + * @return array<string, string> Rule name => PCRE pattern, in declaration |
| | + * order; an empty array when the language is unknown. |
| | + */ |
| | + public static function get(string $lang): array { |
| | + $int = '(-?\b\d+(\.\d+)?\b)'; |
| | + $str = '(".*?"|\'.*?\')'; |
| | + $float = '(-?\d+(\.\d+)?([eE][+-]?\d+)?)'; |
| | |
| | - $output .= '</span>'; |
| | + $rules = [ |
| | + 'php' => [ |
| | + 'tag' => '/(<\?php|<\?=|<\?|\?>)/', /* "<?=" must precede "<?" so the short echo tag matches whole */ |
| | + 'string_interp' => '/(".*?")/', |
| | + 'string' => '/(\'.*?\')/', |
| | + 'comment' => '/(\/\/[^\r\n]*|#[^\r\n]*|\/\*.*?\*\/)/ms', |
| | + 'keyword' => '/\b(class|abstract|and|array|as|break|callable|case|catch|clone|const|continue|declare|default|die|do|echo|else|elseif|empty|enddeclare|endfor|endforeach|endif|endswitch|endwhile|eval|exit|extends|final|finally|fn|for|foreach|function|global|goto|if|implements|include|include_once|instanceof|insteadof|interface|isset|list|match|namespace|new|or|print|private|protected|public|require|require_once|return|static|switch|throw|trait|try|unset|use|var|while|xor|yield)\b/', |
| | + 'function' => '/\b([a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)\s*(?=\()/', |
| | + 'variable' => '/(\$[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)/', |
| | + 'number' => '/' . $int . '/', |
| | + 'boolean' => '/\b(true|false|null)\b/i', |
| | + ], |
| | + 'bash' => [ |
| | + 'string_interp' => '/(".*?")/', |
| | + 'string' => '/(\'.*?\')/', |
| | + 'comment' => '/(#[^\n]*)/', |
| | + 'keyword' => '/(?<!-)\b(alias|bg|bind|break|builtin|case|cd|command|compgen|complete|continue|declare|dirs|disown|do|done|echo|elif|else|enable|esac|eval|exec|exit|export|fc|fg|fi|for|function|getopts|hash|help|history|if|jobs|kill|let|local|logout|popd|printf|pushd|pwd|read|readonly|return|set|shift|shopt|source|suspend|test|then|times|trap|type|typeset|ulimit|umask|unalias|unset|until|wait|while)\b/', |
| | + 'function' => '/\b([a-zA-Z_][a-zA-Z0-9_]*)\s*(?=\()/', |
| | + 'variable' => '/(\$[a-zA-Z_][a-zA-Z0-9_]*|\$\{[^}]+\})/', |
| | + 'number' => '/' . $int . '/', |
| | + ], |
| | + 'batch' => [ |
| | + 'comment' => '/((?i:rem)\b[^\n]*|::[^\n]*)/', |
| | + 'string' => '/("[^"]*")/', |
| | + 'keyword' => '/(?i)\b(if|else|goto|for|in|do|call|exit|echo|pause|set|shift|start|cd|dir|copy|del|md|rd|cls|setlocal|endlocal|enabledelayedexpansion|defined|exist|not|errorlevel|setx|findstr|reg|nul|tokens|usebackq|equ|neq|lss|leq|gtr|geq)\b/', |
| | + 'variable' => '/(![\w-]+!|%[\w\(\)-]+%|%%[~a-zA-Z]+|%[~a-zA-Z0-9]+)/', |
| | + 'label' => '/(^\s*:[a-zA-Z0-9_-]+)/m', |
| | + 'number' => '/' . $int . '/', |
| | + ], |
| | + 'c' => [ |
| | + 'string' => '/' . $str . '/', |
| | + 'comment' => '/(\/\/[^\r\n]*|\/\*.*?\*\/)/ms', |
| | + 'keyword' => '/\b(auto|break|case|const|continue|default|do|else|enum|extern|for|goto|if|register|return|signed|sizeof|static|struct|switch|typedef|union|unsigned|void|volatile|while)\b/', |
| | + 'type' => '/\b(char|double|float|int|long|short|void)\b/', |
| | + 'function' => '/\b([a-zA-Z_][a-zA-Z0-9_]*)\s*(?=\()/', |
| | + 'number' => '/' . $int . '/', |
| | + ], |
| | + 'cpp' => [ |
| | + 'string' => '/' . $str . '/', |
| | + 'comment' => '/(\/\/[^\r\n]*|\/\*.*?\*\/)/ms', |
| | + 'keyword' => '/\b(alignas|alignof|and|and_eq|asm|auto|bitand|bitor|break|case|catch|class|compl|const|constexpr|const_cast|continue|decltype|default|delete|do|dynamic_cast|else|enum|explicit|export|extern|for|friend|goto|if|inline|mutable|namespace|new|noexcept|not|not_eq|nullptr|operator|or|or_eq|private|protected|public|register|reinterpret_cast|return|sizeof|static|static_assert|static_cast|struct|switch|template|this|thread_local|throw|try|typedef|typeid|typename|union|using|virtual|volatile|while|xor|xor_eq)\b/', |
| | + 'type' => '/\b(bool|char|char16_t|char32_t|double|float|int|long|short|signed|unsigned|void|wchar_t)\b/', |
| | + 'boolean' => '/\b(true|false)\b/', |
| | + 'function' => '/\b([a-zA-Z_][a-zA-Z0-9_]*)\s*(?=\()/', |
| | + 'number' => '/' . $int . '/', |
| | + ], |
| | + 'java' => [ |
| | + 'string' => '/' . $str . '/', |
| | + 'comment' => '/(\/\/[^\r\n]*|\/\*.*?\*\/)/ms', |
| | + 'keyword' => '/\b(abstract|assert|break|case|catch|class|const|continue|default|do|else|enum|extends|final|finally|for|goto|if|implements|import|instanceof|interface|native|new|package|private|protected|public|return|static|strictfp|super|switch|synchronized|this|throw|throws|transient|try|void|volatile|while)\b/', |
| | + 'type' => '/\b(boolean|byte|char|double|float|int|long|short|void)\b/', |
| | + 'boolean' => '/\b(true|false|null)\b/', |
| | + 'function' => '/\b([a-zA-Z_][a-zA-Z0-9_]*)\s*(?=\()/', |
| | + 'number' => '/' . $int . '/', |
| | + ], |
| | + 'go' => [ |
| | + 'string' => '/(".*?"|`.*?`)/s', |
| | + 'comment' => '/(\/\/[^\r\n]*|\/\*.*?\*\/)/ms', |
| | + 'keyword' => '/\b(break|case|chan|const|continue|default|defer|else|fallthrough|for|func|go|goto|if|import|interface|map|package|range|return|select|struct|switch|type|var)\b/', |
| | + 'boolean' => '/\b(true|false|nil|iota)\b/', |
| | + 'function' => '/\b([a-zA-Z_][a-zA-Z0-9_]*)\s*(?=\()/', |
| | + 'number' => '/' . $int . '/', |
| | + ], |
| | + 'rust' => [ |
| | + 'string' => '/(".*?"|\'.*?\')/', |
| | + 'comment' => '/(\/\/[^\r\n]*|\/\*.*?\*\/)/ms', |
| | + 'keyword' => '/\b(as|break|const|continue|crate|else|enum|extern|fn|for|if|impl|in|let|loop|match|mod|move|mut|pub|ref|return|self|Self|static|struct|super|trait|type|unsafe|use|where|while|async|await|dyn)\b/', |
| | + 'boolean' => '/\b(true|false)\b/', |
| | + 'function' => '/\b([a-zA-Z_][a-zA-Z0-9_]*)\s*(?=\()/', |
| | + 'number' => '/' . $int . '/', |
| | + ], |
| | + 'python' => [ |
| | + 'string' => '/(\'\'\'.*?\'\'\'|""".*?"""|".*?"|\'.*?\')/s', |
| | + 'comment' => '/(#[^\r\n]*)/m', |
| | + 'keyword' => '/\b(and|as|assert|async|await|break|class|continue|def|del|elif|else|except|finally|for|from|global|if|import|in|is|lambda|nonlocal|not|or|pass|raise|return|try|while|with|yield)\b/', |
| | + 'boolean' => '/\b(False|None|True)\b/', |
| | + 'function' => '/\b([a-zA-Z_][a-zA-Z0-9_]*)\s*(?=\()/', |
| | + 'number' => '/' . $int . '/', |
| | + ], |
| | + 'ruby' => [ |
| | + 'string_interp' => '/(".*?")/', |
| | + 'string' => '/(\'.*?\')/', |
| | + 'comment' => '/(#[^\r\n]*)/m', |
| | + 'keyword' => '/\b(alias|and|begin|break|case|class|def|defined|do|else|elsif|end|ensure|for|if|in|module|next|not|or|redo|rescue|retry|return|self|super|then|undef|unless|until|when|while|yield)\b/', |
| | + 'boolean' => '/\b(true|false|nil)\b/', |
| | + 'function' => '/\b([a-zA-Z_][a-zA-Z0-9_]*[?!]?)\s*(?=\()/', |
| | + 'variable' => '/(@[a-zA-Z_]\w*|\$[a-zA-Z_]\w*)/', |
| | + 'number' => '/' . $int . '/', |
| | + ], |
| | + 'lua' => [ |
| | + 'string' => '/(".*?"|\'.*?\'|\[\[.*?\]\])/s', |
| | + 'comment' => '/(--\[\[.*?\]\]|--[^\r\n]*)/ms', |
| | + 'keyword' => '/\b(and|break|do|else|elseif|end|for|function|if|in|local|not|or|repeat|return|then|until|while)\b/', |
| | + 'boolean' => '/\b(false|nil|true)\b/', |
| | + 'function' => '/\b([a-zA-Z_][a-zA-Z0-9_]*)\s*(?=\()/', |
| | + 'number' => '/' . $int . '/', |
| | + ], |
| | + 'javascript' => [ |
| | + 'string' => '/(".*?"|\'.*?\'|`.*?`)/s', |
| | + 'comment' => '/(\/\/[^\r\n]*|\/\*.*?\*\/)/ms', |
| | + 'keyword' => '/\b(async|await|break|case|catch|class|const|continue|debugger|default|delete|do|else|export|extends|finally|for|function|if|import|in|instanceof|new|return|super|switch|this|throw|try|typeof|var|void|while|with|yield|let|static|enum)\b/', |
| | + 'boolean' => '/\b(true|false|null|undefined)\b/', |
| | + 'function' => '/\b([a-zA-Z_$][a-zA-Z0-9_$]*)\s*(?=\()/', |
| | + 'number' => '/' . $int . '/', |
| | + ], |
| | + 'typescript' => [ |
| | + 'string' => '/(".*?"|\'.*?\'|`.*?`)/s', |
| | + 'comment' => '/(\/\/[^\r\n]*|\/\*.*?\*\/)/ms', |
| | + 'keyword' => '/\b(any|as|break|case|catch|class|const|continue|debugger|declare|default|delete|do|else|enum|export|extends|finally|for|from|function|if|implements|import|in|instanceof|interface|let|module|namespace|new|of|package|private|protected|public|require|return|static|super|switch|this|throw|try|type|typeof|var|void|while|with|yield)\b/', |
| | + 'type' => '/\b(boolean|number|string|void|any)\b/', |
| | + 'boolean' => '/\b(true|false|null|undefined)\b/', |
| | + 'function' => '/\b([a-zA-Z_$][a-zA-Z0-9_$]*)\s*(?=\()/', |
| | + 'number' => '/' . $int . '/', |
| | + ], |
| | + 'xml' => [ |
| | + 'comment' => '/(<!--.*?-->)/s', /* Markup comment, may span lines */ |
| | + 'string' => '/' . $str . '/', |
| | + 'tag' => '/(<\/?[a-zA-Z0-9:-]+|\s*\/?>|<\?xml|\?>)/', |
| | + 'attribute' => '/([a-zA-Z0-9:-]+)(?=\=)/', |
| | + ], |
| | + 'html' => [ |
| | + 'comment' => '/(<!--.*?-->)/s', /* Markup comment, may span lines */ |
| | + 'string' => '/' . $str . '/', |
| | + 'tag' => '/(<\/?[a-zA-Z0-9:-]+|\s*\/?>)/', |
| | + 'attribute' => '/([a-zA-Z0-9:-]+)(?=\=)/', |
| | + ], |
| | + 'css' => [ |
| | + 'comment' => '/(\/\*.*?\*\/)/s', |
| | + 'tag' => '/(?<=^|\}|\{)\s*([a-zA-Z0-9_\-#\.\s,>+~]+)(?=\{)/m', /* Selectors */ |
| | + 'property' => '/([a-zA-Z-]+)(?=\s*:)/', /* Properties */ |
| | + 'string' => '/' . $str . '/', |
| | + 'number' => '/(-?(\d*\.)?\d+(px|em|rem|%|vh|vw|s|ms|deg))/', |
| | + ], |
| | + 'json' => [ |
| | + 'attribute' => '/(".*?")(?=\s*:)/', /* Keys are attributes (orange) in Monokai */ |
| | + 'string' => '/(".*?")/', /* Values */ |
| | + 'boolean' => '/\b(true|false|null)\b/', |
| | + 'number' => '/(-?\b\d+(\.\d+)?([eE][+-]?\d+)?\b)/', /* Sign sits before the word boundary so "-5" keeps its minus */ |
| | + ], |
| | + 'sql' => [ |
| | + 'string' => '/(\'.*?\')/', |
| | + 'comment' => '/(--[^\r\n]*|\/\*.*?\*\/)/ms', |
| | + 'keyword' => '/(?i)\b(SELECT|FROM|WHERE|INSERT|INTO|UPDATE|DELETE|JOIN|LEFT|RIGHT|INNER|OUTER|ON|GROUP|BY|ORDER|HAVING|LIMIT|OFFSET|CREATE|TABLE|DROP|ALTER|INDEX|KEY|PRIMARY|FOREIGN|CONSTRAINT|DEFAULT|NOT|AND|OR|IN|VALUES|SET|AS|DISTINCT|UNION|ALL|CASE|WHEN|THEN|ELSE|END)\b/', |
| | + 'boolean' => '/(?i)\b(NULL|TRUE|FALSE)\b/', |
| | + 'number' => '/' . $int . '/', |
| | + ], |
| | + 'yaml' => [ |
| | + 'string' => '/' . $str . '/', |
| | + 'comment' => '/(#[^\r\n]*)/m', |
| | + 'attribute' => '/^(\s*[a-zA-Z0-9_-]+:)/m', |
| | + 'number' => '/' . $float . '/', |
| | + ], |
| | + 'markdown' => [ |
| | + 'comment' => '/(<!--.*?-->)/s', /* Markup comment, may span lines */ |
| | + 'keyword' => '/^(#{1,6}\s+.*)$/m', |
| | + 'string' => '/(\*\*.*?\*\*|__.*?__|\*.*?\*|_.*?_)/', |
| | + 'variable' => '/(\[.*?\]\(.*?\))/', |
| | + 'number' => '/^(\s*[-*+]\s|\s*\d+\.\s)/m', |
| | + ], |
| | + 'rmd' => [ |
| | + 'comment' => '/(<!--.*?-->)/s', /* Markup comment, may span lines */ |
| | + 'keyword' => '/^(#{1,6}\s+.*)$/m', |
| | + 'variable' => '/(`{3}\{r.*?`{3})/s', |
| | + ], |
| | + 'r' => [ |
| | + 'string' => '/' . $str . '/', |
| | + 'comment' => '/(#[^\r\n]*)/m', |
| | + 'keyword' => '/\b(if|else|repeat|while|function|for|in|next|break)\b/', |
| | + 'boolean' => '/\b(TRUE|FALSE|NULL|Inf|NaN|NA)\b/', |
| | + 'function' => '/\b([a-zA-Z_.][a-zA-Z0-9_.]*)\s*(?=\()/', |
| | + 'number' => '/' . $float . '/', |
| | + ] |
| | + ]; |
| | |
| | - return $output; |
| | + return $rules[strtolower($lang)] ?? []; |
| | } |
| | } |