Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/treetrek.git
<?php
/**
 * Lookup table of syntax-highlighting rules, keyed by language name.
 *
 * Each language maps token names (such as 'comment', 'string', 'keyword')
 * to a PCRE pattern, delimiters and modifiers included.
 */
class LanguageDefinitions {
  /**
   * Returns the token patterns for the given language.
   *
   * @param string $lang Language name; compared case-insensitively.
   *
   * @return array<string, string> Map of token name to PCRE pattern, or an
   *                               empty array when the language is unknown.
   */
  public static function get( string $lang ): array {
    // Shared fragments reused by several languages below.
    $int   = '(-?\b\d+(\.\d+)?\b)';
    $str   = '("(?:\\\\.|[^"\\\\])*"|\'(?:\\\\.|[^\'\\\\])*\')';
    $float = '(-?\d+(\.\d+)?([eE][+-]?\d+)?)';

    $rules = [
      'gradle' => [
        'comment'       => '/(\/\/[^\r\n]*|\/\*.*?\*\/)/ms',
        'string_interp' => '/("(?:\\\\.|[^"\\\\])*"|""".*?""")/',
        'string'        => '/(\'(?:\\\\.|[^\'\\\\])*\'|\'\'\'.*?\'\'\'|\/.*?\/)/',
        'keyword'       => '/\b(def|task|group|version|ext|return|if|else)\b/',
        'function'      => '/\b(apply|plugin|sourceCompatibility|targetCompatibility|repositories|dependencies|test|plugins|buildscript|allprojects|subprojects|project|implementation|api|compileOnly|runtimeOnly|testImplementation|testRuntimeOnly|mavenCentral|google|jcenter|classpath)\b|\b([a-zA-Z_][a-zA-Z0-9_]*)\s*(?=\(|{)/',
        'variable'      => '/(\$[a-zA-Z_][a-zA-Z0-9_]*|\$\{[^}]+\})/',
        'boolean'       => '/\b(true|false|null)\b/',
        'number'        => '/' . $int . '/',
      ],
      'tex' => [
        'comment'  => '/(%[^\r\n]*)/m',
        'math'     => '/(\$\$?.*?\$\$?)/s',
        'keyword'  => '/(\\\\(?:def|edef|gdef|xdef|let|futurelet|if|else|fi|ifnum|ifdim|ifodd|ifmmode|ifx|ifeof|iftrue|iffalse|ifcase|or|loop|repeat|newif|expandafter|noexpand|csname|endcsname|string|number|the|long|outer|global|par|advance|hsize|vsize|hoffset|voffset|displaywidth|parindent|baselineskip|leftskip|rightskip|hangindent|hangafter|parshape|pageno|nopagenumbers|folio|headline|footline|hbox|vbox|vtop|vcenter|rlap|llap|hskip|vskip|hfil|hfill|hfilneg|vfil|vfill|mskip|quad|qquad|enspace|thinspace|enskip|strut|phantom|vphantom|hphantom|smash|raise|lower|moveleft|moveright|halign|valign|noalign|openup|cr|crcr|omit|span|multispan|tabskip|settabs|matrix|pmatrix|bordermatrix|eqalign|displaylines|eqno|leqno|cases|left|right|over|atop|choose|brace|brack|root|of|buildrel|input|end|bye|item|itemitem|indent|noindent|narrower|rm|bf|tt|sl|it|font|char|magnification|magstep|magstephalf|day|month|year|jobname|romannumeral|uppercase|lowercase|footnote|topinsert|pageinsert|midinsert|endinsert|underbar|hfuzz|vfuzz|overfullrule|raggedright|raggedbottom|everypar|everymath|everydisplay|everycr))\b/',
        'function' => '/(\\\\[a-zA-Z@]+|\\\\[^a-zA-Z@])/',
        'variable' => '/(#[0-9])/',
      ],
      'php' => [
        'tag'           => '/(<\?php|<\?|=\?>|\?>)/',
        'string_interp' => '/("(?:\\\\.|[^"\\\\])*")/',
        'string'        => '/(\'(?:\\\\.|[^\'\\\\])*\')/',
        'comment'       => '/(\/\/[^\r\n]*|#[^\r\n]*|\/\*.*?\*\/)/ms',
        'keyword'       => '/\b(class|abstract|and|array|as|break|callable|case|catch|clone|const|continue|declare|default|die|do|echo|else|elseif|empty|enddeclare|endfor|endforeach|endif|endswitch|endwhile|eval|exit|extends|final|finally|fn|for|foreach|function|global|goto|if|implements|include|include_once|instanceof|insteadof|interface|isset|list|match|namespace|new|or|print|private|protected|public|require|require_once|return|static|switch|throw|trait|try|unset|use|var|while|xor|yield)\b/',
        'function'      => '/\b([a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)\s*(?=\()/',
        'variable'      => '/(\$[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)/',
        'number'        => '/' . $int . '/',
        'boolean'       => '/\b(true|false|null)\b/i',
      ],
      'bash' => [
        'string_interp' => '/("(?:\\\\.|[^"\\\\])*")/',
        'string'        => '/(\'.*?\')/',
        'comment'       => '/(#[^\n]*)/',
        'keyword'       => '/(?<!-)\b(alias|bg|bind|break|builtin|case|cd|command|compgen|complete|continue|declare|dirs|disown|do|done|echo|elif|else|enable|esac|eval|exec|exit|export|fc|fg|fi|for|function|getopts|hash|help|history|if|jobs|kill|let|local|logout|popd|printf|pushd|pwd|read|readonly|return|set|shift|shopt|source|suspend|test|then|times|trap|type|typeset|ulimit|umask|unalias|unset|until|wait|while)\b/',
        'function'      => '/\b([a-zA-Z_][a-zA-Z0-9_]*)\s*(?=\()/',
        'variable'      => '/(\$[a-zA-Z_][a-zA-Z0-9_]*|\$\{[^}]+\})/',
        'number'        => '/' . $int . '/',
      ],
      'batch' => [
        'comment'  => '/((?i:rem)\b[^\n]*|::[^\n]*)/',
        'string'   => '/("[^"]*")/',
        'keyword'  => '/(?i)\b(if|else|goto|for|in|do|exit|echo|pause|set|shift|start|cd|dir|copy|del|md|rd|cls|setlocal|endlocal|enabledelayedexpansion|defined|exist|not|errorlevel|setx|findstr|reg|nul|tokens|usebackq|equ|neq|lss|leq|gtr|geq)\b/',
        'function' => '/(?i)\b(call)\b/',
        'variable' => '/(![\w-]+!|%[\w\(\)-]+%|%%[~a-zA-Z]+|%[~a-zA-Z0-9]+)/',
        'label'    => '/(^\s*:[a-zA-Z0-9_-]+)/m',
        'number'   => '/' . $int . '/',
      ],
      'c' => [
        'string'       => '/' . $str . '/',
        'comment'      => '/(\/\/[^\r\n]*|\/\*.*?\*\/)/ms',
        'include'      => '/(^\s*#include[^\r\n]*)/m',
        'preprocessor' => '/(^\s*#(?!include\b)[^\r\n]*)/m',
        'keyword'      => '/\b(auto|break|case|const|continue|default|do|else|enum|extern|for|goto|if|noreturn|register|return|signed|sizeof|static|struct|switch|typedef|union|unsigned|void|volatile|while)\b/',
        'type'         => '/\b(char|double|float|int|long|short|void)\b/',
        'function'     => '/\b([a-zA-Z_][a-zA-Z0-9_]*)\s*(?=\()/',
        'number'       => '/' . $int . '/',
      ],
      'cpp' => [
        'string'       => '/' . $str . '/',
        'comment'      => '/(\/\/[^\r\n]*|\/\*.*?\*\/)/ms',
        'include'      => '/(^\s*#include[^\r\n]*)/m',
        'preprocessor' => '/(^\s*#(?!include\b)[^\r\n]*)/m',
        'keyword'      => '/\b(alignas|alignof|and|and_eq|asm|auto|bitand|bitor|break|case|catch|class|compl|const|constexpr|const_cast|continue|decltype|default|delete|do|dynamic_cast|else|enum|explicit|export|extern|for|friend|goto|if|inline|mutable|namespace|new|noexcept|noreturn|not|not_eq|nullptr|operator|or|or_eq|private|protected|public|register|reinterpret_cast|return|sizeof|static|static_assert|static_cast|struct|switch|template|this|thread_local|throw|try|typedef|typeid|typename|union|using|virtual|volatile|while|xor|xor_eq)\b/',
        'type'         => '/\b(bool|char|char16_t|char32_t|double|float|int|long|short|signed|unsigned|void|wchar_t)\b/',
        'boolean'      => '/\b(true|false)\b/',
        'function'     => '/\b([a-zA-Z_][a-zA-Z0-9_]*)\s*(?=\()/',
        'number'       => '/' . $int . '/',
      ],
      'java' => [
        'class'    => '/(@[a-zA-Z_][a-zA-Z0-9_]*)/',
        'string'   => '/' . $str . '/',
        'comment'  => '/(\/\/[^\r\n]*|\/\*.*?\*\/)/ms',
        'keyword'  => '/\b(abstract|assert|break|case|catch|class|const|continue|default|do|else|enum|extends|final|finally|for|goto|if|implements|import|instanceof|interface|native|new|package|private|protected|public|return|static|strictfp|super|switch|synchronized|this|throw|throws|transient|try|void|volatile|while)\b/',
        'type'     => '/\b(boolean|byte|char|double|float|int|long|short|void)\b/',
        'boolean'  => '/\b(true|false|null)\b/',
        'function' => '/\b([a-zA-Z_][a-zA-Z0-9_]*)\s*(?=\()/',
        'number'   => '/' . $int . '/',
      ],
      'go' => [
        'string'   => '/("(?:\\\\.|[^"\\\\])*"|`.*?`)/s',
        'comment'  => '/(\/\/[^\r\n]*|\/\*.*?\*\/)/ms',
        'keyword'  => '/\b(break|case|chan|const|continue|default|defer|else|fallthrough|for|func|go|goto|if|import|interface|map|package|range|return|select|struct|switch|type|var)\b/',
        'boolean'  => '/\b(true|false|nil|iota)\b/',
        'function' => '/\b([a-zA-Z_][a-zA-Z0-9_]*)\s*(?=\()/',
        'number'   => '/' . $int . '/',
      ],
      'rust' => [
        'string'   => '/' . $str . '/',
        'comment'  => '/(\/\/[^\r\n]*|\/\*.*?\*\/)/ms',
        'keyword'  => '/\b(as|break|const|continue|crate|else|enum|extern|fn|for|if|impl|in|let|loop|match|mod|move|mut|pub|ref|return|self|Self|static|struct|super|trait|type|unsafe|use|where|while|async|await|dyn)\b/',
        'boolean'  => '/\b(true|false)\b/',
        'function' => '/\b([a-zA-Z_][a-zA-Z0-9_]*)\s*(?=\()/',
        'number'   => '/' . $int . '/',
      ],
      'python' => [
        'string'   => '/(\'\'\'.*?\'\'\'|""".*?"""|"(?:\\\\.|[^"\\\\])*"|\'(?:\\\\.|[^\'\\\\])*\')/s',
        'comment'  => '/(#[^\r\n]*)/m',
        'keyword'  => '/\b(and|as|assert|async|await|break|class|continue|def|del|elif|else|except|finally|for|from|global|if|import|in|is|lambda|nonlocal|not|or|pass|raise|return|try|while|with|yield)\b/',
        'boolean'  => '/\b(False|None|True)\b/',
        'function' => '/\b([a-zA-Z_][a-zA-Z0-9_]*)\s*(?=\()/',
        'number'   => '/' . $int . '/',
      ],
      'ruby' => [
        'string_interp' => '/("(?:\\\\.|[^"\\\\])*")/',
        'string'        => '/(\'(?:\\\\.|[^\'\\\\])*\')/',
        'comment'       => '/(#[^\r\n]*)/m',
        'keyword'       => '/\b(alias|and|begin|break|case|class|def|defined|do|else|elsif|end|ensure|for|if|in|module|next|not|or|redo|rescue|retry|return|self|super|then|undef|unless|until|when|while|yield)\b/',
        'boolean'       => '/\b(true|false|nil)\b/',
        'function'      => '/\b([a-zA-Z_][a-zA-Z0-9_]*[?!]?)\s*(?=\()/',
        'variable'      => '/(@[a-zA-Z_]\w*|\$[a-zA-Z_]\w*)/',
        'number'        => '/' . $int . '/',
      ],
      'lua' => [
        'string'   => '/("(?:\\\\.|[^"\\\\])*"|\'(?:\\\\.|[^\'\\\\])*\'|\[\[.*?\]\])/s',
        'comment'  => '/(--\[\[.*?\]\]|--[^\r\n]*)/ms',
        'keyword'  => '/\b(and|break|do|else|elseif|end|for|function|if|in|local|not|or|repeat|return|then|until|while)\b/',
        'boolean'  => '/\b(false|nil|true)\b/',
        'function' => '/\b([a-zA-Z_][a-zA-Z0-9_]*)\s*(?=\()/',
        'number'   => '/' . $int . '/',
      ],
      'javascript' => [
        'string'   => '/("(?:\\\\.|[^"\\\\])*"|\'(?:\\\\.|[^\'\\\\])*\'|`(?:\\\\.|[^`\\\\])*`)/s',
        'comment'  => '/(\/\/[^\r\n]*|\/\*.*?\*\/)/ms',
        'keyword'  => '/\b(async|await|break|case|catch|class|const|continue|debugger|default|delete|do|else|export|extends|finally|for|function|if|import|in|instanceof|new|return|super|switch|this|throw|try|typeof|var|void|while|with|yield|let|static|enum)\b/',
        'boolean'  => '/\b(true|false|null|undefined)\b/',
        'function' => '/\b([a-zA-Z_$][a-zA-Z0-9_$]*)\s*(?=\()/',
        'number'   => '/' . $int . '/',
      ],
      'typescript' => [
        'string'   => '/("(?:\\\\.|[^"\\\\])*"|\'(?:\\\\.|[^\'\\\\])*\'|`(?:\\\\.|[^`\\\\])*`)/s',
        'comment'  => '/(\/\/[^\r\n]*|\/\*.*?\*\/)/ms',
        'keyword'  => '/\b(any|as|break|case|catch|class|const|continue|debugger|declare|default|delete|do|else|enum|export|extends|finally|for|from|function|if|implements|import|in|instanceof|interface|let|module|namespace|new|of|package|private|protected|public|require|return|static|super|switch|this|throw|try|type|typeof|var|void|while|with|yield)\b/',
        'type'     => '/\b(boolean|number|string|void|any)\b/',
        'boolean'  => '/\b(true|false|null|undefined)\b/',
        'function' => '/\b([a-zA-Z_$][a-zA-Z0-9_$]*)\s*(?=\()/',
        'number'   => '/' . $int . '/',
      ],
      'xml' => [
        // Lazy, dot-all match so a comment may span lines and stops at the
        // first closing marker. An empty group here would match the empty
        // string at every offset and never capture a comment.
        'comment'   => '/(<!--.*?-->)/s',
        'string'    => '/' . $str . '/',
        'tag'       => '/(<\/?[a-zA-Z0-9:-]+|\s*\/?>|<\?xml|\?>)/',
        'attribute' => '/([a-zA-Z0-9:-]+)(?=\=)/',
      ],
      'html' => [
        // Same comment syntax as XML; see the note on the xml rule.
        'comment'   => '/(<!--.*?-->)/s',
        'string'    => '/' . $str . '/',
        'tag'       => '/(<\/?[a-zA-Z0-9:-]+|\s*\/?>)/',
        'attribute' => '/([a-zA-Z0-9:-]+)(?=\=)/',
      ],
      'css' => [
        'comment'  => '/(\/\*.*?\*\/)/s',
        'tag'      => '/(?<=^|\}|\{)\s*([a-zA-Z0-9_\-#\.\s,>+~]+)(?=\{)/m',
        'property' => '/([a-zA-Z-]+)(?=\s*:)/',
        'string'   => '/' . $str . '/',
        'number'   => '/(-?(\d*\.)?\d+(px|em|rem|%|vh|vw|s|ms|deg))/',
      ],
      'json' => [
        'attribute' => '/("(?:\\\\.|[^"\\\\])*")(?=\s*:)/',
        'string'    => '/("(?:\\\\.|[^"\\\\])*")/',
        'boolean'   => '/\b(true|false|null)\b/',
        // The word boundary sits after the optional sign (as in $int): there
        // is no boundary between a space or colon and '-', so a leading \b
        // would drop the minus sign from negative numbers.
        'number'    => '/(-?\b\d+(\.\d+)?([eE][+-]?\d+)?\b)/',
      ],
      'sql' => [
        'string'   => '/(\'.*?\')/',
        'comment'  => '/(--[^\r\n]*|\/\*.*?\*\/)/ms',
        'keyword'  => '/(?i)\b(SELECT|FROM|WHERE|INSERT|INTO|UPDATE|DELETE|JOIN|LEFT|RIGHT|INNER|OUTER|ON|GROUP|BY|ORDER|HAVING|LIMIT|OFFSET|CREATE|TABLE|DROP|ALTER|INDEX|KEY|PRIMARY|FOREIGN|CONSTRAINT|DEFAULT|NOT|AND|OR|IN|VALUES|SET|AS|DISTINCT|UNION|ALL|CASE|WHEN|THEN|ELSE|END)\b/',
        'boolean'  => '/(?i)\b(NULL|TRUE|FALSE)\b/',
        'function' => '/\b([a-zA-Z_][a-zA-Z0-9_]*)\s*(?=\()/',
        'number'   => '/' . $int . '/',
      ],
      'yaml' => [
        'string'    => '/' . $str . '/',
        'comment'   => '/(#[^\r\n]*)/m',
        'attribute' => '/^(\s*[a-zA-Z0-9_-]+:)/m',
        'number'    => '/' . $float . '/',
      ],
      'markdown' => [
        // NOTE(review): '^' has no m modifier here, so it anchors only at the
        // start of the subject -- confirm whether indented code blocks later
        // in a document are meant to match.
        'code'     => '/(^(?:    |\t)[^\n]*(?:\n(?:    |\t)[^\n]*)*)/',
        'comment'  => '/(```[\s\S]*?```|~~~[\s\S]*?~~~)/',
        'math'     => '/(\$((?:[^`\n$]|`[^`\n]*`)+)\$)/',
        'keyword'  => '/^(#{1,6})(?=\s)/m',
        'string'   => '/(\*\*[^\n*]+\*\*|__[^\n_]+__)/',
        'attribute' => '/(?<!\*)(\*[^\n*]+\*)(?!\*)|(?<!_)(_[^\n_]+_)(?!_)/',
        'function' => '/(`[^`\n]+`)/',
        'variable' => '/(\[[^\]]+\]\([^\)]+\))/',
        'operator' => '/^(\s*[-*+](?=\s)|\s*\d+\.(?=\s))/m',
      ],
      'rmd' => [
        // NOTE(review): '^' has no m modifier here, so it anchors only at the
        // start of the subject -- confirm whether that is intended.
        'code'     => '/(^(?:    |\t)[^\n]*(?:\n(?:    |\t)[^\n]*)*)/',
        'comment'  => '/(```\{r[^\}]*\}[\s\S]*?```)/',
        'math'     => '/(\$((?:[^`\n$]|`[^`\n]*`)+)\$)/',
        'keyword'  => '/^(#{1,6})(?=\s)/m',
        'string'   => '/(\*\*[^\n*]+\*\*|__[^\n_]+__)/',
        'attribute' => '/(?<!\*)(\*[^\n*]+\*)(?!\*)|(?<!_)(_[^\n_]+_)(?!_)/',
        'function' => '/(`[^`\n]+`)/',
        'variable' => '/(\[[^\]]+\]\([^\)]+\))/',
        'operator' => '/^(\s*[-*+](?=\s)|\s*\d+\.(?=\s))/m',
      ],
      'r' => [
        'string'   => '/' . $str . '/',
        'comment'  => '/(#[^\r\n]*)/m',
        'keyword'  => '/\b(if|else|repeat|while|function|for|in|next|break)\b/',
        'boolean'  => '/\b(TRUE|FALSE|NULL|Inf|NaN|NA)\b/',
        'function' => '/\b([a-zA-Z_.][a-zA-Z0-9_.]*)\s*(?=\()/',
        'number'   => '/' . $float . '/',
      ]
    ];

    // Unknown languages yield an empty rule set rather than an error.
    return $rules[strtolower( $lang )] ?? [];
  }
}