/**
 * A Monarch lexer for the Elixir language.
 *
 * By default the Monaco editor uses Monarch for tokenizing the source code,
 * which is then used for syntax highlighting as defined by the theme.
 *
 * The object has three parts:
 *
 *   * general settings (`defaultToken`, `tokenPostfix`, `brackets`),
 *   * named lists and regexps that tokenizer rules reference as `@name`,
 *   * `tokenizer` - the states and their rules, starting from `root`.
 *
 * References:
 *
 *   * Monarch documentation - https://microsoft.github.io/monaco-editor/monarch.html
 *   * Monarch lexers shipped with Monaco by default - https://github.com/microsoft/monaco-languages
 *   * Elixir lexer - https://github.com/elixir-makeup/makeup_elixir/blob/master/lib/makeup/lexers/elixir_lexer.ex
 *   * TextMate lexer (elixir-tmbundle) - https://github.com/elixir-editors/elixir-tmbundle/blob/master/Syntaxes/Elixir.tmLanguage
 *   * TextMate lexer (vscode-elixir-ls) - https://github.com/elixir-lsp/vscode-elixir-ls/blob/master/syntaxes/elixir.json
 */
const ElixirMonarchLanguage = {
  defaultToken: "source",
  tokenPostfix: ".elixir",

  brackets: [
    { open: "[", close: "]", token: "delimiter.square" },
    { open: "(", close: ")", token: "delimiter.parenthesis" },
    { open: "{", close: "}", token: "delimiter.curly" },
    { open: "<<", close: ">>", token: "delimiter.angle.special" },
  ],

  // Below are lists/regexps which we reference later.

  declarationKeywords: [
    "def",
    "defp",
    "defn",
    "defnp",
    "defguard",
    "defguardp",
    "defmacro",
    "defmacrop",
    "defdelegate",
    "defcallback",
    "defmacrocallback",
    "defmodule",
    "defprotocol",
    "defexception",
    "defimpl",
    "defstruct",
  ],
  operatorKeywords: ["and", "in", "not", "or", "when"],
  namespaceKeywords: ["alias", "import", "require", "use"],
  otherKeywords: [
    "after",
    "case",
    "catch",
    "cond",
    "do",
    "else",
    "end",
    "fn",
    "for",
    "if",
    "quote",
    "raise",
    "receive",
    "rescue",
    "super",
    "throw",
    "try",
    "unless",
    "unquote_splicing",
    "unquote",
    "with",
  ],
  constants: ["true", "false", "nil"],
  nameBuiltin: [
    "__MODULE__",
    "__DIR__",
    "__ENV__",
    "__CALLER__",
    "__STACKTRACE__",
  ],

  // Matches any of the operator names:
  // <<< >>> ||| &&& ^^^ ~~~ === !== ~>> <~> |~> <|> == != <= >= && || \\ <> ++ -- |> =~ -> <- ~> <~ :: .. = < > + - * / | . ^ & !
  operator: /-[->]?|!={0,2}|\*|\/|\\\\|&{1,3}|\.\.?|\^(?:\^\^)?|\+\+?|<(?:-|<<|=|>|\|>|~>?)?|=~|={1,3}|>(?:=|>>)?|\|~>|\|>|\|{1,3}|~>>?|~~~|::/,

  // See https://hexdocs.pm/elixir/syntax-reference.html#variables
  variableName: /[a-z_][a-zA-Z0-9_]*[?!]?/,

  // See https://hexdocs.pm/elixir/syntax-reference.html#atoms
  atomName: /[a-zA-Z_][a-zA-Z0-9_@]*[?!]?|@specialAtomName|@operator/,
  specialAtomName: /\.\.\.|<<>>|%\{\}|%|\{\}/,

  aliasPart: /[A-Z][a-zA-Z0-9_]*/,
  moduleName: /@aliasPart(?:\.@aliasPart)*/,

  // Sigil pairs are: """ """, ''' ''', " ", ' ', / /, | |, < >, { }, [ ], ( )
  sigilSymmetricDelimiter: /"""|'''|"|'|\/|\|/,
  sigilStartDelimiter: /@sigilSymmetricDelimiter|<|\{|\[|\(/,
  sigilEndDelimiter: /@sigilSymmetricDelimiter|>|\}|\]|\)/,

  decimal: /\d(?:_?\d)*/,
  hex: /[0-9a-fA-F](_?[0-9a-fA-F])*/,
  octal: /[0-7](_?[0-7])*/,
  binary: /[01](_?[01])*/,

  // See https://hexdocs.pm/elixir/master/String.html#module-escape-characters
  escape: /\\u[0-9a-fA-F]{4}|\\x[0-9a-fA-F]{2}|\\./,

  // The keys below correspond to tokenizer states.
  // We start from the root state and match against its rules
  // until we explicitly transition into another state.
  // The `include` simply brings in all operations from the given state
  // and is useful for improving readability.
  tokenizer: {
    root: [
      { include: "@whitespace" },
      { include: "@comments" },
      // Keywords start as either an identifier or a string,
      // but end with a : so it's important to match this first.
      { include: "@keywordsShorthand" },
      { include: "@numbers" },
      { include: "@identifiers" },
      { include: "@strings" },
      { include: "@atoms" },
      { include: "@sigils" },
      { include: "@attributes" },
      { include: "@symbols" },
    ],

    // Whitespace

    whitespace: [[/\s+/, "white"]],

    // Comments

    comments: [[/(#)(.*)/, ["comment.punctuation", "comment"]]],

    // Keyword list shorthand

    keywordsShorthand: [
      [/(@atomName)(:)/, ["constant", "constant.punctuation"]],
      // Use positive look-ahead to ensure the string is followed by :
      // and should be considered a keyword.
      [
        /"(?=([^"]|#\{.*?\}|\\")*":)/,
        { token: "constant.delimiter", next: "@doubleQuotedStringKeyword" },
      ],
      [
        /'(?=([^']|#\{.*?\}|\\')*':)/,
        { token: "constant.delimiter", next: "@singleQuotedStringKeyword" },
      ],
    ],

    doubleQuotedStringKeyword: [
      [/":/, { token: "constant.delimiter", next: "@pop" }],
      { include: "@stringConstantContentInterpol" },
    ],

    singleQuotedStringKeyword: [
      [/':/, { token: "constant.delimiter", next: "@pop" }],
      { include: "@stringConstantContentInterpol" },
    ],

    // Numbers

    numbers: [
      [/0b@binary/, "number.binary"],
      [/0o@octal/, "number.octal"],
      [/0x@hex/, "number.hex"],
      // The exponent may carry either sign, e.g. 1.0e-10 and 1.0e+10.
      [/@decimal\.@decimal([eE][+-]?@decimal)?/, "number.float"],
      [/@decimal/, "number"],
    ],

    // Identifiers

    identifiers: [
      // Tokenize identifier name in function-like definitions.
      // Note: given `def a + b, do: nil`, `a` is not a function name,
      // so we use negative look-ahead to ensure there's no operator.
      [
        /\b(defp?|defnp?|defmacrop?|defguardp?|defdelegate)(\s+)(@variableName)(?!\s+@operator)/,
        [
          "keyword.declaration",
          "white",
          {
            cases: {
              unquote: "keyword",
              "@default": "function",
            },
          },
        ],
      ],
      // Tokenize function calls
      [
        // In-scope call - an identifier followed by ( or .(
        /(@variableName)(?=\s*\.?\s*\()/,
        {
          cases: {
            // Tokenize as keyword in cases like `if(..., do: ..., else: ...)`
            "@declarationKeywords": "keyword.declaration",
            "@namespaceKeywords": "keyword",
            "@otherKeywords": "keyword",
            "@default": "function.call",
          },
        },
      ],
      [
        // Referencing function in a module
        /(@moduleName)(\s*)(\.)(\s*)(@variableName)/,
        ["type.identifier", "white", "operator", "white", "function.call"],
      ],
      [
        // Referencing function in an Erlang module
        /(:)(@atomName)(\s*)(\.)(\s*)(@variableName)/,
        [
          "constant.punctuation",
          "constant",
          "white",
          "operator",
          "white",
          "function.call",
        ],
      ],
      [
        // Piping into a function (tokenized separately as it may not have parentheses)
        /(\|>)(\s*)(@variableName)/,
        ["operator", "white", "function.call"],
      ],
      [
        // Function reference passed to another function
        /(&)(\s*)(@variableName)/,
        ["operator", "white", "function.call"],
      ],
      // Language keywords, builtins, constants and variables
      [
        /@variableName/,
        {
          cases: {
            "@declarationKeywords": "keyword.declaration",
            "@operatorKeywords": "keyword.operator",
            "@namespaceKeywords": "keyword",
            "@otherKeywords": "keyword",
            "@constants": "constant.language",
            "@nameBuiltin": "variable.language",
            "_.*": "comment.unused",
            "@default": "identifier",
          },
        },
      ],
      // Module names
      [/@moduleName/, "type.identifier"],
    ],

    // Strings

    strings: [
      [/"""/, { token: "string.delimiter", next: "@doubleQuotedHeredoc" }],
      [/'''/, { token: "string.delimiter", next: "@singleQuotedHeredoc" }],
      [/"/, { token: "string.delimiter", next: "@doubleQuotedString" }],
      [/'/, { token: "string.delimiter", next: "@singleQuotedString" }],
    ],

    doubleQuotedHeredoc: [
      [/"""/, { token: "string.delimiter", next: "@pop" }],
      { include: "@stringContentInterpol" },
    ],

    singleQuotedHeredoc: [
      [/'''/, { token: "string.delimiter", next: "@pop" }],
      { include: "@stringContentInterpol" },
    ],

    doubleQuotedString: [
      [/"/, { token: "string.delimiter", next: "@pop" }],
      { include: "@stringContentInterpol" },
    ],

    singleQuotedString: [
      [/'/, { token: "string.delimiter", next: "@pop" }],
      { include: "@stringContentInterpol" },
    ],

    // Atoms

    atoms: [
      [/(:)(@atomName)/, ["constant.punctuation", "constant"]],
      [/:"/, { token: "constant.delimiter", next: "@doubleQuotedStringAtom" }],
      [/:'/, { token: "constant.delimiter", next: "@singleQuotedStringAtom" }],
    ],

    doubleQuotedStringAtom: [
      [/"/, { token: "constant.delimiter", next: "@pop" }],
      { include: "@stringConstantContentInterpol" },
    ],

    singleQuotedStringAtom: [
      [/'/, { token: "constant.delimiter", next: "@pop" }],
      { include: "@stringConstantContentInterpol" },
    ],

    // Sigils

    // See https://elixir-lang.org/getting-started/sigils.html
    // Sigils allow for typing values using their textual representation.
    // All sigils start with ~ followed by a letter indicating sigil type
    // and then a delimiter pair enclosing the textual representation.
    // Optional modifiers are allowed after the closing delimiter.
    // For instance a regular expression can be written as:
    // ~r/foo|bar/ ~r{foo|bar} ~r/foo|bar/g
    //
    // In general lowercase sigils allow for interpolation
    // and escaped characters, whereas uppercase sigils don't
    //
    // During tokenization we want to distinguish some
    // specific sigil types, namely string and regexp,
    // so that they can be themed separately.
    //
    // To reasonably handle all those combinations we leverage
    // dot-separated states, so if we transition to @sigilStart.interpol.s.{.}
    // then "sigilStart.interpol.s" state will match and also all
    // the individual dot-separated parameters can be accessed.

    sigils: [
      [
        /~[a-z]@sigilStartDelimiter/,
        { token: "@rematch", next: "@sigil.interpol" },
      ],
      [
        /~[A-Z]@sigilStartDelimiter/,
        { token: "@rematch", next: "@sigil.noInterpol" },
      ],
    ],

    // Encodes the sigil letter and the delimiter pair into the state name.
    // Asymmetric pairs are listed explicitly so that the closing delimiter
    // can be recorded; symmetric delimiters reuse the matched text ($2).
    sigil: [
      [
        /~([a-zA-Z])\{/,
        { token: "@rematch", switchTo: "@sigilStart.$S2.$1.{.}" },
      ],
      [
        /~([a-zA-Z])\[/,
        { token: "@rematch", switchTo: "@sigilStart.$S2.$1.[.]" },
      ],
      [
        /~([a-zA-Z])\(/,
        { token: "@rematch", switchTo: "@sigilStart.$S2.$1.(.)" },
      ],
      [
        /~([a-zA-Z])</,
        { token: "@rematch", switchTo: "@sigilStart.$S2.$1.<.>" },
      ],
      [
        /~([a-zA-Z])(@sigilSymmetricDelimiter)/,
        { token: "@rematch", switchTo: "@sigilStart.$S2.$1.$2.$2" },
      ],
    ],

    // The definitions below expect states to be of the form:
    //
    // sigilStart.MODE.LETTER.OPEN.CLOSE
    // sigilContinue.MODE.LETTER.OPEN.CLOSE
    //
    // where MODE is either "interpol" or "noInterpol", LETTER is the sigil
    // letter, and OPEN/CLOSE are the start and end delimiters.
    //
    // The sigilStart state is used only to properly classify the token (as string/regex/sigil)
    // and immediately switches to the sigilContinue state, which handles the actual content
    // and waits for the corresponding end delimiter.

    "sigilStart.interpol.s": [
      [
        /~s@sigilStartDelimiter/,
        {
          token: "string.delimiter",
          switchTo: "@sigilContinue.$S2.$S3.$S4.$S5",
        },
      ],
    ],

    "sigilContinue.interpol.s": [
      [
        /(@sigilEndDelimiter)[a-zA-Z]*/,
        {
          cases: {
            "$1==$S5": { token: "string.delimiter", next: "@pop" },
            "@default": "string",
          },
        },
      ],
      { include: "@stringContentInterpol" },
    ],

    "sigilStart.noInterpol.S": [
      [
        /~S@sigilStartDelimiter/,
        {
          token: "string.delimiter",
          switchTo: "@sigilContinue.$S2.$S3.$S4.$S5",
        },
      ],
    ],

    "sigilContinue.noInterpol.S": [
      // Ignore escaped sigil end
      [/(^|[^\\])\\@sigilEndDelimiter/, "string"],
      [
        /(@sigilEndDelimiter)[a-zA-Z]*/,
        {
          cases: {
            "$1==$S5": { token: "string.delimiter", next: "@pop" },
            "@default": "string",
          },
        },
      ],
      { include: "@stringContent" },
    ],

    "sigilStart.interpol.r": [
      [
        /~r@sigilStartDelimiter/,
        {
          token: "regexp.delimiter",
          switchTo: "@sigilContinue.$S2.$S3.$S4.$S5",
        },
      ],
    ],

    "sigilContinue.interpol.r": [
      [
        /(@sigilEndDelimiter)[a-zA-Z]*/,
        {
          cases: {
            "$1==$S5": { token: "regexp.delimiter", next: "@pop" },
            "@default": "regexp",
          },
        },
      ],
      { include: "@regexpContentInterpol" },
    ],

    "sigilStart.noInterpol.R": [
      [
        /~R@sigilStartDelimiter/,
        {
          token: "regexp.delimiter",
          switchTo: "@sigilContinue.$S2.$S3.$S4.$S5",
        },
      ],
    ],

    "sigilContinue.noInterpol.R": [
      // Ignore escaped sigil end
      [/(^|[^\\])\\@sigilEndDelimiter/, "regexp"],
      [
        /(@sigilEndDelimiter)[a-zA-Z]*/,
        {
          cases: {
            "$1==$S5": { token: "regexp.delimiter", next: "@pop" },
            "@default": "regexp",
          },
        },
      ],
      { include: "@regexpContent" },
    ],

    // Fallback to the generic sigil by default
    "sigilStart.interpol": [
      [
        /~([a-zA-Z])@sigilStartDelimiter/,
        {
          token: "sigil.delimiter",
          switchTo: "@sigilContinue.$S2.$S3.$S4.$S5",
        },
      ],
    ],

    "sigilContinue.interpol": [
      [
        /(@sigilEndDelimiter)[a-zA-Z]*/,
        {
          cases: {
            "$1==$S5": { token: "sigil.delimiter", next: "@pop" },
            "@default": "sigil",
          },
        },
      ],
      { include: "@sigilContentInterpol" },
    ],

    "sigilStart.noInterpol": [
      [
        /~([a-zA-Z])@sigilStartDelimiter/,
        {
          token: "sigil.delimiter",
          switchTo: "@sigilContinue.$S2.$S3.$S4.$S5",
        },
      ],
    ],

    "sigilContinue.noInterpol": [
      // Ignore escaped sigil end
      [/(^|[^\\])\\@sigilEndDelimiter/, "sigil"],
      [
        /(@sigilEndDelimiter)[a-zA-Z]*/,
        {
          cases: {
            "$1==$S5": { token: "sigil.delimiter", next: "@pop" },
            "@default": "sigil",
          },
        },
      ],
      { include: "@sigilContent" },
    ],

    // Attributes

    attributes: [
      // Module @doc* attributes - tokenized as comments
      [
        /\@(module|type)?doc (~[sS])?"""/,
        {
          token: "comment.block.documentation",
          next: "@doubleQuotedHeredocDocstring",
        },
      ],
      [
        /\@(module|type)?doc (~[sS])?"/,
        {
          token: "comment.block.documentation",
          next: "@doubleQuotedStringDocstring",
        },
      ],
      [/\@(module|type)?doc false/, "comment.block.documentation"],
      // Module attributes
      [/\@(@variableName)/, "variable"],
    ],

    doubleQuotedHeredocDocstring: [
      [/"""/, { token: "comment.block.documentation", next: "@pop" }],
      { include: "@docstringContent" },
    ],

    doubleQuotedStringDocstring: [
      [/"/, { token: "comment.block.documentation", next: "@pop" }],
      { include: "@docstringContent" },
    ],

    // Operators, punctuation, brackets

    symbols: [
      // Code point operator (either with regular character ?a or an escaped one ?\n)
      [/\?(\\.|[^\\\s])/, "number.constant"],
      // Anonymous function arguments
      [/&\d+/, "operator"],
      // Bitshift operators (must go before delimiters, so that << >> don't match first)
      [/<<<|>>>/, "operator"],
      // Delimiter pairs
      [/[()\[\]\{\}]|<<|>>/, "@brackets"],
      // Triple dot is a valid name (must go before operators, so that .. doesn't match instead)
      [/\.\.\./, "identifier"],
      // Punctuation => (must go before operators, so it's not tokenized as = then >)
      [/=>/, "punctuation"],
      // Operators
      [/@operator/, "operator"],
      // Punctuation
      [/[:;,.%]/, "punctuation"],
    ],

    // Generic helpers

    stringContentInterpol: [
      { include: "@interpolation" },
      { include: "@escapeChar" },
      { include: "@stringContent" },
    ],

    stringContent: [[/./, "string"]],

    stringConstantContentInterpol: [
      { include: "@interpolation" },
      { include: "@escapeChar" },
      { include: "@stringConstantContent" },
    ],

    stringConstantContent: [[/./, "constant"]],

    regexpContentInterpol: [
      { include: "@interpolation" },
      { include: "@escapeChar" },
      { include: "@regexpContent" },
    ],

    regexpContent: [
      // # may be a regular regexp char, so we use a heuristic
      // assuming a # surrounded by whitespace is actually a comment.
      [/(\s)(#)(\s.*)$/, ["white", "comment.punctuation", "comment"]],
      [/./, "regexp"],
    ],

    sigilContentInterpol: [
      { include: "@interpolation" },
      { include: "@escapeChar" },
      { include: "@sigilContent" },
    ],

    sigilContent: [[/./, "sigil"]],

    docstringContent: [[/./, "comment.block.documentation"]],

    escapeChar: [[/@escape/, "constant.character.escape"]],

    interpolation: [
      [
        /#{/,
        { token: "delimiter.bracket.embed", next: "@interpolationContinue" },
      ],
    ],

    interpolationContinue: [
      [/}/, { token: "delimiter.bracket.embed", next: "@pop" }],
      // Interpolation brackets may contain arbitrary code,
      // so we simply match against all the root rules,
      // until we reach interpolation end (the above matches).
      { include: "@root" },
    ],
  },
};

export default ElixirMonarchLanguage;