From 479b0379d88504f28e6e28540f002f1a03f54dbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonatan=20K=C5=82osko?= Date: Tue, 26 Jan 2021 13:14:58 +0100 Subject: [PATCH] Implement Elixir lexer for the Monaco editor (#14) * Implement Elixir lexer for the Monaco editor * Tokenize function calls --- assets/js/editor/elixir/monarch_language.js | 606 ++++++++++++++++++ .../on_type_formatting_edit_provider.js | 2 +- assets/js/editor/index.js | 1 + assets/js/editor/monaco.js | 41 +- 4 files changed, 648 insertions(+), 2 deletions(-) create mode 100644 assets/js/editor/elixir/monarch_language.js diff --git a/assets/js/editor/elixir/monarch_language.js b/assets/js/editor/elixir/monarch_language.js new file mode 100644 index 000000000..3802f96ea --- /dev/null +++ b/assets/js/editor/elixir/monarch_language.js @@ -0,0 +1,606 @@ +/** + * A Monarch lexer for the Elixir language. + * + * By default the Monaco editor uses Monarch for tokenizing the source code, + * which is then used for syntax highlighting as defined by the theme. 
+ * + * References: + * + * * Monarch documentation - https://microsoft.github.io/monaco-editor/monarch.html + * * Monarch lexers shipped with Monaco by default - https://github.com/microsoft/monaco-languages + * * Elixir lexer - https://github.com/elixir-makeup/makeup_elixir/blob/master/lib/makeup/lexers/elixir_lexer.ex + * * TextMate lexer (elixir-tmbundle) - https://github.com/elixir-editors/elixir-tmbundle/blob/master/Syntaxes/Elixir.tmLanguage + * * TextMate lexer (vscode-elixir-ls) - https://github.com/elixir-lsp/vscode-elixir-ls/blob/master/syntaxes/elixir.json + */ +const ElixirMonarchLanguage = { + defaultToken: "source", + tokenPostfix: ".elixir", + + brackets: [ + { open: "[", close: "]", token: "delimiter.square" }, + { open: "(", close: ")", token: "delimiter.parenthesis" }, + { open: "{", close: "}", token: "delimiter.curly" }, + { open: "<<", close: ">>", token: "delimiter.angle.special" }, + ], + + // Below are lists/regexps that we reference later. + + declarationKeywords: [ + "def", + "defp", + "defguard", + "defguardp", + "defmacro", + "defmacrop", + "defdelegate", + "defcallback", + "defmacrocallback", + "defmodule", + "defprotocol", + "defexception", + "defimpl", + "defstruct", + ], + operatorKeywords: ["and", "in", "not", "or", "when"], + namespaceKeywords: ["alias", "import", "require", "use"], + otherKeywords: [ + "after", + "case", + "catch", + "cond", + "do", + "else", + "end", + "fn", + "for", + "if", + "quote", + "raise", + "receive", + "rescue", + "super", + "throw", + "try", + "unless", + "unquote_splicing", + "unquote", + "with", + ], + constants: ["true", "false", "nil"], + nameBuiltin: [ + "__MODULE__", + "__DIR__", + "__ENV__", + "__CALLER__", + "__STACKTRACE__", + ], + + // Matches any of the operator names: + // <<< >>> ||| &&& ^^^ ~~~ === !== ~>> <~> |~> <|> == != <= >= && || \\ <> ++ -- |> =~ -> <- ~> <~ :: .. = < > + - * / | . ^ & !
+ operator: /-[->]?|!={0,2}|\*|\/|\\\\|&{1,3}|\.\.?|\^(?:\^\^)?|\+\+?|<(?:-|<<|=|>|\|>|~>?)?|=~|={1,3}|>(?:=|>>)?|\|~>|\|>|\|{1,3}|~>>?|~~~|::/, + + // See https://hexdocs.pm/elixir/syntax-reference.html#variables + variableName: /[a-z_][a-zA-Z0-9_]*[?!]?/, + + // See https://hexdocs.pm/elixir/syntax-reference.html#atoms + atomName: /[a-zA-Z_][a-zA-Z0-9_@]*[?!]?|@specialAtomName|@operator/, + specialAtomName: /\.\.\.|<<>>|%\{\}|%|\{\}/, + + aliasPart: /[A-Z][a-zA-Z0-9_]*/, + moduleName: /@aliasPart(?:\.@aliasPart)*/, + + // Sigil pairs are: """ """, ''' ''', " ", ' ', / /, | |, < >, { }, [ ], ( ) + sigilSymmetricDelimiter: /"""|'''|"|'|\/|\|/, + sigilStartDelimiter: /@sigilSymmetricDelimiter|<|\{|\[|\(/, + sigilEndDelimiter: /@sigilSymmetricDelimiter|>|\}|\]|\)/, + + decimal: /\d(?:_?\d)*/, + hex: /[0-9a-fA-F](_?[0-9a-fA-F])*/, + octal: /[0-7](_?[0-7])*/, + binary: /[01](_?[01])*/, + + // See https://hexdocs.pm/elixir/master/String.html#module-escape-characters + escape: /\\u[0-9a-fA-F]{4}|\\x[0-9a-fA-F]{2}|\\./, + + // The keys below correspond to tokenizer states. + // We start from the root state and match against its rules + // until we explicitly transition into another state. + // The `include` simply brings in all operations from the given state + // and is useful for improving readability. + tokenizer: { + root: [ + { include: "@whitespace" }, + { include: "@comments" }, + // Keywords start as either an identifier or a string, + // but end with a : so it's important to match this first.
+ { include: "@keywordsShorthand" }, + { include: "@numbers" }, + { include: "@identifiers" }, + { include: "@strings" }, + { include: "@atoms" }, + { include: "@sigils" }, + { include: "@attributes" }, + { include: "@symbols" }, + ], + + // Whitespace + + whitespace: [[/\s+/, "white"]], + + // Comments + + comments: [[/(#)(.*)/, ["comment.punctuation", "comment"]]], + + // Keyword list shorthand + + keywordsShorthand: [ + [/(@atomName)(:)/, ["constant", "constant.punctuation"]], + // Use positive look-ahead to ensure the string is followed by : + // and should be considered a keyword. + [ + /"(?=([^"]|#\{.*?\}|\\")*":)/, + { token: "constant.delimiter", next: "@doubleQuotedStringKeyword" }, + ], + [ + /'(?=([^']|#\{.*?\}|\\')*':)/, + { token: "constant.delimiter", next: "@singleQuotedStringKeyword" }, + ], + ], + + doubleQuotedStringKeyword: [ + [/":/, { token: "constant.delimiter", next: "@pop" }], + { include: "@stringConstantContentInterpol" }, + ], + + singleQuotedStringKeyword: [ + [/':/, { token: "constant.delimiter", next: "@pop" }], + { include: "@stringConstantContentInterpol" }, + ], + + // Numbers + + numbers: [ + [/0b@binary/, "number.binary"], + [/0o@octal/, "number.octal"], + [/0x@hex/, "number.hex"], + [/@decimal\.@decimal([eE]-?@decimal)?/, "number.float"], + [/@decimal/, "number"], + ], + + // Identifiers + + identifiers: [ + // Tokenize identifier name in function-like definitions. + // Note: given `def a + b, do: nil`, `a` is not a function name, + // so we use negative look-ahead to ensure there's no operator. 
+ [ + /\b(defp?|defmacrop?|defguardp?|defdelegate)(\s+)(@variableName)(?!\s+@operator)/, + [ + "keyword.declaration", + "white", + { + cases: { + unquote: "keyword", + "@default": "function", + }, + }, + ], + ], + // Tokenize function calls + [ + // In-scope call - an identifier followed by ( or .( + /(@variableName)(?=\s*\.?\s*\()/, + ['function.call'] + ], + [ + // Referencing function in a module + /(@moduleName)(\s*)(\.)(\s*)(@variableName)/, + ['type.identifier', 'white', 'operator', 'white', 'function.call'] + ], + [ + // Referencing function in an Erlang module + /(:)(@atomName)(\s*)(\.)(\s*)(@variableName)/, + ["constant.punctuation", "constant", 'white', 'operator', 'white', 'function.call'] + ], + [ + // Piping into a function (tokenized separately as it may not have parentheses) + /(\|>)(\s*)(@variableName)/, + ['operator', 'white', 'function.call'] + ], + [ + // Function reference passed to another function + /(&)(\s*)(@variableName)/, + ['operator', 'white', 'function.call'] + ], + // Language keywords, builtins, constants and variables + [ + /@variableName/, + { + cases: { + "@declarationKeywords": "keyword.declaration", + "@operatorKeywords": "keyword.operator", + "@namespaceKeywords": "keyword", + "@otherKeywords": "keyword", + "@constants": "constant.language", + "@nameBuiltin": "variable.language", + "_.*": "comment.unused", + "@default": "identifier", + }, + }, + ], + // Module names + [/@moduleName/, "type.identifier"], + ], + + // Strings + + strings: [ + [/"""/, { token: "string.delimiter", next: "@doubleQuotedHeredoc" }], + [/'''/, { token: "string.delimiter", next: "@singleQuotedHeredoc" }], + [/"/, { token: "string.delimiter", next: "@doubleQuotedString" }], + [/'/, { token: "string.delimiter", next: "@singleQuotedString" }], + ], + + doubleQuotedHeredoc: [ + [/"""/, { token: "string.delimiter", next: "@pop" }], + { include: "@stringContentInterpol" }, + ], + + singleQuotedHeredoc: [ + [/'''/, { token: "string.delimiter", next: "@pop" }], + 
{ include: "@stringContentInterpol" }, + ], + + doubleQuotedString: [ + [/"/, { token: "string.delimiter", next: "@pop" }], + { include: "@stringContentInterpol" }, + ], + + singleQuotedString: [ + [/'/, { token: "string.delimiter", next: "@pop" }], + { include: "@stringContentInterpol" }, + ], + + // Atoms + + atoms: [ + [/(:)(@atomName)/, ["constant.punctuation", "constant"]], + [/:"/, { token: "constant.delimiter", next: "@doubleQuotedStringAtom" }], + [/:'/, { token: "constant.delimiter", next: "@singleQuotedStringAtom" }], + ], + + doubleQuotedStringAtom: [ + [/"/, { token: "constant.delimiter", next: "@pop" }], + { include: "@stringConstantContentInterpol" }, + ], + + singleQuotedStringAtom: [ + [/'/, { token: "constant.delimiter", next: "@pop" }], + { include: "@stringConstantContentInterpol" }, + ], + + // Sigils + + // See https://elixir-lang.org/getting-started/sigils.html + // Sigils allow for typing values using their textual representation. + // All sigils start with ~ followed by a letter indicating sigil type + // and then a delimiter pair enclosing the textual representation. + // Optional modifiers are allowed after the closing delimiter. + // For instance, a regular expression can be written as: + // ~r/foo|bar/ ~r{foo|bar} ~r/foo|bar/g + // + // In general lowercase sigils allow for interpolation + // and escaped characters, whereas uppercase sigils don't + // + // During tokenization we want to distinguish some + // specific sigil types, namely string and regexp, + // so that they can be themed separately. + // + // To reasonably handle all those combinations we leverage + // dot-separated states, so if we transition to @sigilStart.interpol.s.{.} + // then "sigilStart.interpol.s" state will match and also all + // the individual dot-separated parameters can be accessed.
+ + sigils: [ + [ + /~[a-z]@sigilStartDelimiter/, + { token: "@rematch", next: "@sigil.interpol" }, + ], + [ + /~[A-Z]@sigilStartDelimiter/, + { token: "@rematch", next: "@sigil.noInterpol" }, + ], + ], + + sigil: [ + [ + /~([a-zA-Z])\{/, + { token: "@rematch", switchTo: "@sigilStart.$S2.$1.{.}" }, + ], + [ + /~([a-zA-Z])\[/, + { token: "@rematch", switchTo: "@sigilStart.$S2.$1.[.]" }, + ], + [ + /~([a-zA-Z])\(/, + { token: "@rematch", switchTo: "@sigilStart.$S2.$1.(.)" }, + ], + [ + /~([a-zA-Z])\</, + { token: "@rematch", switchTo: "@sigilStart.$S2.$1.<.>" }, + ], + [ + /~([a-zA-Z])(@sigilSymmetricDelimiter)/, + { token: "@rematch", switchTo: "@sigilStart.$S2.$1.$2.$2" }, + ], + ], + + // The definitions below expect states to be of the form: + // + // sigilStart.<interpol-or-noInterpol>.<sigil-letter>.<start-delimiter>.<end-delimiter> + // sigilContinue.<interpol-or-noInterpol>.<sigil-letter>.<start-delimiter>.<end-delimiter> + // + // The sigilStart state is used only to properly classify the token (as string/regex/sigil) + // and immediately switches to the sigilContinue state, which handles the actual content + // and waits for the corresponding end delimiter. + + "sigilStart.interpol.s": [ + [ + /~s@sigilStartDelimiter/, + { + token: "string.delimiter", + switchTo: "@sigilContinue.$S2.$S3.$S4.$S5", + }, + ], + ], + + "sigilContinue.interpol.s": [ + [ + /(@sigilEndDelimiter)[a-zA-Z]*/, + { + cases: { + "$1==$S5": { token: "string.delimiter", next: "@pop" }, + "@default": "string", + }, + }, + ], + { include: "@stringContentInterpol" }, + ], + + "sigilStart.noInterpol.S": [ + [ + /~S@sigilStartDelimiter/, + { + token: "string.delimiter", + switchTo: "@sigilContinue.$S2.$S3.$S4.$S5", + }, + ], + ], + + "sigilContinue.noInterpol.S": [ + // Ignore escaped sigil end + [/(^|[^\\])\\@sigilEndDelimiter/, "string"], + [ + /(@sigilEndDelimiter)[a-zA-Z]*/, + { + cases: { + "$1==$S5": { token: "string.delimiter", next: "@pop" }, + "@default": "string", + }, + }, + ], + { include: "@stringContent" }, + ], + + "sigilStart.interpol.r": [ + [ + /~r@sigilStartDelimiter/, + { + token: "regexp.delimiter", + switchTo: "@sigilContinue.$S2.$S3.$S4.$S5", + }, + ], + ], + +
"sigilContinue.interpol.r": [ + [ + /(@sigilEndDelimiter)[a-zA-Z]*/, + { + cases: { + "$1==$S5": { token: "regexp.delimiter", next: "@pop" }, + "@default": "regexp", + }, + }, + ], + { include: "@regexpContentInterpol" }, + ], + + "sigilStart.noInterpol.R": [ + [ + /~R@sigilStartDelimiter/, + { + token: "regexp.delimiter", + switchTo: "@sigilContinue.$S2.$S3.$S4.$S5", + }, + ], + ], + + "sigilContinue.noInterpol.R": [ + // Ignore escaped sigil end + [/(^|[^\\])\\@sigilEndDelimiter/, "regexp"], + [ + /(@sigilEndDelimiter)[a-zA-Z]*/, + { + cases: { + "$1==$S5": { token: "regexp.delimiter", next: "@pop" }, + "@default": "regexp", + }, + }, + ], + { include: "@regexpContent" }, + ], + + // Fallback to the generic sigil by default + "sigilStart.interpol": [ + [ + /~([a-zA-Z])@sigilStartDelimiter/, + { + token: "sigil.delimiter", + switchTo: "@sigilContinue.$S2.$S3.$S4.$S5", + }, + ], + ], + + "sigilContinue.interpol": [ + [ + /(@sigilEndDelimiter)[a-zA-Z]*/, + { + cases: { + "$1==$S5": { token: "sigil.delimiter", next: "@pop" }, + "@default": "sigil", + }, + }, + ], + { include: "@sigilContentInterpol" }, + ], + + "sigilStart.noInterpol": [ + [ + /~([a-zA-Z])@sigilStartDelimiter/, + { + token: "sigil.delimiter", + switchTo: "@sigilContinue.$S2.$S3.$S4.$S5", + }, + ], + ], + + "sigilContinue.noInterpol": [ + // Ignore escaped sigil end + [/(^|[^\\])\\@sigilEndDelimiter/, "sigil"], + [ + /(@sigilEndDelimiter)[a-zA-Z]*/, + { + cases: { + "$1==$S5": { token: "sigil.delimiter", next: "@pop" }, + "@default": "sigil", + }, + }, + ], + { include: "@sigilContent" }, + ], + + // Attributes + + attributes: [ + // Module @doc* attributes - tokenized as comments + [ + /\@(module|type)?doc (~[sS])?"""/, + { + token: "comment.block.documentation", + next: "@doubleQuotedHeredocDocstring", + }, + ], + [ + /\@(module|type)?doc (~[sS])?"/, + { + token: "comment.block.documentation", + next: "@doubleQuotedStringDocstring", + }, + ], + [/\@(module|type)?doc false/, 
"comment.block.documentation"], + // Module attributes + [/\@@variableName/, "variable"], + ], + + doubleQuotedHeredocDocstring: [ + [/"""/, { token: "comment.block.documentation", next: "@pop" }], + { include: "@docstringContent" }, + ], + + doubleQuotedStringDocstring: [ + [/"/, { token: "comment.block.documentation", next: "@pop" }], + { include: "@docstringContent" }, + ], + + // Operators, punctuation, brackets + + symbols: [ + // Code point operator (either with regular character ?a or an escaped one ?\n) + [/\?(\\.|[^\\\s])/, "number.constant"], + // Anonymous function arguments + [/&\d+/, "operator"], + // Bitshift operators (must go before delimiters, so that << >> don't match first) + [/<<<|>>>/, "operator"], + // Delimiter pairs + [/[()\[\]\{\}]|<<|>>/, "@brackets"], + // Triple dot is a valid name (must go before operators, so that .. doesn't match instead) + [/\.\.\./, "identifier"], + // Punctuation => (must go before operators, so it's not tokenized as = then >) + [/=>/, "punctuation"], + // Operators + [/@operator/, "operator"], + // Punctuation + [/[:;,.%]/, "punctuation"], + ], + + // Generic helpers + + stringContentInterpol: [ + { include: "@interpolation" }, + { include: "@escapeChar" }, + { include: "@stringContent" }, + ], + + stringContent: [[/./, "string"]], + + stringConstantContentInterpol: [ + { include: "@interpolation" }, + { include: "@escapeChar" }, + { include: "@stringConstantContent" }, + ], + + stringConstantContent: [[/./, "constant"]], + + regexpContentInterpol: [ + { include: "@interpolation" }, + { include: "@escapeChar" }, + { include: "@regexpContent" }, + ], + + regexpContent: [ + // # may be a regular regexp char, so we use a heuristic + // assuming a # surrounded by whitespace is actually a comment. 
+ [/(\s)(#)(\s.*)$/, ["white", "comment.punctuation", "comment"]], + [/./, "regexp"], + ], + + sigilContentInterpol: [ + { include: "@interpolation" }, + { include: "@escapeChar" }, + { include: "@sigilContent" }, + ], + + sigilContent: [[/./, "sigil"]], + + docstringContent: [[/./, "comment.block.documentation"]], + + escapeChar: [[/@escape/, "constant.character.escape"]], + + interpolation: [ + [ + /#{/, + { token: "delimiter.bracket.embed", next: "@interpolationContinue" }, + ], + ], + + interpolationContinue: [ + [/}/, { token: "delimiter.bracket.embed", next: "@pop" }], + // Interpolation brackets may contain arbitrary code, + // so we simply match against all the root rules, + // until we reach interpolation end (the above matches). + { include: "@root" }, + ], + }, +}; + +export default ElixirMonarchLanguage; diff --git a/assets/js/editor/elixir/on_type_formatting_edit_provider.js b/assets/js/editor/elixir/on_type_formatting_edit_provider.js index b14c86c20..0e2b3b18d 100644 --- a/assets/js/editor/elixir/on_type_formatting_edit_provider.js +++ b/assets/js/editor/elixir/on_type_formatting_edit_provider.js @@ -53,7 +53,7 @@ function shouldInsertClosingEnd(lines, lineIndex) { ) { const nextLineWithSameIndentation = lines .slice(lineIndex + 1) - .filter(line => !isBlank(line)) + .filter((line) => !isBlank(line)) .find((line) => indentation(line) === prevIndentation); if (nextLineWithSameIndentation) { diff --git a/assets/js/editor/index.js b/assets/js/editor/index.js index e914a077b..ece6b9905 100644 --- a/assets/js/editor/index.js +++ b/assets/js/editor/index.js @@ -60,6 +60,7 @@ const Editor = { renderIndentGuides: false, occurrencesHighlight: false, renderLineHighlight: "none", + theme: "custom", }); editor.getModel().updateOptions({ diff --git a/assets/js/editor/monaco.js b/assets/js/editor/monaco.js index 85e528996..4678cdb7c 100644 --- a/assets/js/editor/monaco.js +++ b/assets/js/editor/monaco.js @@ -1,5 +1,6 @@ import * as monaco from 
"monaco-editor/esm/vs/editor/editor.api"; import ElixirLanguageConfiguration from "./elixir/language_configuration"; +import ElixirMonarchLanguage from "./elixir/monarch_language"; import ElixirOnTypeFormattingEditProvider from "./elixir/on_type_formatting_edit_provider"; // Register the Elixir language and add relevant configuration @@ -15,6 +16,44 @@ monaco.languages.registerOnTypeFormattingEditProvider( ElixirOnTypeFormattingEditProvider ); -// TODO: add Monarch tokenizer for syntax highlighting +monaco.languages.setMonarchTokensProvider("elixir", ElixirMonarchLanguage); + +// Define custom theme + +monaco.editor.defineTheme("custom", { + base: "vs", + inherit: false, + rules: [ + { token: "", foreground: "#444444" }, + { token: "variable", foreground: "#ca4956" }, + { token: "constant", foreground: "#3c91cf" }, + { token: "constant.character.escape", foreground: "#3c91cf" }, + { token: "comment", foreground: "#9e9e9e" }, + { token: "number", foreground: "#bf8b56" }, + { token: "regexp", foreground: "#ca4956" }, + { token: "type", foreground: "#ca4956" }, + { token: "string", foreground: "#50a14f" }, + { token: "keyword", foreground: "#9c00b0" }, + { token: "operator", foreground: "#cc5c52" }, + { token: "delimiter.bracket.embed", foreground: "#204a87" }, + { token: "sigil", foreground: "#bf8b56" }, + { token: "function", foreground: "#3c91cf" }, + { token: "function.call", foreground: "#444444" }, + + // Markdown specific + { token: "emphasis", fontStyle: "italic" }, + { token: "strong", fontStyle: "bold" }, + { token: "keyword.md", foreground: "#ca4956" }, + { token: "keyword.table", foreground: "#ca4956" }, + { token: "string.link.md", foreground: "#3c91cf" }, + { token: "variable.md", foreground: "#204a87" }, + ], + colors: { + "editor.background": "#fafafa", + "editorLineNumber.foreground": "#cfd8dc", + "editorCursor.foreground": "#666666", + "editor.selectionBackground": "#eeeeee", + }, +}); export default monaco;