Implement Elixir lexer for the Monaco editor (#14)

* Implement Elixir lexer for the Monaco editor

* Tokenize function calls
This commit is contained in:
Jonatan Kłosko 2021-01-26 13:14:58 +01:00 committed by GitHub
parent 8e4b4af60c
commit 479b0379d8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 648 additions and 2 deletions

View file

@ -0,0 +1,606 @@
/**
 * A Monarch lexer for the Elixir language.
 *
 * By default the Monaco editor uses Monarch for tokenizing the source code,
 * which is then used for syntax highlighting as defined by the theme.
 *
 * The definition consists of two parts:
 *
 *   * top-level lists and regexps (such as `declarationKeywords` or `operator`),
 *     which the rules reference by name using the `@name` notation
 *   * the `tokenizer` object, mapping state names to their lists of rules
 *
 * References:
 *
 *   * Monarch documentation - https://microsoft.github.io/monaco-editor/monarch.html
 *   * Monarch lexers shipped with Monaco by default - https://github.com/microsoft/monaco-languages
 *   * Elixir lexer - https://github.com/elixir-makeup/makeup_elixir/blob/master/lib/makeup/lexers/elixir_lexer.ex
 *   * TextMate lexer (elixir-tmbundle) - https://github.com/elixir-editors/elixir-tmbundle/blob/master/Syntaxes/Elixir.tmLanguage
 *   * TextMate lexer (vscode-elixir-ls) - https://github.com/elixir-lsp/vscode-elixir-ls/blob/master/syntaxes/elixir.json
 */
const ElixirMonarchLanguage = {
  defaultToken: "source",
  tokenPostfix: ".elixir",

  brackets: [
    { open: "[", close: "]", token: "delimiter.square" },
    { open: "(", close: ")", token: "delimiter.parenthesis" },
    { open: "{", close: "}", token: "delimiter.curly" },
    { open: "<<", close: ">>", token: "delimiter.angle.special" },
  ],

  // Below are lists/regexps which the tokenizer rules reference later.

  declarationKeywords: [
    "def",
    "defp",
    "defguard",
    "defguardp",
    "defmacro",
    "defmacrop",
    "defdelegate",
    "defcallback",
    "defmacrocallback",
    "defmodule",
    "defprotocol",
    "defexception",
    "defimpl",
    "defstruct",
  ],
  operatorKeywords: ["and", "in", "not", "or", "when"],
  namespaceKeywords: ["alias", "import", "require", "use"],
  otherKeywords: [
    "after",
    "case",
    "catch",
    "cond",
    "do",
    "else",
    "end",
    "fn",
    "for",
    "if",
    "quote",
    "raise",
    "receive",
    "rescue",
    "super",
    "throw",
    "try",
    "unless",
    "unquote_splicing",
    "unquote",
    "with",
  ],
  constants: ["true", "false", "nil"],
  nameBuiltin: [
    "__MODULE__",
    "__DIR__",
    "__ENV__",
    "__CALLER__",
    "__STACKTRACE__",
  ],

  // Matches any of the operator names:
  // <<< >>> ||| &&& ^^^ ~~~ === !== ~>> <~> |~> <|> == != <= >= && || \\ <> ++ -- |> =~ -> <- ~> <~ :: .. = < > + - * / | . ^ & !
  operator: /-[->]?|!={0,2}|\*|\/|\\\\|&{1,3}|\.\.?|\^(?:\^\^)?|\+\+?|<(?:-|<<|=|>|\|>|~>?)?|=~|={1,3}|>(?:=|>>)?|\|~>|\|>|\|{1,3}|~>>?|~~~|::/,

  // See https://hexdocs.pm/elixir/syntax-reference.html#variables
  variableName: /[a-z_][a-zA-Z0-9_]*[?!]?/,

  // See https://hexdocs.pm/elixir/syntax-reference.html#atoms
  atomName: /[a-zA-Z_][a-zA-Z0-9_@]*[?!]?|@specialAtomName|@operator/,
  specialAtomName: /\.\.\.|<<>>|%\{\}|%|\{\}/,

  aliasPart: /[A-Z][a-zA-Z0-9_]*/,
  moduleName: /@aliasPart(?:\.@aliasPart)*/,

  // Sigil pairs are: """ """, ''' ''', " ", ' ', / /, | |, < >, { }, [ ], ( )
  sigilSymmetricDelimiter: /"""|'''|"|'|\/|\|/,
  sigilStartDelimiter: /@sigilSymmetricDelimiter|<|\{|\[|\(/,
  sigilEndDelimiter: /@sigilSymmetricDelimiter|>|\}|\]|\)/,

  decimal: /\d(?:_?\d)*/,
  hex: /[0-9a-fA-F](_?[0-9a-fA-F])*/,
  octal: /[0-7](_?[0-7])*/,
  binary: /[01](_?[01])*/,

  // See https://hexdocs.pm/elixir/master/String.html#module-escape-characters
  escape: /\\u[0-9a-fA-F]{4}|\\x[0-9a-fA-F]{2}|\\./,

  // The keys below correspond to tokenizer states.
  // We start from the root state and match against its rules
  // until we explicitly transition into another state.
  // The `include` simply brings in all operations from the given state
  // and is useful for improving readability.
  tokenizer: {
    root: [
      { include: "@whitespace" },
      { include: "@comments" },
      // Keywords start as either an identifier or a string,
      // but end with a : so it's important to match this first.
      { include: "@keywordsShorthand" },
      { include: "@numbers" },
      { include: "@identifiers" },
      { include: "@strings" },
      { include: "@atoms" },
      { include: "@sigils" },
      { include: "@attributes" },
      { include: "@symbols" },
    ],

    // Whitespace

    whitespace: [[/\s+/, "white"]],

    // Comments

    comments: [[/(#)(.*)/, ["comment.punctuation", "comment"]]],

    // Keyword list shorthand

    keywordsShorthand: [
      // The negative look-ahead rejects a second colon, so that the type
      // operator in `x::binary` is not split into keyword `x:` and atom `:binary`.
      [/(@atomName)(:)(?!:)/, ["constant", "constant.punctuation"]],
      // Use positive look-ahead to ensure the string is followed by :
      // and should be considered a keyword.
      [
        /"(?=([^"]|#\{.*?\}|\\")*":)/,
        { token: "constant.delimiter", next: "@doubleQuotedStringKeyword" },
      ],
      [
        /'(?=([^']|#\{.*?\}|\\')*':)/,
        { token: "constant.delimiter", next: "@singleQuotedStringKeyword" },
      ],
    ],
    doubleQuotedStringKeyword: [
      [/":/, { token: "constant.delimiter", next: "@pop" }],
      { include: "@stringConstantContentInterpol" },
    ],
    singleQuotedStringKeyword: [
      [/':/, { token: "constant.delimiter", next: "@pop" }],
      { include: "@stringConstantContentInterpol" },
    ],

    // Numbers

    numbers: [
      [/0b@binary/, "number.binary"],
      [/0o@octal/, "number.octal"],
      [/0x@hex/, "number.hex"],
      // The exponent may carry an explicit sign of either kind (1.0e+10, 1.0e-10).
      [/@decimal\.@decimal([eE][+-]?@decimal)?/, "number.float"],
      [/@decimal/, "number"],
    ],

    // Identifiers

    identifiers: [
      // Tokenize identifier name in function-like definitions.
      // Note: given `def a + b, do: nil`, `a` is not a function name,
      // so we use negative look-ahead to ensure there's no operator.
      [
        /\b(defp?|defmacrop?|defguardp?|defdelegate)(\s+)(@variableName)(?!\s+@operator)/,
        [
          "keyword.declaration",
          "white",
          {
            cases: {
              unquote: "keyword",
              "@default": "function",
            },
          },
        ],
      ],
      // Tokenize function calls
      [
        // In-scope call - an identifier followed by ( or .(
        /(@variableName)(?=\s*\.?\s*\()/,
        ["function.call"],
      ],
      [
        // Referencing function in a module
        /(@moduleName)(\s*)(\.)(\s*)(@variableName)/,
        ["type.identifier", "white", "operator", "white", "function.call"],
      ],
      [
        // Referencing function in an Erlang module
        /(:)(@atomName)(\s*)(\.)(\s*)(@variableName)/,
        [
          "constant.punctuation",
          "constant",
          "white",
          "operator",
          "white",
          "function.call",
        ],
      ],
      [
        // Piping into a function (tokenized separately as it may not have parentheses)
        /(\|>)(\s*)(@variableName)/,
        ["operator", "white", "function.call"],
      ],
      [
        // Function reference passed to another function
        /(&)(\s*)(@variableName)/,
        ["operator", "white", "function.call"],
      ],
      // Language keywords, builtins, constants and variables
      [
        /@variableName/,
        {
          cases: {
            "@declarationKeywords": "keyword.declaration",
            "@operatorKeywords": "keyword.operator",
            "@namespaceKeywords": "keyword",
            "@otherKeywords": "keyword",
            "@constants": "constant.language",
            "@nameBuiltin": "variable.language",
            "_.*": "comment.unused",
            "@default": "identifier",
          },
        },
      ],
      // Module names
      [/@moduleName/, "type.identifier"],
    ],

    // Strings

    strings: [
      [/"""/, { token: "string.delimiter", next: "@doubleQuotedHeredoc" }],
      [/'''/, { token: "string.delimiter", next: "@singleQuotedHeredoc" }],
      [/"/, { token: "string.delimiter", next: "@doubleQuotedString" }],
      [/'/, { token: "string.delimiter", next: "@singleQuotedString" }],
    ],
    doubleQuotedHeredoc: [
      [/"""/, { token: "string.delimiter", next: "@pop" }],
      { include: "@stringContentInterpol" },
    ],
    singleQuotedHeredoc: [
      [/'''/, { token: "string.delimiter", next: "@pop" }],
      { include: "@stringContentInterpol" },
    ],
    doubleQuotedString: [
      [/"/, { token: "string.delimiter", next: "@pop" }],
      { include: "@stringContentInterpol" },
    ],
    singleQuotedString: [
      [/'/, { token: "string.delimiter", next: "@pop" }],
      { include: "@stringContentInterpol" },
    ],

    // Atoms

    atoms: [
      [/(:)(@atomName)/, ["constant.punctuation", "constant"]],
      [/:"/, { token: "constant.delimiter", next: "@doubleQuotedStringAtom" }],
      [/:'/, { token: "constant.delimiter", next: "@singleQuotedStringAtom" }],
    ],
    doubleQuotedStringAtom: [
      [/"/, { token: "constant.delimiter", next: "@pop" }],
      { include: "@stringConstantContentInterpol" },
    ],
    singleQuotedStringAtom: [
      [/'/, { token: "constant.delimiter", next: "@pop" }],
      { include: "@stringConstantContentInterpol" },
    ],

    // Sigils

    // See https://elixir-lang.org/getting-started/sigils.html
    // Sigils allow for typing values using their textual representation.
    // All sigils start with ~ followed by a letter indicating sigil type
    // and then a delimiter pair enclosing the textual representation.
    // Optional modifiers are allowed after the closing delimiter.
    // For instance a regular expression can be written as:
    // ~r/foo|bar/ ~r{foo|bar} ~r/foo|bar/g
    //
    // In general lowercase sigils allow for interpolation
    // and escaped characters, whereas uppercase sigils don't
    //
    // During tokenization we want to distinguish some
    // specific sigil types, namely string and regexp,
    // so that they can be themed separately.
    //
    // To reasonably handle all those combinations we leverage
    // dot-separated states, so if we transition to @sigilStart.interpol.s.{.}
    // then "sigilStart.interpol.s" state will match and also all
    // the individual dot-separated parameters can be accessed.

    sigils: [
      [
        /~[a-z]@sigilStartDelimiter/,
        { token: "@rematch", next: "@sigil.interpol" },
      ],
      [
        /~[A-Z]@sigilStartDelimiter/,
        { token: "@rematch", next: "@sigil.noInterpol" },
      ],
    ],
    sigil: [
      [
        /~([a-zA-Z])\{/,
        { token: "@rematch", switchTo: "@sigilStart.$S2.$1.{.}" },
      ],
      [
        /~([a-zA-Z])\[/,
        { token: "@rematch", switchTo: "@sigilStart.$S2.$1.[.]" },
      ],
      [
        /~([a-zA-Z])\(/,
        { token: "@rematch", switchTo: "@sigilStart.$S2.$1.(.)" },
      ],
      [
        /~([a-zA-Z])\</,
        { token: "@rematch", switchTo: "@sigilStart.$S2.$1.<.>" },
      ],
      [
        /~([a-zA-Z])(@sigilSymmetricDelimiter)/,
        { token: "@rematch", switchTo: "@sigilStart.$S2.$1.$2.$2" },
      ],
    ],

    // The definitions below expect states to be of the form:
    //
    // sigilStart.<interpol-or-noInterpol>.<sigil-letter>.<start-delimiter>.<end-delimiter>
    // sigilContinue.<interpol-or-noInterpol>.<sigil-letter>.<start-delimiter>.<end-delimiter>
    //
    // The sigilStart state is used only to properly classify the token (as string/regex/sigil)
    // and immediately switches to the sigilContinue state, which handles the actual content
    // and waits for the corresponding end delimiter.

    "sigilStart.interpol.s": [
      [
        /~s@sigilStartDelimiter/,
        {
          token: "string.delimiter",
          switchTo: "@sigilContinue.$S2.$S3.$S4.$S5",
        },
      ],
    ],
    "sigilContinue.interpol.s": [
      [
        /(@sigilEndDelimiter)[a-zA-Z]*/,
        {
          cases: {
            "$1==$S5": { token: "string.delimiter", next: "@pop" },
            "@default": "string",
          },
        },
      ],
      { include: "@stringContentInterpol" },
    ],

    "sigilStart.noInterpol.S": [
      [
        /~S@sigilStartDelimiter/,
        {
          token: "string.delimiter",
          switchTo: "@sigilContinue.$S2.$S3.$S4.$S5",
        },
      ],
    ],
    "sigilContinue.noInterpol.S": [
      // Ignore escaped sigil end
      [/(^|[^\\])\\@sigilEndDelimiter/, "string"],
      [
        /(@sigilEndDelimiter)[a-zA-Z]*/,
        {
          cases: {
            "$1==$S5": { token: "string.delimiter", next: "@pop" },
            "@default": "string",
          },
        },
      ],
      { include: "@stringContent" },
    ],

    "sigilStart.interpol.r": [
      [
        /~r@sigilStartDelimiter/,
        {
          token: "regexp.delimiter",
          switchTo: "@sigilContinue.$S2.$S3.$S4.$S5",
        },
      ],
    ],
    "sigilContinue.interpol.r": [
      [
        /(@sigilEndDelimiter)[a-zA-Z]*/,
        {
          cases: {
            "$1==$S5": { token: "regexp.delimiter", next: "@pop" },
            "@default": "regexp",
          },
        },
      ],
      { include: "@regexpContentInterpol" },
    ],

    "sigilStart.noInterpol.R": [
      [
        /~R@sigilStartDelimiter/,
        {
          token: "regexp.delimiter",
          switchTo: "@sigilContinue.$S2.$S3.$S4.$S5",
        },
      ],
    ],
    "sigilContinue.noInterpol.R": [
      // Ignore escaped sigil end
      [/(^|[^\\])\\@sigilEndDelimiter/, "regexp"],
      [
        /(@sigilEndDelimiter)[a-zA-Z]*/,
        {
          cases: {
            "$1==$S5": { token: "regexp.delimiter", next: "@pop" },
            "@default": "regexp",
          },
        },
      ],
      { include: "@regexpContent" },
    ],

    // Fallback to the generic sigil by default
    "sigilStart.interpol": [
      [
        /~([a-zA-Z])@sigilStartDelimiter/,
        {
          token: "sigil.delimiter",
          switchTo: "@sigilContinue.$S2.$S3.$S4.$S5",
        },
      ],
    ],
    "sigilContinue.interpol": [
      [
        /(@sigilEndDelimiter)[a-zA-Z]*/,
        {
          cases: {
            "$1==$S5": { token: "sigil.delimiter", next: "@pop" },
            "@default": "sigil",
          },
        },
      ],
      { include: "@sigilContentInterpol" },
    ],

    "sigilStart.noInterpol": [
      [
        /~([a-zA-Z])@sigilStartDelimiter/,
        {
          token: "sigil.delimiter",
          switchTo: "@sigilContinue.$S2.$S3.$S4.$S5",
        },
      ],
    ],
    "sigilContinue.noInterpol": [
      // Ignore escaped sigil end
      [/(^|[^\\])\\@sigilEndDelimiter/, "sigil"],
      [
        /(@sigilEndDelimiter)[a-zA-Z]*/,
        {
          cases: {
            "$1==$S5": { token: "sigil.delimiter", next: "@pop" },
            "@default": "sigil",
          },
        },
      ],
      { include: "@sigilContent" },
    ],

    // Attributes

    attributes: [
      // Module @doc* attributes - tokenized as comments
      [
        /\@(module|type)?doc (~[sS])?"""/,
        {
          token: "comment.block.documentation",
          next: "@doubleQuotedHeredocDocstring",
        },
      ],
      [
        /\@(module|type)?doc (~[sS])?"/,
        {
          token: "comment.block.documentation",
          next: "@doubleQuotedStringDocstring",
        },
      ],
      [/\@(module|type)?doc false/, "comment.block.documentation"],
      // Module attributes
      [/\@@variableName/, "variable"],
    ],
    doubleQuotedHeredocDocstring: [
      [/"""/, { token: "comment.block.documentation", next: "@pop" }],
      { include: "@docstringContent" },
    ],
    doubleQuotedStringDocstring: [
      [/"/, { token: "comment.block.documentation", next: "@pop" }],
      { include: "@docstringContent" },
    ],

    // Operators, punctuation, brackets

    symbols: [
      // Code point operator (either with regular character ?a or an escaped one ?\n)
      [/\?(\\.|[^\\\s])/, "number.constant"],
      // Anonymous function arguments
      [/&\d+/, "operator"],
      // Bitshift operators (must go before delimiters, so that << >> don't match first)
      [/<<<|>>>/, "operator"],
      // Delimiter pairs
      [/[()\[\]\{\}]|<<|>>/, "@brackets"],
      // Triple dot is a valid name (must go before operators, so that .. doesn't match instead)
      [/\.\.\./, "identifier"],
      // Punctuation => (must go before operators, so it's not tokenized as = then >)
      [/=>/, "punctuation"],
      // Operators
      [/@operator/, "operator"],
      // Punctuation
      [/[:;,.%]/, "punctuation"],
    ],

    // Generic helpers

    stringContentInterpol: [
      { include: "@interpolation" },
      { include: "@escapeChar" },
      { include: "@stringContent" },
    ],
    stringContent: [[/./, "string"]],

    stringConstantContentInterpol: [
      { include: "@interpolation" },
      { include: "@escapeChar" },
      { include: "@stringConstantContent" },
    ],
    stringConstantContent: [[/./, "constant"]],

    regexpContentInterpol: [
      { include: "@interpolation" },
      { include: "@escapeChar" },
      { include: "@regexpContent" },
    ],
    regexpContent: [
      // # may be a regular regexp char, so we use a heuristic
      // assuming a # surrounded by whitespace is actually a comment.
      [/(\s)(#)(\s.*)$/, ["white", "comment.punctuation", "comment"]],
      [/./, "regexp"],
    ],

    sigilContentInterpol: [
      { include: "@interpolation" },
      { include: "@escapeChar" },
      { include: "@sigilContent" },
    ],
    sigilContent: [[/./, "sigil"]],

    docstringContent: [[/./, "comment.block.documentation"]],

    escapeChar: [[/@escape/, "constant.character.escape"]],

    interpolation: [
      [
        /#{/,
        { token: "delimiter.bracket.embed", next: "@interpolationContinue" },
      ],
    ],
    interpolationContinue: [
      [/}/, { token: "delimiter.bracket.embed", next: "@pop" }],
      // Interpolation brackets may contain arbitrary code,
      // so we simply match against all the root rules,
      // until we reach interpolation end (the above matches).
      { include: "@root" },
    ],
  },
};
export default ElixirMonarchLanguage;

View file

@ -53,7 +53,7 @@ function shouldInsertClosingEnd(lines, lineIndex) {
) {
const nextLineWithSameIndentation = lines
.slice(lineIndex + 1)
.filter(line => !isBlank(line))
.filter((line) => !isBlank(line))
.find((line) => indentation(line) === prevIndentation);
if (nextLineWithSameIndentation) {

View file

@ -60,6 +60,7 @@ const Editor = {
renderIndentGuides: false,
occurrencesHighlight: false,
renderLineHighlight: "none",
theme: "custom",
});
editor.getModel().updateOptions({

View file

@ -1,5 +1,6 @@
import * as monaco from "monaco-editor/esm/vs/editor/editor.api";
import ElixirLanguageConfiguration from "./elixir/language_configuration";
import ElixirMonarchLanguage from "./elixir/monarch_language";
import ElixirOnTypeFormattingEditProvider from "./elixir/on_type_formatting_edit_provider";
// Register the Elixir language and add relevant configuration
@ -15,6 +16,44 @@ monaco.languages.registerOnTypeFormattingEditProvider(
ElixirOnTypeFormattingEditProvider
);
// Register the Monarch tokenizer for syntax highlighting
monaco.languages.setMonarchTokensProvider("elixir", ElixirMonarchLanguage);
// Define custom theme
//
// Token rules are listed as compact [token, value] pairs and expanded
// into rule objects below; the order of the resulting rules is preserved.
const toForegroundRule = ([token, foreground]) => ({ token, foreground });
const toFontStyleRule = ([token, fontStyle]) => ({ token, fontStyle });

const customThemeRules = [
  ...[
    ["", "#444444"],
    ["variable", "#ca4956"],
    ["constant", "#3c91cf"],
    ["constant.character.escape", "#3c91cf"],
    ["comment", "#9e9e9e"],
    ["number", "#bf8b56"],
    ["regexp", "#ca4956"],
    ["type", "#ca4956"],
    ["string", "#50a14f"],
    ["keyword", "#9c00b0"],
    ["operator", "#cc5c52"],
    ["delimiter.bracket.embed", "#204a87"],
    ["sigil", "#bf8b56"],
    ["function", "#3c91cf"],
    ["function.call", "#444444"],
  ].map(toForegroundRule),
  // Markdown specific
  ...[
    ["emphasis", "italic"],
    ["strong", "bold"],
  ].map(toFontStyleRule),
  ...[
    ["keyword.md", "#ca4956"],
    ["keyword.table", "#ca4956"],
    ["string.link.md", "#3c91cf"],
    ["variable.md", "#204a87"],
  ].map(toForegroundRule),
];

const customThemeColors = {
  "editor.background": "#fafafa",
  "editorLineNumber.foreground": "#cfd8dc",
  "editorCursor.foreground": "#666666",
  "editor.selectionBackground": "#eeeeee",
};

monaco.editor.defineTheme("custom", {
  base: "vs",
  inherit: false,
  rules: customThemeRules,
  colors: customThemeColors,
});
export default monaco;