diff --git a/.golangci.yaml b/.golangci.yaml index 0c1ba432..adcf3fd3 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -67,6 +67,8 @@ linters-settings: disabled: true - name: early-return disabled: true + - name: use-any + disabled: true gocritic: disabled-checks: - ifElseChain diff --git a/plugin/gomark/ast/ast.go b/plugin/gomark/ast/ast.go index e2ba029c..bd412963 100644 --- a/plugin/gomark/ast/ast.go +++ b/plugin/gomark/ast/ast.go @@ -1,19 +1 @@ package ast - -type Node struct { - Type string - Text string - Children []*Node -} - -type Document struct { - Nodes []*Node -} - -func NewDocument() *Document { - return &Document{} -} - -func (d *Document) AddNode(node *Node) { - d.Nodes = append(d.Nodes, node) -} diff --git a/plugin/gomark/ast/block.go b/plugin/gomark/ast/block.go new file mode 100644 index 00000000..fcbe89c5 --- /dev/null +++ b/plugin/gomark/ast/block.go @@ -0,0 +1,42 @@ +package ast + +type BaseBlock struct { +} + +type Paragraph struct { + BaseBlock + + Children []Node +} + +var NodeTypeParagraph = NewNodeType("Paragraph") + +func NewParagraph(children []Node) *Paragraph { + return &Paragraph{ + Children: children, + } +} + +func (*Paragraph) Type() NodeType { + return NodeTypeParagraph +} + +type CodeBlock struct { + BaseBlock + + Language string + Content string +} + +var NodeTypeCodeBlock = NewNodeType("CodeBlock") + +func NewCodeBlock(language, content string) *CodeBlock { + return &CodeBlock{ + Language: language, + Content: content, + } +} + +func (*CodeBlock) Type() NodeType { + return NodeTypeCodeBlock +} diff --git a/plugin/gomark/ast/inline.go b/plugin/gomark/ast/inline.go new file mode 100644 index 00000000..6bc230d7 --- /dev/null +++ b/plugin/gomark/ast/inline.go @@ -0,0 +1,42 @@ +package ast + +type BaseInline struct{} + +type Text struct { + BaseInline + + Content string +} + +var NodeTypeText = NewNodeType("Text") + +func NewText(content string) *Text { + return &Text{ + Content: content, + } +} + +func (*Text) Type() NodeType { + return NodeTypeText +} + +type Bold struct { + BaseInline + + // Symbol is "*" or "_" + Symbol string + Content string +} + +var NodeTypeBold = NewNodeType("Bold") + +func NewBold(symbol, content string) *Bold { + return &Bold{ + Symbol: symbol, + Content: content, + } +} + +func (*Bold) Type() NodeType { + return NodeTypeBold +} diff --git a/plugin/gomark/ast/node.go b/plugin/gomark/ast/node.go index 0ef0259d..ebd0e856 100644 --- a/plugin/gomark/ast/node.go +++ b/plugin/gomark/ast/node.go @@ -1,12 +1,20 @@ package ast -func NewNode(tp, text string) *Node { - return &Node{ - Type: tp, - Text: text, - } +type Node interface { + Type() NodeType } -func (n *Node) AddChild(child *Node) { - n.Children = append(n.Children, child) +type NodeType int + +func (t NodeType) String() string { + return nodeTypeNames[t] +} + +var nodeTypeIndex NodeType +var nodeTypeNames = []string{""} + +func NewNodeType(name string) NodeType { + nodeTypeNames = append(nodeTypeNames, name) + nodeTypeIndex++ + return nodeTypeIndex } diff --git a/plugin/gomark/parser/bold.go b/plugin/gomark/parser/bold.go index 6b38a0b0..237cb885 100644 --- a/plugin/gomark/parser/bold.go +++ b/plugin/gomark/parser/bold.go @@ -1,49 +1,60 @@ package parser import ( + "github.com/usememos/memos/plugin/gomark/ast" "github.com/usememos/memos/plugin/gomark/parser/tokenizer" ) -type BoldParser struct { - ContentTokens []*tokenizer.Token +type BoldParser struct{} + +var defaultBoldParser = &BoldParser{} + +func NewBoldParser() InlineParser { + return defaultBoldParser } -func NewBoldParser() *BoldParser { - return &BoldParser{} -} - -func (*BoldParser) Match(tokens []*tokenizer.Token) *BoldParser { +func (*BoldParser) Match(tokens []*tokenizer.Token) (int, bool) { if len(tokens) < 5 { - return nil + return 0, false } prefixTokens := tokens[:2] if prefixTokens[0].Type != prefixTokens[1].Type { - return nil + return 0, false } prefixTokenType := prefixTokens[0].Type if prefixTokenType != tokenizer.Star && prefixTokenType != tokenizer.Underline { - return nil + return 0, false } - contentTokens := []*tokenizer.Token{} cursor, matched := 2, false for ; cursor < len(tokens)-1; cursor++ { token, nextToken := tokens[cursor], tokens[cursor+1] if token.Type == tokenizer.Newline || nextToken.Type == tokenizer.Newline { - return nil + return 0, false } if token.Type == prefixTokenType && nextToken.Type == prefixTokenType { matched = true break } - contentTokens = append(contentTokens, token) } if !matched { + return 0, false + } + + return cursor + 2, true +} + +func (p *BoldParser) Parse(tokens []*tokenizer.Token) ast.Node { + size, ok := p.Match(tokens) + if size == 0 || !ok { return nil } - return &BoldParser{ - ContentTokens: contentTokens, + prefixTokenType := tokens[0].Type + contentTokens := tokens[2 : size-2] + return &ast.Bold{ + Symbol: prefixTokenType, + Content: tokenizer.Stringify(contentTokens), } } diff --git a/plugin/gomark/parser/bold_test.go b/plugin/gomark/parser/bold_test.go index 511bb70e..de758d5f 100644 --- a/plugin/gomark/parser/bold_test.go +++ b/plugin/gomark/parser/bold_test.go @@ -5,13 +5,14 @@ import ( "github.com/stretchr/testify/require" + "github.com/usememos/memos/plugin/gomark/ast" "github.com/usememos/memos/plugin/gomark/parser/tokenizer" ) func TestBoldParser(t *testing.T) { tests := []struct { text string - bold *BoldParser + bold ast.Node }{ { text: "*Hello world!", @@ -19,32 +20,16 @@ func TestBoldParser(t *testing.T) { }, { text: "**Hello**", - bold: &BoldParser{ - ContentTokens: []*tokenizer.Token{ - { - Type: tokenizer.Text, - Value: "Hello", - }, - }, + bold: &ast.Bold{ + Symbol: "*", + Content: "Hello", }, }, { text: "** Hello **", - bold: &BoldParser{ - ContentTokens: []*tokenizer.Token{ - { - Type: tokenizer.Space, - Value: " ", - }, - { - Type: tokenizer.Text, - Value: "Hello", - }, - { - Type: tokenizer.Space, - Value: " ", - }, - }, + bold: &ast.Bold{ + Symbol: "*", + Content: " Hello ", }, }, { @@ -55,35 +40,11 @@ func TestBoldParser(t *testing.T) { text: "* * Hello **", bold: nil, }, - { - text: `** Hello -**`, - bold: nil, - }, - { - text: `**Hello \n**`, - bold: &BoldParser{ - ContentTokens: []*tokenizer.Token{ - { - Type: tokenizer.Text, - Value: "Hello", - }, - { - Type: tokenizer.Space, - Value: " ", - }, - { - Type: tokenizer.Text, - Value: `\n`, - }, - }, - }, - }, } for _, test := range tests { tokens := tokenizer.Tokenize(test.text) - bold := NewBoldParser() - require.Equal(t, test.bold, bold.Match(tokens)) + parser := NewBoldParser() + require.Equal(t, test.bold, parser.Parse(tokens)) } } diff --git a/plugin/gomark/parser/code_block.go b/plugin/gomark/parser/code_block.go index 4bf4fcac..7cc8510f 100644 --- a/plugin/gomark/parser/code_block.go +++ b/plugin/gomark/parser/code_block.go @@ -1,52 +1,79 @@ package parser -import "github.com/usememos/memos/plugin/gomark/parser/tokenizer" +import ( + "github.com/usememos/memos/plugin/gomark/ast" + "github.com/usememos/memos/plugin/gomark/parser/tokenizer" +) type CodeBlockParser struct { Language string Content string } +var defaultCodeBlockParser = &CodeBlockParser{} + func NewCodeBlockParser() *CodeBlockParser { - return &CodeBlockParser{} + return defaultCodeBlockParser } -func (*CodeBlockParser) Match(tokens []*tokenizer.Token) *CodeBlockParser { +func (*CodeBlockParser) Match(tokens []*tokenizer.Token) (int, bool) { if len(tokens) < 9 { - return nil + return 0, false } if tokens[0].Type != tokenizer.Backtick || tokens[1].Type != tokenizer.Backtick || tokens[2].Type != tokenizer.Backtick { - return nil + return 0, false } if tokens[3].Type != tokenizer.Newline && tokens[4].Type != tokenizer.Newline { - return nil + return 0, false } - cursor, language := 4, "" + cursor := 4 if tokens[3].Type != tokenizer.Newline { - language = tokens[3].Value cursor = 5 } - content, matched := "", false + matched := false for ; cursor < len(tokens)-3; cursor++ { if tokens[cursor].Type == tokenizer.Newline && tokens[cursor+1].Type == tokenizer.Backtick && tokens[cursor+2].Type == tokenizer.Backtick && tokens[cursor+3].Type == tokenizer.Backtick { if cursor+3 == len(tokens)-1 { + cursor += 4 matched = true break } else if tokens[cursor+4].Type == tokenizer.Newline { + cursor += 5 matched = true break } } - content += tokens[cursor].Value } if !matched { + return 0, false + } + + return cursor, true +} + +func (p *CodeBlockParser) Parse(tokens []*tokenizer.Token) ast.Node { + size, ok := p.Match(tokens) + if size == 0 || !ok { return nil } - return &CodeBlockParser{ - Language: language, - Content: content, + languageToken := tokens[3] + contentStart, contentEnd := 5, size-4 + if languageToken.Type == tokenizer.Newline { + languageToken = nil + contentStart = 4 } + if tokens[size-1].Type == tokenizer.Newline { + contentEnd = size - 5 + } + + codeBlock := &ast.CodeBlock{ + Content: tokenizer.Stringify(tokens[contentStart:contentEnd]), + } + if languageToken != nil { + codeBlock.Language = languageToken.String() + } + return codeBlock } diff --git a/plugin/gomark/parser/code_block_test.go b/plugin/gomark/parser/code_block_test.go index cbfbf6bd..c4bcaaae 100644 --- a/plugin/gomark/parser/code_block_test.go +++ b/plugin/gomark/parser/code_block_test.go @@ -5,13 +5,14 @@ import ( "github.com/stretchr/testify/require" + "github.com/usememos/memos/plugin/gomark/ast" "github.com/usememos/memos/plugin/gomark/parser/tokenizer" ) func TestCodeBlockParser(t *testing.T) { tests := []struct { text string - codeBlock *CodeBlockParser + codeBlock ast.Node }{ { text: "```Hello world!```", @@ -19,21 +20,21 @@ func TestCodeBlockParser(t *testing.T) { }, { text: "```\nHello\n```", - codeBlock: &CodeBlockParser{ + codeBlock: &ast.CodeBlock{ Language: "", Content: "Hello", }, }, { text: "```\nHello world!\n```", - codeBlock: &CodeBlockParser{ + codeBlock: &ast.CodeBlock{ Language: "", Content: "Hello world!", }, }, { text: "```java\nHello \n world!\n```", - codeBlock: &CodeBlockParser{ + codeBlock: &ast.CodeBlock{ Language: "java", Content: "Hello \n world!", }, @@ -48,7 +49,7 @@ func TestCodeBlockParser(t *testing.T) { }, { text: "```java\nHello \n world!\n```\n123123", - codeBlock: &CodeBlockParser{ + codeBlock: &ast.CodeBlock{ Language: "java", Content: "Hello \n world!", }, @@ -57,7 +58,7 @@ func TestCodeBlockParser(t *testing.T) { for _, test := range tests { tokens := tokenizer.Tokenize(test.text) - codeBlock := NewCodeBlockParser() - require.Equal(t, test.codeBlock, codeBlock.Match(tokens)) + parser := NewCodeBlockParser() + require.Equal(t, test.codeBlock, parser.Parse(tokens)) } } diff --git a/plugin/gomark/parser/paragraph.go b/plugin/gomark/parser/paragraph.go index 2b7849e3..dc413e29 100644 --- a/plugin/gomark/parser/paragraph.go +++ b/plugin/gomark/parser/paragraph.go @@ -1,16 +1,21 @@ package parser -import "github.com/usememos/memos/plugin/gomark/parser/tokenizer" +import ( + "github.com/usememos/memos/plugin/gomark/ast" + "github.com/usememos/memos/plugin/gomark/parser/tokenizer" +) type ParagraphParser struct { ContentTokens []*tokenizer.Token } +var defaultParagraphParser = &ParagraphParser{} + func NewParagraphParser() *ParagraphParser { - return &ParagraphParser{} + return defaultParagraphParser } -func (*ParagraphParser) Match(tokens []*tokenizer.Token) *ParagraphParser { +func (*ParagraphParser) Match(tokens []*tokenizer.Token) (int, bool) { contentTokens := []*tokenizer.Token{} cursor := 0 for ; cursor < len(tokens); cursor++ { @@ -21,10 +26,21 @@ func (*ParagraphParser) Match(tokens []*tokenizer.Token) *ParagraphParser { contentTokens = append(contentTokens, token) } if len(contentTokens) == 0 { + return 0, false + } + return len(contentTokens), true +} + +func (p *ParagraphParser) Parse(tokens []*tokenizer.Token) ast.Node { + size, ok := p.Match(tokens) + if size == 0 || !ok { return nil } - return &ParagraphParser{ - ContentTokens: contentTokens, - } + contentTokens := tokens[:size] + children := ParseInline(contentTokens, []InlineParser{ + NewBoldParser(), + NewTextParser(), + }) + return ast.NewParagraph(children) } diff --git a/plugin/gomark/parser/paragraph_test.go b/plugin/gomark/parser/paragraph_test.go index d3e0f55b..3bc7c1af 100644 --- a/plugin/gomark/parser/paragraph_test.go +++ b/plugin/gomark/parser/paragraph_test.go @@ -5,73 +5,25 @@ import ( "github.com/stretchr/testify/require" + "github.com/usememos/memos/plugin/gomark/ast" "github.com/usememos/memos/plugin/gomark/parser/tokenizer" ) func TestParagraphParser(t *testing.T) { tests := []struct { text string - paragraph *ParagraphParser + paragraph ast.Node }{ { text: "", paragraph: nil, }, { - text: "Hello world", - paragraph: &ParagraphParser{ - ContentTokens: []*tokenizer.Token{ - { - Type: tokenizer.Text, - Value: "Hello", - }, - { - Type: tokenizer.Space, - Value: " ", - }, - { - Type: tokenizer.Text, - Value: "world", - }, - }, - }, - }, - { - text: `Hello -world`, - paragraph: &ParagraphParser{ - ContentTokens: []*tokenizer.Token{ - { - Type: tokenizer.Text, - Value: "Hello", - }, - { - Type: tokenizer.Space, - Value: " ", - }, - }, - }, - }, - { - text: `Hello \n -world`, - paragraph: &ParagraphParser{ - ContentTokens: []*tokenizer.Token{ - { - Type: tokenizer.Text, - Value: "Hello", - }, - { - Type: tokenizer.Space, - Value: " ", - }, - { - Type: tokenizer.Text, - Value: `\n`, - }, - { - Type: tokenizer.Space, - Value: " ", + text: "Hello world!", + paragraph: &ast.Paragraph{ + Children: []ast.Node{ + &ast.Text{ + Content: "Hello world!", }, }, }, @@ -80,7 +32,7 @@ world`, for _, test := range tests { tokens := tokenizer.Tokenize(test.text) - paragraph := NewParagraphParser() - require.Equal(t, test.paragraph, paragraph.Match(tokens)) + parser := NewParagraphParser() + require.Equal(t, test.paragraph, parser.Parse(tokens)) } } diff --git a/plugin/gomark/parser/parser.go b/plugin/gomark/parser/parser.go index 0bfe2c25..7024d3c1 100644 --- a/plugin/gomark/parser/parser.go +++ b/plugin/gomark/parser/parser.go @@ -1 +1,65 @@ package parser + +import ( + "github.com/usememos/memos/plugin/gomark/ast" + "github.com/usememos/memos/plugin/gomark/parser/tokenizer" +) + +type Context struct { + BlockParsers []BlockParser + InlineParsers []InlineParser +} + +type BaseParser interface { + Match(tokens []*tokenizer.Token) (int, bool) + Parse(tokens []*tokenizer.Token) ast.Node +} + +type InlineParser interface { + BaseParser +} + +type BlockParser interface { + BaseParser +} + +func Parse(tokens []*tokenizer.Token) []ast.Node { + nodes := []ast.Node{} + blockParsers := []BlockParser{ + NewParagraphParser(), + } + for len(tokens) > 0 { + for _, blockParser := range blockParsers { + cursor, matched := blockParser.Match(tokens) + if matched { + node := blockParser.Parse(tokens) + nodes = append(nodes, node) + tokens = tokens[cursor:] + break + } + } + } + return nodes +} + +func ParseInline(tokens []*tokenizer.Token, inlineParsers []InlineParser) []ast.Node { + nodes := []ast.Node{} + var lastNode ast.Node + for len(tokens) > 0 { + for _, inlineParser := range inlineParsers { + cursor, matched := inlineParser.Match(tokens) + if matched { + node := inlineParser.Parse(tokens) + if node.Type() == ast.NodeTypeText && lastNode != nil && lastNode.Type() == ast.NodeTypeText { + lastNode.(*ast.Text).Content += node.(*ast.Text).Content + } else { + nodes = append(nodes, node) + lastNode = node + } + tokens = tokens[cursor:] + break + } + } + } + return nodes +} diff --git a/plugin/gomark/parser/parser_test.go b/plugin/gomark/parser/parser_test.go new file mode 100644 index 00000000..17a561bc --- /dev/null +++ b/plugin/gomark/parser/parser_test.go @@ -0,0 +1,71 @@ +package parser + +import ( + "testing" + + "github.com/stretchr/testify/require" + + "github.com/usememos/memos/plugin/gomark/ast" + "github.com/usememos/memos/plugin/gomark/parser/tokenizer" +) + +func TestParser(t *testing.T) { + tests := []struct { + text string + nodes []ast.Node + }{ + { + text: "Hello world!", + nodes: []ast.Node{ + &ast.Paragraph{ + Children: []ast.Node{ + &ast.Text{ + Content: "Hello world!", + }, + }, + }, + }, + }, + { + text: "**Hello** world!", + nodes: []ast.Node{ + &ast.Paragraph{ + Children: []ast.Node{ + &ast.Bold{ + Symbol: "*", + Content: "Hello", + }, + &ast.Text{ + Content: " world!", + }, + }, + }, + }, + }, + { + text: "Hello **world**!", + nodes: []ast.Node{ + &ast.Paragraph{ + Children: []ast.Node{ + &ast.Text{ + Content: "Hello ", + }, + &ast.Bold{ + Symbol: "*", + Content: "world", + }, + &ast.Text{ + Content: "!", + }, + }, + }, + }, + }, + } + + for _, test := range tests { + tokens := tokenizer.Tokenize(test.text) + nodes := Parse(tokens) + require.Equal(t, test.nodes, nodes) + } +} diff --git a/plugin/gomark/parser/text.go b/plugin/gomark/parser/text.go new file mode 100644 index 00000000..0db94bf4 --- /dev/null +++ b/plugin/gomark/parser/text.go @@ -0,0 +1,30 @@ +package parser + +import ( + "github.com/usememos/memos/plugin/gomark/ast" + "github.com/usememos/memos/plugin/gomark/parser/tokenizer" +) + +type TextParser struct { + Content string +} + +var defaultTextParser = &TextParser{} + +func NewTextParser() *TextParser { + return defaultTextParser +} + +func (*TextParser) Match(tokens []*tokenizer.Token) (int, bool) { + if len(tokens) == 0 { + return 0, false + } + return 1, true +} + +func (*TextParser) Parse(tokens []*tokenizer.Token) ast.Node { + if len(tokens) == 0 { + return ast.NewText("") + } + return ast.NewText(tokens[0].String()) +} diff --git a/plugin/gomark/parser/tokenizer/tokenizer.go b/plugin/gomark/parser/tokenizer/tokenizer.go index e73b0ff0..a9c9f42c 100644 --- a/plugin/gomark/parser/tokenizer/tokenizer.go +++ b/plugin/gomark/parser/tokenizer/tokenizer.go @@ -72,3 +72,15 @@ func Tokenize(text string) []*Token { } return tokens } + +func (t *Token) String() string { + return t.Value +} + +func Stringify(tokens []*Token) string { + text := "" + for _, token := range tokens { + text += token.String() + } + return text +}