chore: implement gomark skeleton

This commit is contained in:
Steven 2023-12-12 23:24:02 +08:00
parent 7f1f6f77a0
commit aa3632e2ac
15 changed files with 393 additions and 172 deletions

View file

@ -67,6 +67,8 @@ linters-settings:
disabled: true
- name: early-return
disabled: true
- name: use-any
disabled: true
gocritic:
disabled-checks:
- ifElseChain

View file

@ -1,19 +1 @@
package ast
// Node is a single element in the parsed tree. Type labels the node kind,
// Text holds its raw content, and Children are nested child nodes.
type Node struct {
	Type     string
	Text     string
	Children []*Node
}

// Document is the root container for a sequence of top-level nodes.
type Document struct {
	Nodes []*Node
}

// NewDocument returns an empty document ready to receive nodes.
func NewDocument() *Document {
	return new(Document)
}

// AddNode appends node to the document's top-level node list.
func (d *Document) AddNode(node *Node) {
	d.Nodes = append(d.Nodes, node)
}

View file

@ -0,0 +1,42 @@
package ast
// BaseBlock provides common state shared by block-level nodes.
// It is currently empty and exists as an embedding point for future fields.
type BaseBlock struct {
}

// Paragraph is a block node holding a sequence of inline children.
type Paragraph struct {
	BaseBlock
	Children []Node
}

// NodeTypeParagraph is the registered type value for Paragraph nodes.
var NodeTypeParagraph = NewNodeType("Paragraph")

// NewParagraph builds a Paragraph wrapping the given inline children.
func NewParagraph(children []Node) *Paragraph {
	return &Paragraph{
		Children: children,
	}
}

// Type implements the Node interface.
func (*Paragraph) Type() NodeType {
	return NodeTypeParagraph
}
// CodeBlock is a block node for fenced code. Language is the tag read from
// the opening fence line (may be empty) and Content is the raw body text.
type CodeBlock struct {
	BaseBlock
	Language string
	Content  string
}

// NodeTypeCodeBlock is the registered type value for CodeBlock nodes.
var NodeTypeCodeBlock = NewNodeType("CodeBlock")

// NewCodeBlock builds a CodeBlock from its language tag and body text.
func NewCodeBlock(language, content string) *CodeBlock {
	return &CodeBlock{
		Language: language,
		Content:  content,
	}
}

// Type implements the Node interface.
func (*CodeBlock) Type() NodeType {
	return NodeTypeCodeBlock
}

View file

@ -0,0 +1,42 @@
package ast
// BaseInline provides common state shared by inline nodes; currently an
// empty embedding point.
type BaseInline struct{}

// Text is an inline node holding a run of plain text.
type Text struct {
	BaseInline
	Content string
}

// NodeTypeText is the registered type value for Text nodes.
var NodeTypeText = NewNodeType("Text")

// NewText builds a Text node around content.
func NewText(content string) *Text {
	return &Text{
		Content: content,
	}
}

// Type implements the Node interface.
func (*Text) Type() NodeType {
	return NodeTypeText
}
// Bold is an inline node for emphasized text.
type Bold struct {
	BaseInline
	// Symbol is the delimiter character used in the source: "*" or "_".
	Symbol  string
	Content string
}

// NodeTypeBold is the registered type value for Bold nodes.
var NodeTypeBold = NewNodeType("Bold")

// NewBold builds a Bold node from its delimiter symbol and inner text.
func NewBold(symbol, content string) *Bold {
	return &Bold{
		Symbol:  symbol,
		Content: content,
	}
}

// Type implements the Node interface.
func (*Bold) Type() NodeType {
	return NodeTypeBold
}

View file

@ -1,12 +1,20 @@
package ast
func NewNode(tp, text string) *Node {
return &Node{
Type: tp,
Text: text,
}
// Node is the interface implemented by every AST node; Type reports the
// node's registered NodeType.
type Node interface {
	Type() NodeType
}
func (n *Node) AddChild(child *Node) {
n.Children = append(n.Children, child)
// NodeType identifies the concrete kind of an AST node. Values are
// allocated sequentially via NewNodeType, typically from package-level
// variable initialization.
type NodeType int

// String returns the name registered for t, or "" when t is out of range
// (including the zero value, which is a reserved placeholder).
// Bug fix: previously an out-of-range value caused an index-out-of-range
// panic instead of degrading gracefully.
func (t NodeType) String() string {
	if t <= 0 || int(t) >= len(nodeTypeNames) {
		return ""
	}
	return nodeTypeNames[t]
}

// nodeTypeIndex is the most recently allocated NodeType value.
var nodeTypeIndex NodeType

// nodeTypeNames maps a NodeType value to its registered name; index 0 is
// a reserved placeholder so the zero NodeType never names a real type.
var nodeTypeNames = []string{""}

// NewNodeType registers name and returns a fresh, unique NodeType.
// Not safe for concurrent use; call only during package initialization.
func NewNodeType(name string) NodeType {
	nodeTypeNames = append(nodeTypeNames, name)
	nodeTypeIndex++
	return nodeTypeIndex
}

View file

@ -1,49 +1,60 @@
package parser
import (
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
type BoldParser struct {
ContentTokens []*tokenizer.Token
type BoldParser struct{}
var defaultBoldParser = &BoldParser{}
func NewBoldParser() InlineParser {
return defaultBoldParser
}
func NewBoldParser() *BoldParser {
return &BoldParser{}
}
func (*BoldParser) Match(tokens []*tokenizer.Token) *BoldParser {
func (*BoldParser) Match(tokens []*tokenizer.Token) (int, bool) {
if len(tokens) < 5 {
return nil
return 0, false
}
prefixTokens := tokens[:2]
if prefixTokens[0].Type != prefixTokens[1].Type {
return nil
return 0, false
}
prefixTokenType := prefixTokens[0].Type
if prefixTokenType != tokenizer.Star && prefixTokenType != tokenizer.Underline {
return nil
return 0, false
}
contentTokens := []*tokenizer.Token{}
cursor, matched := 2, false
for ; cursor < len(tokens)-1; cursor++ {
token, nextToken := tokens[cursor], tokens[cursor+1]
if token.Type == tokenizer.Newline || nextToken.Type == tokenizer.Newline {
return nil
return 0, false
}
if token.Type == prefixTokenType && nextToken.Type == prefixTokenType {
matched = true
break
}
contentTokens = append(contentTokens, token)
}
if !matched {
return 0, false
}
return cursor + 2, true
}
func (p *BoldParser) Parse(tokens []*tokenizer.Token) ast.Node {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil
}
return &BoldParser{
ContentTokens: contentTokens,
prefixTokenType := tokens[0].Type
contentTokens := tokens[2 : size-2]
return &ast.Bold{
Symbol: prefixTokenType,
Content: tokenizer.Stringify(contentTokens),
}
}

View file

@ -5,13 +5,14 @@ import (
"github.com/stretchr/testify/require"
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
func TestBoldParser(t *testing.T) {
tests := []struct {
text string
bold *BoldParser
bold ast.Node
}{
{
text: "*Hello world!",
@ -19,32 +20,16 @@ func TestBoldParser(t *testing.T) {
},
{
text: "**Hello**",
bold: &BoldParser{
ContentTokens: []*tokenizer.Token{
{
Type: tokenizer.Text,
Value: "Hello",
},
},
bold: &ast.Bold{
Symbol: "*",
Content: "Hello",
},
},
{
text: "** Hello **",
bold: &BoldParser{
ContentTokens: []*tokenizer.Token{
{
Type: tokenizer.Space,
Value: " ",
},
{
Type: tokenizer.Text,
Value: "Hello",
},
{
Type: tokenizer.Space,
Value: " ",
},
},
bold: &ast.Bold{
Symbol: "*",
Content: " Hello ",
},
},
{
@ -55,35 +40,11 @@ func TestBoldParser(t *testing.T) {
text: "* * Hello **",
bold: nil,
},
{
text: `** Hello
**`,
bold: nil,
},
{
text: `**Hello \n**`,
bold: &BoldParser{
ContentTokens: []*tokenizer.Token{
{
Type: tokenizer.Text,
Value: "Hello",
},
{
Type: tokenizer.Space,
Value: " ",
},
{
Type: tokenizer.Text,
Value: `\n`,
},
},
},
},
}
for _, test := range tests {
tokens := tokenizer.Tokenize(test.text)
bold := NewBoldParser()
require.Equal(t, test.bold, bold.Match(tokens))
parser := NewBoldParser()
require.Equal(t, test.bold, parser.Parse(tokens))
}
}

View file

@ -1,52 +1,79 @@
package parser
import "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
import (
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
type CodeBlockParser struct {
Language string
Content string
}
var defaultCodeBlockParser = &CodeBlockParser{}
func NewCodeBlockParser() *CodeBlockParser {
return &CodeBlockParser{}
return defaultCodeBlockParser
}
func (*CodeBlockParser) Match(tokens []*tokenizer.Token) *CodeBlockParser {
func (*CodeBlockParser) Match(tokens []*tokenizer.Token) (int, bool) {
if len(tokens) < 9 {
return nil
return 0, false
}
if tokens[0].Type != tokenizer.Backtick || tokens[1].Type != tokenizer.Backtick || tokens[2].Type != tokenizer.Backtick {
return nil
return 0, false
}
if tokens[3].Type != tokenizer.Newline && tokens[4].Type != tokenizer.Newline {
return nil
return 0, false
}
cursor, language := 4, ""
cursor := 4
if tokens[3].Type != tokenizer.Newline {
language = tokens[3].Value
cursor = 5
}
content, matched := "", false
matched := false
for ; cursor < len(tokens)-3; cursor++ {
if tokens[cursor].Type == tokenizer.Newline && tokens[cursor+1].Type == tokenizer.Backtick && tokens[cursor+2].Type == tokenizer.Backtick && tokens[cursor+3].Type == tokenizer.Backtick {
if cursor+3 == len(tokens)-1 {
cursor += 4
matched = true
break
} else if tokens[cursor+4].Type == tokenizer.Newline {
cursor += 5
matched = true
break
}
}
content += tokens[cursor].Value
}
if !matched {
return 0, false
}
return cursor, true
}
func (p *CodeBlockParser) Parse(tokens []*tokenizer.Token) ast.Node {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil
}
return &CodeBlockParser{
Language: language,
Content: content,
languageToken := tokens[3]
contentStart, contentEnd := 5, size-4
if languageToken.Type == tokenizer.Newline {
languageToken = nil
contentStart = 4
}
if tokens[size-1].Type == tokenizer.Newline {
contentEnd = size - 5
}
codeBlock := &ast.CodeBlock{
Content: tokenizer.Stringify(tokens[contentStart:contentEnd]),
}
if languageToken != nil {
codeBlock.Language = languageToken.String()
}
return codeBlock
}

View file

@ -5,13 +5,14 @@ import (
"github.com/stretchr/testify/require"
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
func TestCodeBlockParser(t *testing.T) {
tests := []struct {
text string
codeBlock *CodeBlockParser
codeBlock ast.Node
}{
{
text: "```Hello world!```",
@ -19,21 +20,21 @@ func TestCodeBlockParser(t *testing.T) {
},
{
text: "```\nHello\n```",
codeBlock: &CodeBlockParser{
codeBlock: &ast.CodeBlock{
Language: "",
Content: "Hello",
},
},
{
text: "```\nHello world!\n```",
codeBlock: &CodeBlockParser{
codeBlock: &ast.CodeBlock{
Language: "",
Content: "Hello world!",
},
},
{
text: "```java\nHello \n world!\n```",
codeBlock: &CodeBlockParser{
codeBlock: &ast.CodeBlock{
Language: "java",
Content: "Hello \n world!",
},
@ -48,7 +49,7 @@ func TestCodeBlockParser(t *testing.T) {
},
{
text: "```java\nHello \n world!\n```\n123123",
codeBlock: &CodeBlockParser{
codeBlock: &ast.CodeBlock{
Language: "java",
Content: "Hello \n world!",
},
@ -57,7 +58,7 @@ func TestCodeBlockParser(t *testing.T) {
for _, test := range tests {
tokens := tokenizer.Tokenize(test.text)
codeBlock := NewCodeBlockParser()
require.Equal(t, test.codeBlock, codeBlock.Match(tokens))
parser := NewCodeBlockParser()
require.Equal(t, test.codeBlock, parser.Parse(tokens))
}
}

View file

@ -1,16 +1,21 @@
package parser
import "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
import (
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
type ParagraphParser struct {
ContentTokens []*tokenizer.Token
}
var defaultParagraphParser = &ParagraphParser{}
func NewParagraphParser() *ParagraphParser {
return &ParagraphParser{}
return defaultParagraphParser
}
func (*ParagraphParser) Match(tokens []*tokenizer.Token) *ParagraphParser {
func (*ParagraphParser) Match(tokens []*tokenizer.Token) (int, bool) {
contentTokens := []*tokenizer.Token{}
cursor := 0
for ; cursor < len(tokens); cursor++ {
@ -21,10 +26,21 @@ func (*ParagraphParser) Match(tokens []*tokenizer.Token) *ParagraphParser {
contentTokens = append(contentTokens, token)
}
if len(contentTokens) == 0 {
return 0, false
}
return len(contentTokens), true
}
func (p *ParagraphParser) Parse(tokens []*tokenizer.Token) ast.Node {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil
}
return &ParagraphParser{
ContentTokens: contentTokens,
}
contentTokens := tokens[:size]
children := ParseInline(contentTokens, []InlineParser{
NewBoldParser(),
NewTextParser(),
})
return ast.NewParagraph(children)
}

View file

@ -5,73 +5,25 @@ import (
"github.com/stretchr/testify/require"
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
func TestParagraphParser(t *testing.T) {
tests := []struct {
text string
paragraph *ParagraphParser
paragraph ast.Node
}{
{
text: "",
paragraph: nil,
},
{
text: "Hello world",
paragraph: &ParagraphParser{
ContentTokens: []*tokenizer.Token{
{
Type: tokenizer.Text,
Value: "Hello",
},
{
Type: tokenizer.Space,
Value: " ",
},
{
Type: tokenizer.Text,
Value: "world",
},
},
},
},
{
text: `Hello
world`,
paragraph: &ParagraphParser{
ContentTokens: []*tokenizer.Token{
{
Type: tokenizer.Text,
Value: "Hello",
},
{
Type: tokenizer.Space,
Value: " ",
},
},
},
},
{
text: `Hello \n
world`,
paragraph: &ParagraphParser{
ContentTokens: []*tokenizer.Token{
{
Type: tokenizer.Text,
Value: "Hello",
},
{
Type: tokenizer.Space,
Value: " ",
},
{
Type: tokenizer.Text,
Value: `\n`,
},
{
Type: tokenizer.Space,
Value: " ",
text: "Hello world!",
paragraph: &ast.Paragraph{
Children: []ast.Node{
&ast.Text{
Content: "Hello world!",
},
},
},
@ -80,7 +32,7 @@ world`,
for _, test := range tests {
tokens := tokenizer.Tokenize(test.text)
paragraph := NewParagraphParser()
require.Equal(t, test.paragraph, paragraph.Match(tokens))
parser := NewParagraphParser()
require.Equal(t, test.paragraph, parser.Parse(tokens))
}
}

View file

@ -1 +1,65 @@
package parser
import (
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
// Context carries the parser sets configured for a parse run.
// NOTE(review): not yet referenced by Parse/ParseInline in this file —
// confirm intended wiring before relying on it.
type Context struct {
	BlockParsers  []BlockParser
	InlineParsers []InlineParser
}

// BaseParser is the contract shared by all parsers: Match reports how many
// tokens the construct would consume and whether it matched at the start of
// tokens; Parse consumes tokens and returns the resulting AST node (nil
// when the tokens do not match).
type BaseParser interface {
	Match(tokens []*tokenizer.Token) (int, bool)
	Parse(tokens []*tokenizer.Token) ast.Node
}

// InlineParser parses inline constructs (text, bold, ...).
type InlineParser interface {
	BaseParser
}

// BlockParser parses block constructs (paragraph, code block, ...).
type BlockParser interface {
	BaseParser
}
// Parse converts a token stream into a list of top-level block nodes by
// repeatedly trying each registered block parser against the remaining
// tokens and consuming whatever the first match claims.
func Parse(tokens []*tokenizer.Token) []ast.Node {
	nodes := []ast.Node{}
	blockParsers := []BlockParser{
		NewParagraphParser(),
	}
	for len(tokens) > 0 {
		matched := false
		for _, blockParser := range blockParsers {
			cursor, ok := blockParser.Match(tokens)
			// A zero-width match would make no progress; treat it as a miss.
			if !ok || cursor == 0 {
				continue
			}
			node := blockParser.Parse(tokens)
			nodes = append(nodes, node)
			tokens = tokens[cursor:]
			matched = true
			break
		}
		// Bug fix: previously, when no block parser matched the remaining
		// tokens, this outer loop spun forever. Stop instead of hanging.
		if !matched {
			break
		}
	}
	return nodes
}
// ParseInline converts tokens into inline nodes, trying inlineParsers in
// priority order at each position. Consecutive Text nodes are merged into
// a single node so plain runs come out as one Text.
func ParseInline(tokens []*tokenizer.Token, inlineParsers []InlineParser) []ast.Node {
	nodes := []ast.Node{}
	var lastNode ast.Node
	for len(tokens) > 0 {
		matched := false
		for _, inlineParser := range inlineParsers {
			cursor, ok := inlineParser.Match(tokens)
			// A zero-width match would make no progress; treat it as a miss.
			if !ok || cursor == 0 {
				continue
			}
			node := inlineParser.Parse(tokens)
			// Coalesce adjacent Text nodes rather than appending a new one.
			if node.Type() == ast.NodeTypeText && lastNode != nil && lastNode.Type() == ast.NodeTypeText {
				lastNode.(*ast.Text).Content += node.(*ast.Text).Content
			} else {
				nodes = append(nodes, node)
				lastNode = node
			}
			tokens = tokens[cursor:]
			matched = true
			break
		}
		// Bug fix: without this guard the loop hangs when no parser in
		// inlineParsers matches the remaining tokens.
		if !matched {
			break
		}
	}
	return nodes
}

View file

@ -0,0 +1,71 @@
package parser
import (
"testing"
"github.com/stretchr/testify/require"
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
// TestParser exercises the end-to-end tokenize → Parse pipeline on small
// markdown snippets, checking the produced paragraph/inline AST shape.
func TestParser(t *testing.T) {
	tests := []struct {
		text  string
		nodes []ast.Node
	}{
		{
			// A plain line becomes one paragraph with a single text child.
			text: "Hello world!",
			nodes: []ast.Node{
				&ast.Paragraph{
					Children: []ast.Node{
						&ast.Text{
							Content: "Hello world!",
						},
					},
				},
			},
		},
		{
			// Leading bold span followed by plain text.
			text: "**Hello** world!",
			nodes: []ast.Node{
				&ast.Paragraph{
					Children: []ast.Node{
						&ast.Bold{
							Symbol:  "*",
							Content: "Hello",
						},
						&ast.Text{
							Content: " world!",
						},
					},
				},
			},
		},
		{
			// Bold embedded mid-sentence splits the surrounding text runs.
			text: "Hello **world**!",
			nodes: []ast.Node{
				&ast.Paragraph{
					Children: []ast.Node{
						&ast.Text{
							Content: "Hello ",
						},
						&ast.Bold{
							Symbol:  "*",
							Content: "world",
						},
						&ast.Text{
							Content: "!",
						},
					},
				},
			},
		},
	}
	for _, test := range tests {
		tokens := tokenizer.Tokenize(test.text)
		nodes := Parse(tokens)
		require.Equal(t, test.nodes, nodes)
	}
}

View file

@ -0,0 +1,30 @@
package parser
import (
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
// TextParser is the fallback inline parser: it consumes exactly one token
// and wraps it in an ast.Text node.
type TextParser struct {
	// Content appears unused now that the parser is a shared stateless
	// singleton — NOTE(review): candidate for removal; confirm no callers.
	Content string
}

// defaultTextParser is the shared stateless instance handed to callers.
var defaultTextParser = &TextParser{}

// NewTextParser returns the shared TextParser instance.
func NewTextParser() *TextParser {
	return defaultTextParser
}
// Match reports a one-token match whenever any tokens remain; text is the
// universal fallback, so it never matches more (or less) than one token.
func (*TextParser) Match(tokens []*tokenizer.Token) (int, bool) {
	if len(tokens) > 0 {
		return 1, true
	}
	return 0, false
}
// Parse wraps the first token's literal value in a Text node; with no
// tokens it yields an empty Text node.
func (*TextParser) Parse(tokens []*tokenizer.Token) ast.Node {
	if len(tokens) > 0 {
		return ast.NewText(tokens[0].String())
	}
	return ast.NewText("")
}

View file

@ -72,3 +72,15 @@ func Tokenize(text string) []*Token {
}
return tokens
}
// String returns the token's literal source text.
func (t *Token) String() string {
	return t.Value
}
// Stringify concatenates the literal text of tokens, in order, into one
// string. A single byte buffer is grown and converted once at the end
// rather than rebuilding an immutable string per token.
func Stringify(tokens []*Token) string {
	var out []byte
	for _, token := range tokens {
		out = append(out, token.String()...)
	}
	return string(out)
}