chore: implement gomark skeleton

This commit is contained in:
Steven 2023-12-12 23:24:02 +08:00
parent 7f1f6f77a0
commit aa3632e2ac
15 changed files with 393 additions and 172 deletions

View file

@ -67,6 +67,8 @@ linters-settings:
disabled: true
- name: early-return
disabled: true
- name: use-any
disabled: true
gocritic:
disabled-checks:
- ifElseChain

View file

@ -1,19 +1 @@
package ast
// Node is a single element in the parsed tree. Type labels the node kind,
// Text holds its raw content, and Children are nested child nodes.
type Node struct {
	Type     string
	Text     string
	Children []*Node
}

// Document is the root container for a sequence of top-level nodes.
type Document struct {
	Nodes []*Node
}

// NewDocument returns an empty document ready to receive nodes.
func NewDocument() *Document {
	return new(Document)
}

// AddNode appends node to the document's top-level node list.
func (d *Document) AddNode(node *Node) {
	d.Nodes = append(d.Nodes, node)
}

View file

@ -0,0 +1,42 @@
package ast
// BaseBlock provides common state shared by block-level nodes.
// It is currently empty and exists as an embedding point for future fields.
type BaseBlock struct {
}

// Paragraph is a block node holding a sequence of inline children.
type Paragraph struct {
	BaseBlock
	Children []Node
}

// NodeTypeParagraph is the registered type value for Paragraph nodes.
var NodeTypeParagraph = NewNodeType("Paragraph")

// NewParagraph builds a Paragraph wrapping the given inline children.
func NewParagraph(children []Node) *Paragraph {
	return &Paragraph{
		Children: children,
	}
}

// Type implements the Node interface.
func (*Paragraph) Type() NodeType {
	return NodeTypeParagraph
}
// CodeBlock is a block node for fenced code. Language is the tag read from
// the opening fence line (may be empty) and Content is the raw body text.
type CodeBlock struct {
	BaseBlock
	Language string
	Content  string
}

// NodeTypeCodeBlock is the registered type value for CodeBlock nodes.
var NodeTypeCodeBlock = NewNodeType("CodeBlock")

// NewCodeBlock builds a CodeBlock from its language tag and body text.
func NewCodeBlock(language, content string) *CodeBlock {
	return &CodeBlock{
		Language: language,
		Content:  content,
	}
}

// Type implements the Node interface.
func (*CodeBlock) Type() NodeType {
	return NodeTypeCodeBlock
}

View file

@ -0,0 +1,42 @@
package ast
// BaseInline provides common state shared by inline nodes; currently an
// empty embedding point.
type BaseInline struct{}

// Text is an inline node holding a run of plain text.
type Text struct {
	BaseInline
	Content string
}

// NodeTypeText is the registered type value for Text nodes.
var NodeTypeText = NewNodeType("Text")

// NewText builds a Text node around content.
func NewText(content string) *Text {
	return &Text{
		Content: content,
	}
}

// Type implements the Node interface.
func (*Text) Type() NodeType {
	return NodeTypeText
}
// Bold is an inline node for emphasized text.
type Bold struct {
	BaseInline
	// Symbol is the delimiter character used in the source: "*" or "_".
	Symbol  string
	Content string
}

// NodeTypeBold is the registered type value for Bold nodes.
var NodeTypeBold = NewNodeType("Bold")

// NewBold builds a Bold node from its delimiter symbol and inner text.
func NewBold(symbol, content string) *Bold {
	return &Bold{
		Symbol:  symbol,
		Content: content,
	}
}

// Type implements the Node interface.
func (*Bold) Type() NodeType {
	return NodeTypeBold
}

View file

@ -1,12 +1,20 @@
package ast
func NewNode(tp, text string) *Node {
return &Node{
Type: tp,
Text: text,
}
// Node is the interface implemented by every AST node; Type reports the
// node's registered NodeType.
type Node interface {
	Type() NodeType
}
func (n *Node) AddChild(child *Node) {
n.Children = append(n.Children, child)
// NodeType identifies the concrete kind of an AST node. Values are
// allocated sequentially via NewNodeType, typically from package-level
// variable initialization.
type NodeType int

// String returns the name registered for t, or "" when t is out of range
// (including the zero value, which is a reserved placeholder).
// Bug fix: previously an out-of-range value caused an index-out-of-range
// panic instead of degrading gracefully.
func (t NodeType) String() string {
	if t <= 0 || int(t) >= len(nodeTypeNames) {
		return ""
	}
	return nodeTypeNames[t]
}

// nodeTypeIndex is the most recently allocated NodeType value.
var nodeTypeIndex NodeType

// nodeTypeNames maps a NodeType value to its registered name; index 0 is
// a reserved placeholder so the zero NodeType never names a real type.
var nodeTypeNames = []string{""}

// NewNodeType registers name and returns a fresh, unique NodeType.
// Not safe for concurrent use; call only during package initialization.
func NewNodeType(name string) NodeType {
	nodeTypeNames = append(nodeTypeNames, name)
	nodeTypeIndex++
	return nodeTypeIndex
}

View file

@ -1,49 +1,60 @@
package parser
import (
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
type BoldParser struct {
ContentTokens []*tokenizer.Token
type BoldParser struct{}
var defaultBoldParser = &BoldParser{}
func NewBoldParser() InlineParser {
return defaultBoldParser
}
func NewBoldParser() *BoldParser {
return &BoldParser{}
}
func (*BoldParser) Match(tokens []*tokenizer.Token) *BoldParser {
func (*BoldParser) Match(tokens []*tokenizer.Token) (int, bool) {
if len(tokens) < 5 {
return nil
return 0, false
}
prefixTokens := tokens[:2]
if prefixTokens[0].Type != prefixTokens[1].Type {
return nil
return 0, false
}
prefixTokenType := prefixTokens[0].Type
if prefixTokenType != tokenizer.Star && prefixTokenType != tokenizer.Underline {
return nil
return 0, false
}
contentTokens := []*tokenizer.Token{}
cursor, matched := 2, false
for ; cursor < len(tokens)-1; cursor++ {
token, nextToken := tokens[cursor], tokens[cursor+1]
if token.Type == tokenizer.Newline || nextToken.Type == tokenizer.Newline {
return nil
return 0, false
}
if token.Type == prefixTokenType && nextToken.Type == prefixTokenType {
matched = true
break
}
contentTokens = append(contentTokens, token)
}
if !matched {
return 0, false
}
return cursor + 2, true
}
func (p *BoldParser) Parse(tokens []*tokenizer.Token) ast.Node {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil
}
return &BoldParser{
ContentTokens: contentTokens,
prefixTokenType := tokens[0].Type
contentTokens := tokens[2 : size-2]
return &ast.Bold{
Symbol: prefixTokenType,
Content: tokenizer.Stringify(contentTokens),
}
}

View file

@ -5,13 +5,14 @@ import (
"github.com/stretchr/testify/require"
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
func TestBoldParser(t *testing.T) {
tests := []struct {
text string
bold *BoldParser
bold ast.Node
}{
{
text: "*Hello world!",
@ -19,32 +20,16 @@ func TestBoldParser(t *testing.T) {
},
{
text: "**Hello**",
bold: &BoldParser{
ContentTokens: []*tokenizer.Token{
{
Type: tokenizer.Text,
Value: "Hello",
},
},
bold: &ast.Bold{
Symbol: "*",
Content: "Hello",
},
},
{
text: "** Hello **",
bold: &BoldParser{
ContentTokens: []*tokenizer.Token{
{
Type: tokenizer.Space,
Value: " ",
},
{
Type: tokenizer.Text,
Value: "Hello",
},
{
Type: tokenizer.Space,
Value: " ",
},
},
bold: &ast.Bold{
Symbol: "*",
Content: " Hello ",
},
},
{
@ -55,35 +40,11 @@ func TestBoldParser(t *testing.T) {
text: "* * Hello **",
bold: nil,
},
{
text: `** Hello
**`,
bold: nil,
},
{
text: `**Hello \n**`,
bold: &BoldParser{
ContentTokens: []*tokenizer.Token{
{
Type: tokenizer.Text,
Value: "Hello",
},
{
Type: tokenizer.Space,
Value: " ",
},
{
Type: tokenizer.Text,
Value: `\n`,
},
},
},
},
}
for _, test := range tests {
tokens := tokenizer.Tokenize(test.text)
bold := NewBoldParser()
require.Equal(t, test.bold, bold.Match(tokens))
parser := NewBoldParser()
require.Equal(t, test.bold, parser.Parse(tokens))
}
}

View file

@ -1,52 +1,79 @@
package parser
import "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
import (
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
type CodeBlockParser struct {
Language string
Content string
}
var defaultCodeBlockParser = &CodeBlockParser{}
func NewCodeBlockParser() *CodeBlockParser {
return &CodeBlockParser{}
return defaultCodeBlockParser
}
func (*CodeBlockParser) Match(tokens []*tokenizer.Token) *CodeBlockParser {
func (*CodeBlockParser) Match(tokens []*tokenizer.Token) (int, bool) {
if len(tokens) < 9 {
return nil
return 0, false
}
if tokens[0].Type != tokenizer.Backtick || tokens[1].Type != tokenizer.Backtick || tokens[2].Type != tokenizer.Backtick {
return nil
return 0, false
}
if tokens[3].Type != tokenizer.Newline && tokens[4].Type != tokenizer.Newline {
return nil
return 0, false
}
cursor, language := 4, ""
cursor := 4
if tokens[3].Type != tokenizer.Newline {
language = tokens[3].Value
cursor = 5
}
content, matched := "", false
matched := false
for ; cursor < len(tokens)-3; cursor++ {
if tokens[cursor].Type == tokenizer.Newline && tokens[cursor+1].Type == tokenizer.Backtick && tokens[cursor+2].Type == tokenizer.Backtick && tokens[cursor+3].Type == tokenizer.Backtick {
if cursor+3 == len(tokens)-1 {
cursor += 4
matched = true
break
} else if tokens[cursor+4].Type == tokenizer.Newline {
cursor += 5
matched = true
break
}
}
content += tokens[cursor].Value
}
if !matched {
return 0, false
}
return cursor, true
}
func (p *CodeBlockParser) Parse(tokens []*tokenizer.Token) ast.Node {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil
}
return &CodeBlockParser{
Language: language,
Content: content,
languageToken := tokens[3]
contentStart, contentEnd := 5, size-4
if languageToken.Type == tokenizer.Newline {
languageToken = nil
contentStart = 4
}
if tokens[size-1].Type == tokenizer.Newline {
contentEnd = size - 5
}
codeBlock := &ast.CodeBlock{
Content: tokenizer.Stringify(tokens[contentStart:contentEnd]),
}
if languageToken != nil {
codeBlock.Language = languageToken.String()
}
return codeBlock
}

View file

@ -5,13 +5,14 @@ import (
"github.com/stretchr/testify/require"
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
func TestCodeBlockParser(t *testing.T) {
tests := []struct {
text string
codeBlock *CodeBlockParser
codeBlock ast.Node
}{
{
text: "```Hello world!```",
@ -19,21 +20,21 @@ func TestCodeBlockParser(t *testing.T) {
},
{
text: "```\nHello\n```",
codeBlock: &CodeBlockParser{
codeBlock: &ast.CodeBlock{
Language: "",
Content: "Hello",
},
},
{
text: "```\nHello world!\n```",
codeBlock: &CodeBlockParser{
codeBlock: &ast.CodeBlock{
Language: "",
Content: "Hello world!",
},
},
{
text: "```java\nHello \n world!\n```",
codeBlock: &CodeBlockParser{
codeBlock: &ast.CodeBlock{
Language: "java",
Content: "Hello \n world!",
},
@ -48,7 +49,7 @@ func TestCodeBlockParser(t *testing.T) {
},
{
text: "```java\nHello \n world!\n```\n123123",
codeBlock: &CodeBlockParser{
codeBlock: &ast.CodeBlock{
Language: "java",
Content: "Hello \n world!",
},
@ -57,7 +58,7 @@ func TestCodeBlockParser(t *testing.T) {
for _, test := range tests {
tokens := tokenizer.Tokenize(test.text)
codeBlock := NewCodeBlockParser()
require.Equal(t, test.codeBlock, codeBlock.Match(tokens))
parser := NewCodeBlockParser()
require.Equal(t, test.codeBlock, parser.Parse(tokens))
}
}

View file

@ -1,16 +1,21 @@
package parser
import "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
import (
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
type ParagraphParser struct {
ContentTokens []*tokenizer.Token
}
var defaultParagraphParser = &ParagraphParser{}
func NewParagraphParser() *ParagraphParser {
return &ParagraphParser{}
return defaultParagraphParser
}
func (*ParagraphParser) Match(tokens []*tokenizer.Token) *ParagraphParser {
func (*ParagraphParser) Match(tokens []*tokenizer.Token) (int, bool) {
contentTokens := []*tokenizer.Token{}
cursor := 0
for ; cursor < len(tokens); cursor++ {
@ -21,10 +26,21 @@ func (*ParagraphParser) Match(tokens []*tokenizer.Token) *ParagraphParser {
contentTokens = append(contentTokens, token)
}
if len(contentTokens) == 0 {
return 0, false
}
return len(contentTokens), true
}
func (p *ParagraphParser) Parse(tokens []*tokenizer.Token) ast.Node {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil
}
return &ParagraphParser{
ContentTokens: contentTokens,
}
contentTokens := tokens[:size]
children := ParseInline(contentTokens, []InlineParser{
NewBoldParser(),
NewTextParser(),
})
return ast.NewParagraph(children)
}

View file

@ -5,73 +5,25 @@ import (
"github.com/stretchr/testify/require"
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
func TestParagraphParser(t *testing.T) {
tests := []struct {
text string
paragraph *ParagraphParser
paragraph ast.Node
}{
{
text: "",
paragraph: nil,
},
{
text: "Hello world",
paragraph: &ParagraphParser{
ContentTokens: []*tokenizer.Token{
{
Type: tokenizer.Text,
Value: "Hello",
},
{
Type: tokenizer.Space,
Value: " ",
},
{
Type: tokenizer.Text,
Value: "world",
},
},
},
},
{
text: `Hello
world`,
paragraph: &ParagraphParser{
ContentTokens: []*tokenizer.Token{
{
Type: tokenizer.Text,
Value: "Hello",
},
{
Type: tokenizer.Space,
Value: " ",
},
},
},
},
{
text: `Hello \n
world`,
paragraph: &ParagraphParser{
ContentTokens: []*tokenizer.Token{
{
Type: tokenizer.Text,
Value: "Hello",
},
{
Type: tokenizer.Space,
Value: " ",
},
{
Type: tokenizer.Text,
Value: `\n`,
},
{
Type: tokenizer.Space,
Value: " ",
text: "Hello world!",
paragraph: &ast.Paragraph{
Children: []ast.Node{
&ast.Text{
Content: "Hello world!",
},
},
},
@ -80,7 +32,7 @@ world`,
for _, test := range tests {
tokens := tokenizer.Tokenize(test.text)
paragraph := NewParagraphParser()
require.Equal(t, test.paragraph, paragraph.Match(tokens))
parser := NewParagraphParser()
require.Equal(t, test.paragraph, parser.Parse(tokens))
}
}

View file

@ -1 +1,65 @@
package parser
import (
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
// Context carries the parser sets configured for a parse run.
// NOTE(review): not yet referenced by Parse/ParseInline in this file —
// confirm intended wiring before relying on it.
type Context struct {
	BlockParsers  []BlockParser
	InlineParsers []InlineParser
}

// BaseParser is the contract shared by all parsers: Match reports how many
// tokens the construct would consume and whether it matched at the start of
// tokens; Parse consumes tokens and returns the resulting AST node (nil
// when the tokens do not match).
type BaseParser interface {
	Match(tokens []*tokenizer.Token) (int, bool)
	Parse(tokens []*tokenizer.Token) ast.Node
}

// InlineParser parses inline constructs (text, bold, ...).
type InlineParser interface {
	BaseParser
}

// BlockParser parses block constructs (paragraph, code block, ...).
type BlockParser interface {
	BaseParser
}
// Parse converts a token stream into a list of top-level block nodes by
// repeatedly trying each registered block parser against the remaining
// tokens and consuming whatever the first match claims.
func Parse(tokens []*tokenizer.Token) []ast.Node {
	nodes := []ast.Node{}
	blockParsers := []BlockParser{
		NewParagraphParser(),
	}
	for len(tokens) > 0 {
		matched := false
		for _, blockParser := range blockParsers {
			cursor, ok := blockParser.Match(tokens)
			// A zero-width match would make no progress; treat it as a miss.
			if !ok || cursor == 0 {
				continue
			}
			node := blockParser.Parse(tokens)
			nodes = append(nodes, node)
			tokens = tokens[cursor:]
			matched = true
			break
		}
		// Bug fix: previously, when no block parser matched the remaining
		// tokens, this outer loop spun forever. Stop instead of hanging.
		if !matched {
			break
		}
	}
	return nodes
}
// ParseInline converts tokens into inline nodes, trying inlineParsers in
// priority order at each position. Consecutive Text nodes are merged into
// a single node so plain runs come out as one Text.
func ParseInline(tokens []*tokenizer.Token, inlineParsers []InlineParser) []ast.Node {
	nodes := []ast.Node{}
	var lastNode ast.Node
	for len(tokens) > 0 {
		matched := false
		for _, inlineParser := range inlineParsers {
			cursor, ok := inlineParser.Match(tokens)
			// A zero-width match would make no progress; treat it as a miss.
			if !ok || cursor == 0 {
				continue
			}
			node := inlineParser.Parse(tokens)
			// Coalesce adjacent Text nodes rather than appending a new one.
			if node.Type() == ast.NodeTypeText && lastNode != nil && lastNode.Type() == ast.NodeTypeText {
				lastNode.(*ast.Text).Content += node.(*ast.Text).Content
			} else {
				nodes = append(nodes, node)
				lastNode = node
			}
			tokens = tokens[cursor:]
			matched = true
			break
		}
		// Bug fix: without this guard the loop hangs when no parser in
		// inlineParsers matches the remaining tokens.
		if !matched {
			break
		}
	}
	return nodes
}

View file

@ -0,0 +1,71 @@
package parser
import (
"testing"
"github.com/stretchr/testify/require"
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
// TestParser exercises the end-to-end tokenize → Parse pipeline on small
// markdown snippets, checking the produced paragraph/inline AST shape.
func TestParser(t *testing.T) {
	tests := []struct {
		text  string
		nodes []ast.Node
	}{
		{
			// A plain line becomes one paragraph with a single text child.
			text: "Hello world!",
			nodes: []ast.Node{
				&ast.Paragraph{
					Children: []ast.Node{
						&ast.Text{
							Content: "Hello world!",
						},
					},
				},
			},
		},
		{
			// Leading bold span followed by plain text.
			text: "**Hello** world!",
			nodes: []ast.Node{
				&ast.Paragraph{
					Children: []ast.Node{
						&ast.Bold{
							Symbol:  "*",
							Content: "Hello",
						},
						&ast.Text{
							Content: " world!",
						},
					},
				},
			},
		},
		{
			// Bold embedded mid-sentence splits the surrounding text runs.
			text: "Hello **world**!",
			nodes: []ast.Node{
				&ast.Paragraph{
					Children: []ast.Node{
						&ast.Text{
							Content: "Hello ",
						},
						&ast.Bold{
							Symbol:  "*",
							Content: "world",
						},
						&ast.Text{
							Content: "!",
						},
					},
				},
			},
		},
	}
	for _, test := range tests {
		tokens := tokenizer.Tokenize(test.text)
		nodes := Parse(tokens)
		require.Equal(t, test.nodes, nodes)
	}
}

View file

@ -0,0 +1,30 @@
package parser
import (
"github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)
// TextParser is the fallback inline parser: it consumes exactly one token
// and wraps it in an ast.Text node.
type TextParser struct {
	// Content appears unused now that the parser is a shared stateless
	// singleton — NOTE(review): candidate for removal; confirm no callers.
	Content string
}

// defaultTextParser is the shared stateless instance handed to callers.
var defaultTextParser = &TextParser{}

// NewTextParser returns the shared TextParser instance.
func NewTextParser() *TextParser {
	return defaultTextParser
}
// Match reports a one-token match whenever any tokens remain; text is the
// universal fallback, so it never matches more (or less) than one token.
func (*TextParser) Match(tokens []*tokenizer.Token) (int, bool) {
	if len(tokens) > 0 {
		return 1, true
	}
	return 0, false
}
// Parse wraps the first token's literal value in a Text node; with no
// tokens it yields an empty Text node.
func (*TextParser) Parse(tokens []*tokenizer.Token) ast.Node {
	if len(tokens) > 0 {
		return ast.NewText(tokens[0].String())
	}
	return ast.NewText("")
}

View file

@ -72,3 +72,15 @@ func Tokenize(text string) []*Token {
}
return tokens
}
// String returns the token's literal source text.
func (t *Token) String() string {
	return t.Value
}
// Stringify concatenates the literal text of tokens, in order, into one
// string. A single byte buffer is grown and converted once at the end
// rather than rebuilding an immutable string per token.
func Stringify(tokens []*Token) string {
	var out []byte
	for _, token := range tokens {
		out = append(out, token.String()...)
	}
	return string(out)
}