From aecffe3402012a335c7c83d63daab9da915f4b55 Mon Sep 17 00:00:00 2001
From: Steven
Date: Thu, 18 Jan 2024 10:21:08 +0800
Subject: [PATCH] feat: implement table parser

---
 plugin/gomark/ast/ast.go                    |   1 +
 plugin/gomark/ast/block.go                  |  39 +++++
 plugin/gomark/parser/parser.go              |   1 +
 plugin/gomark/parser/table.go               | 164 ++++++++++++++++++++
 plugin/gomark/parser/table_test.go          |  45 ++++++
 plugin/gomark/parser/tokenizer/tokenizer.go |  22 +++
 6 files changed, 272 insertions(+)
 create mode 100644 plugin/gomark/parser/table.go
 create mode 100644 plugin/gomark/parser/table_test.go

diff --git a/plugin/gomark/ast/ast.go b/plugin/gomark/ast/ast.go
index fdcdd9ae..5da1bc61 100644
--- a/plugin/gomark/ast/ast.go
+++ b/plugin/gomark/ast/ast.go
@@ -15,6 +15,7 @@ const (
 	UnorderedListNode
 	TaskListNode
 	MathBlockNode
+	TableNode
 	// Inline nodes.
 	TextNode
 	BoldNode
diff --git a/plugin/gomark/ast/block.go b/plugin/gomark/ast/block.go
index f5aa60fb..ae04abe7 100644
--- a/plugin/gomark/ast/block.go
+++ b/plugin/gomark/ast/block.go
@@ -194,3 +194,42 @@ func (*MathBlock) Type() NodeType {
 func (n *MathBlock) Restore() string {
 	return fmt.Sprintf("$$\n%s\n$$", n.Content)
 }
+
+type Table struct {
+	BaseBlock
+
+	Header []string
+	// Delimiter is the list of delimiter counts.
+	Delimiter []int
+	Rows      [][]string
+}
+
+func (*Table) Type() NodeType {
+	return TableNode
+}
+
+func (n *Table) Restore() string {
+	var result string
+	for _, header := range n.Header {
+		result += fmt.Sprintf("| %s ", header)
+	}
+	result += "|\n"
+	for _, d := range n.Delimiter {
+		symbol := ""
+		for i := 0; i < d; i++ {
+			symbol += "-"
+		}
+		result += fmt.Sprintf("| %s ", symbol)
+	}
+	result += "|\n"
+	for index, row := range n.Rows {
+		for _, cell := range row {
+			result += fmt.Sprintf("| %s ", cell)
+		}
+		result += "|"
+		if index != len(n.Rows)-1 {
+			result += "\n"
+		}
+	}
+	return result
+}
diff --git a/plugin/gomark/parser/parser.go b/plugin/gomark/parser/parser.go
index 4f1de8a7..b819471a 100644
--- a/plugin/gomark/parser/parser.go
+++ b/plugin/gomark/parser/parser.go
@@ -31,6 +31,7 @@ func Parse(tokens []*tokenizer.Token) ([]ast.Node, error) {
 
 var defaultBlockParsers = []BlockParser{
 	NewCodeBlockParser(),
+	NewTableParser(),
 	NewHorizontalRuleParser(),
 	NewHeadingParser(),
 	NewBlockquoteParser(),
diff --git a/plugin/gomark/parser/table.go b/plugin/gomark/parser/table.go
new file mode 100644
index 00000000..b4d36130
--- /dev/null
+++ b/plugin/gomark/parser/table.go
@@ -0,0 +1,164 @@
+package parser
+
+import (
+	"errors"
+
+	"github.com/usememos/memos/plugin/gomark/ast"
+	"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
+)
+
+type TableParser struct{}
+
+func NewTableParser() *TableParser {
+	return &TableParser{}
+}
+
+func (*TableParser) Match(tokens []*tokenizer.Token) (int, bool) {
+	headerTokens := []*tokenizer.Token{}
+	for _, token := range tokens {
+		if token.Type == tokenizer.Newline {
+			break
+		} else {
+			headerTokens = append(headerTokens, token)
+		}
+	}
+	if len(headerTokens) < 5 || len(tokens) < len(headerTokens)+3 {
+		return 0, false
+	}
+
+	alignTokens := []*tokenizer.Token{}
+	for _, token := range tokens[len(headerTokens)+1:] {
+		if token.Type == tokenizer.Newline {
+			break
+		} else {
+			alignTokens = append(alignTokens, token)
+		}
+	}
+	if len(alignTokens) < 5 || len(tokens) < len(headerTokens)+len(alignTokens)+3 {
+		return 0, false
+	}
+
+	rowTokens := []*tokenizer.Token{}
+	for index, token := range tokens[len(headerTokens)+len(alignTokens)+2:] {
+		temp := len(headerTokens) + len(alignTokens) + 2 + index
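+		// Rows continue as long as each newline is followed by a pipe on
+		// the next line; a newline that ends the input is kept with the
+		// rows so the final row stays terminated.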
+		if token.Type == tokenizer.Newline && temp != len(tokens)-1 && tokens[temp+1].Type != tokenizer.Pipe {
+			break
+		} else {
+			rowTokens = append(rowTokens, token)
+		}
+	}
+	if len(rowTokens) < 5 {
+		return 0, false
+	}
+
+	// Check header.
+	if len(headerTokens) < 5 {
+		return 0, false
+	}
+	headerCells, ok := matchTableCellTokens(headerTokens)
+	if headerCells == 0 || !ok {
+		return 0, false
+	}
+
+	// Check align.
+	if len(alignTokens) < 5 {
+		return 0, false
+	}
+	alignCells, ok := matchTableCellTokens(alignTokens)
+	if alignCells != headerCells || !ok {
+		return 0, false
+	}
+	for _, t := range tokenizer.Split(alignTokens, tokenizer.Pipe) {
+		delimiterTokens := t[1 : len(t)-1]
+		if len(delimiterTokens) < 3 {
+			return 0, false
+		}
+		for _, token := range delimiterTokens {
+			if token.Type != tokenizer.Hyphen {
+				return 0, false
+			}
+		}
+	}
+
+	// Check rows.
+	if len(rowTokens) < 5 {
+		return 0, false
+	}
+	rows := tokenizer.Split(rowTokens, tokenizer.Newline)
+	if len(rows) == 0 {
+		return 0, false
+	}
+	for _, row := range rows {
+		cells, ok := matchTableCellTokens(row)
+		if cells != headerCells || !ok {
+			return 0, false
+		}
+	}
+
+	return len(headerTokens) + len(alignTokens) + len(rowTokens) + 2, true
+}
+
+func (p *TableParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
+	size, ok := p.Match(tokens)
+	if size == 0 || !ok {
+		return nil, errors.New("not matched")
+	}
+
+	rawRows := tokenizer.Split(tokens[:size-1], tokenizer.Newline)
+	headerTokens := rawRows[0]
+	alignTokens := rawRows[1]
+	rowTokens := rawRows[2:]
+	header := make([]string, 0)
+	delimiter := make([]int, 0)
+	rows := make([][]string, 0)
+
+	for _, t := range tokenizer.Split(headerTokens, tokenizer.Pipe) {
+		header = append(header, tokenizer.Stringify(t[1:len(t)-1]))
+	}
+	for _, t := range tokenizer.Split(alignTokens, tokenizer.Pipe) {
+		delimiter = append(delimiter, len(t[1:len(t)-1]))
+	}
+	for _, row := range rowTokens {
+		cells := make([]string, 0)
+		for _, t := range tokenizer.Split(row, tokenizer.Pipe) {
+			cells = append(cells, tokenizer.Stringify(t[1:len(t)-1]))
+		}
+		rows = append(rows, cells)
+	}
+
+	return &ast.Table{
+		Header:    header,
+		Delimiter: delimiter,
+		Rows:      rows,
+	}, nil
+}
+
+func matchTableCellTokens(tokens []*tokenizer.Token) (int, bool) {
+	if len(tokens) == 0 {
+		return 0, false
+	}
+
+	pipes := 0
+	for _, token := range tokens {
+		if token.Type == tokenizer.Pipe {
+			pipes++
+		}
+	}
+	cells := tokenizer.Split(tokens, tokenizer.Pipe)
+	if len(cells) != pipes-1 {
+		return 0, false
+	}
+	for _, cellTokens := range cells {
+		if len(cellTokens) == 0 {
+			return 0, false
+		}
+		if cellTokens[0].Type != tokenizer.Space {
+			return 0, false
+		}
+		if cellTokens[len(cellTokens)-1].Type != tokenizer.Space {
+			return 0, false
+		}
+	}
+
+	return len(cells), true
+}
diff --git a/plugin/gomark/parser/table_test.go b/plugin/gomark/parser/table_test.go
new file mode 100644
index 00000000..d40cc9be
--- /dev/null
+++ b/plugin/gomark/parser/table_test.go
@@ -0,0 +1,45 @@
+package parser
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+	"github.com/usememos/memos/plugin/gomark/ast"
+	"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
+	"github.com/usememos/memos/plugin/gomark/restore"
+)
+
+func TestTableParser(t *testing.T) {
+	tests := []struct {
+		text  string
+		table ast.Node
+	}{
+		{
+			text: "| header |\n| --- |\n| cell |\n",
+			table: &ast.Table{
+				Header:    []string{"header"},
+				Delimiter: []int{3},
+				Rows: [][]string{
+					{"cell"},
+				},
+			},
+		},
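+		// A multi-column table whose last row has no trailing newline.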
+		{
+			text: "| header1 | header2 |\n| --- | ---- |\n| cell1 | cell2 |\n| cell3 | cell4 |",
+			table: &ast.Table{
+				Header:    []string{"header1", "header2"},
+				Delimiter: []int{3, 4},
+				Rows: [][]string{
+					{"cell1", "cell2"},
+					{"cell3", "cell4"},
+				},
+			},
+		},
+	}
+
+	for _, test := range tests {
+		tokens := tokenizer.Tokenize(test.text)
+		node, _ := NewTableParser().Parse(tokens)
+		require.Equal(t, restore.Restore([]ast.Node{test.table}), restore.Restore([]ast.Node{node}))
+	}
+}
diff --git a/plugin/gomark/parser/tokenizer/tokenizer.go b/plugin/gomark/parser/tokenizer/tokenizer.go
index 56435554..471e5bc5 100644
--- a/plugin/gomark/parser/tokenizer/tokenizer.go
+++ b/plugin/gomark/parser/tokenizer/tokenizer.go
@@ -20,6 +20,7 @@ const (
 	GreaterThan TokenType = ">"
 	DollarSign  TokenType = "$"
 	EqualSign   TokenType = "="
+	Pipe        TokenType = "|"
 	Backslash   TokenType = "\\"
 	Newline     TokenType = "\n"
 	Space       TokenType = " "
@@ -80,6 +81,8 @@ func Tokenize(text string) []*Token {
 			tokens = append(tokens, NewToken(DollarSign, "$"))
 		case '=':
 			tokens = append(tokens, NewToken(EqualSign, "="))
+		case '|':
+			tokens = append(tokens, NewToken(Pipe, "|"))
 		case '\\':
 			tokens = append(tokens, NewToken(Backslash, `\`))
 		case '\n':
@@ -121,3 +124,22 @@ func Stringify(tokens []*Token) string {
 	}
 	return text
 }
+
+func Split(tokens []*Token, delimiter TokenType) [][]*Token {
+	result := make([][]*Token, 0)
+	current := make([]*Token, 0)
+	for _, token := range tokens {
+		if token.Type == delimiter {
+			if len(current) > 0 {
+				result = append(result, current)
+				current = make([]*Token, 0)
+			}
+		} else {
+			current = append(current, token)
+		}
+	}
+	if len(current) > 0 {
+		result = append(result, current)
+	}
+	return result
+}
\ No newline at end of file
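
Usage sketch (not part of the patch): a minimal example of driving the new
table parser directly. It assumes only what the diff above provides —
tokenizer.Tokenize, parser.NewTableParser, and the restore.Restore helper
already used by table_test.go; the table text itself is made up.

    package main

    import (
    	"fmt"

    	"github.com/usememos/memos/plugin/gomark/ast"
    	"github.com/usememos/memos/plugin/gomark/parser"
    	"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
    	"github.com/usememos/memos/plugin/gomark/restore"
    )

    func main() {
    	// Cells must be padded with spaces and rows wrapped in pipes,
    	// exactly like the fixtures in table_test.go; the delimiter row
    	// needs at least three hyphens per column.
    	text := "| name | age |\n| --- | --- |\n| Alice | 30 |"
    	tokens := tokenizer.Tokenize(text)
    	node, err := parser.NewTableParser().Parse(tokens)
    	if err != nil {
    		fmt.Println("not a table:", err)
    		return
    	}
    	// Restore serializes the ast.Table back to its markdown form.
    	fmt.Println(restore.Restore([]ast.Node{node}))
    }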