memos/plugin/gomark/parser/tokenizer/tokenizer.go
2023-12-16 11:47:29 +08:00

114 lines
2.7 KiB
Go

package tokenizer
// TokenType identifies the kind of a Token.
// It is an alias of string, so plain strings and TokenType values are interchangeable.
type TokenType = string
// Single-character token types. The value of each constant is the
// character it stands for; Tokenize emits one token per occurrence.
const (
Underscore TokenType = "_"
Asterisk TokenType = "*"
PoundSign TokenType = "#"
Backtick TokenType = "`"
LeftSquareBracket TokenType = "["
RightSquareBracket TokenType = "]"
LeftParenthesis TokenType = "("
RightParenthesis TokenType = ")"
ExclamationMark TokenType = "!"
Tilde TokenType = "~"
Hyphen TokenType = "-"
PlusSign TokenType = "+"
Dot TokenType = "."
GreaterThan TokenType = ">"
Backslash TokenType = "\\"
Newline TokenType = "\n"
Space TokenType = " "
)
// Token types for runs of characters that have no dedicated symbol constant.
const (
// Number marks a run of consecutive ASCII digits ('0'-'9').
Number TokenType = "number"
// Text marks a run of any other characters; its type value is the empty string.
Text TokenType = ""
)
// Token is a single lexical unit produced by Tokenize.
type Token struct {
// Type is the kind of the token.
Type TokenType
// Value is the text the token was built from.
Value string
}
// NewToken allocates a Token of kind tp whose Value is text and returns a
// pointer to it. Every call yields a distinct Token.
func NewToken(tp, text string) *Token {
	token := Token{Type: tp, Value: text}
	return &token
}
// Tokenize splits text into a flat token sequence, reading it rune by rune.
//
// Each reserved symbol (see the single-character TokenType constants) becomes
// its own token. Every other rune is classified as Number (ASCII digit) or
// Text (anything else), and consecutive runes of the same class are merged
// into one token. The result is never nil; empty input yields an empty slice.
func Tokenize(text string) []*Token {
	tokens := []*Token{}
	for _, r := range text {
		if tp, ok := symbolTokenType(r); ok {
			tokens = append(tokens, NewToken(tp, string(r)))
			continue
		}

		kind := Text
		if r >= '0' && r <= '9' {
			kind = Number
		}

		// Extend the previous token when it is a run of the same class;
		// symbol tokens never have type Text or Number, so they never match.
		if n := len(tokens); n > 0 {
			if last := tokens[n-1]; last.Type == kind {
				last.Value += string(r)
				continue
			}
		}
		tokens = append(tokens, NewToken(kind, string(r)))
	}
	return tokens
}

// symbolTokenType reports the single-character token type for r.
// The second result is false when r is not a reserved symbol.
func symbolTokenType(r rune) (TokenType, bool) {
	switch r {
	case '_':
		return Underscore, true
	case '*':
		return Asterisk, true
	case '#':
		return PoundSign, true
	case '`':
		return Backtick, true
	case '[':
		return LeftSquareBracket, true
	case ']':
		return RightSquareBracket, true
	case '(':
		return LeftParenthesis, true
	case ')':
		return RightParenthesis, true
	case '!':
		return ExclamationMark, true
	case '~':
		return Tilde, true
	case '-':
		return Hyphen, true
	case '>':
		return GreaterThan, true
	case '+':
		return PlusSign, true
	case '.':
		return Dot, true
	case '\\':
		return Backslash, true
	case '\n':
		return Newline, true
	case ' ':
		return Space, true
	}
	return "", false
}
// String returns the token's Value, so a *Token satisfies fmt.Stringer.
// The receiver must be non-nil.
func (t *Token) String() string {
return t.Value
}
// Stringify concatenates the string form of every token, in order, and
// returns the result. A nil or empty slice yields the empty string.
// Every element of tokens must be non-nil.
func Stringify(tokens []*Token) string {
	// Accumulate into one growable byte buffer instead of using += on a
	// string, which would re-copy the whole prefix for every token.
	var buf []byte
	for _, token := range tokens {
		buf = append(buf, token.String()...)
	}
	return string(buf)
}