yq/pkg/yqlib/path_tokeniser.go
2020-10-09 08:51:14 +11:00

135 lines
3.2 KiB
Go

package yqlib
import (
"strconv"
"strings"
lex "github.com/timtadh/lexmachine"
"github.com/timtadh/lexmachine/machines"
)
// Package-level token tables. initTokens assigns Literals, Tokens and TokenIds.
var (
	// Literals holds the tokens representing literal strings.
	Literals []string
	// Keywords holds the keyword tokens.
	Keywords []string
	// Tokens holds the names of all tokens; initTokens builds it from the
	// named tokens followed by every entry of Literals.
	Tokens []string
	// TokenIds maps each token name to its integer id.
	TokenIds map[string]int
)
// initTokens (re)assigns the package-level token tables. Literals gets the
// fixed set of literal strings, Tokens gets the named tokens followed by every
// literal, and TokenIds maps each entry of Tokens to its index in that slice.
// It finishes by calling initMaps.
func initTokens() {
	Literals = []string{"(", ")", "[+]", "[*]", "**"}

	named := []string{
		"OR_OPERATOR",
		"AND_OPERATOR",
		"EQUALS_OPERATOR",
		"PATH_KEY",    // e.g. apples
		"ARRAY_INDEX", // e.g. 1234
	}
	Tokens = append(named, Literals...)

	// A token's id is simply its position in Tokens.
	TokenIds = make(map[string]int, len(Tokens))
	for id := range Tokens {
		TokenIds[Tokens[id]] = id
	}

	initMaps()
}
// skip is a lexer action that ignores both of its arguments and yields no
// token and no error.
// NOTE(review): a nil token with a nil error is understood to make lexmachine
// discard the match — confirm against the library documentation.
func skip(_ *lex.Scanner, _ *machines.Match) (interface{}, error) {
	return nil, nil
}
// token returns a lexer action that emits a token whose type is the id
// registered for name in TokenIds and whose value is the matched text as a
// string, unmodified. The id is looked up each time the action runs.
func token(name string) lex.Action {
	emit := func(scanner *lex.Scanner, match *machines.Match) (interface{}, error) {
		text := string(match.Bytes)
		id := TokenIds[name]
		return scanner.Token(id, text, match), nil
	}
	return emit
}
// unwrap strips the first and last byte of value, e.g. the quotes around
// `"abc"` or the brackets around `[12]`, and returns what is in between.
//
// A value shorter than two bytes has no wrapper to remove; it is returned
// unchanged instead of panicking on an out-of-range slice expression.
func unwrap(value string) string {
	if len(value) < 2 {
		return value
	}
	return value[1 : len(value)-1]
}
// wrappedToken returns a lexer action that emits a token of the type
// registered for name in TokenIds. The token's value is the matched text with
// its first and last byte removed by unwrap.
func wrappedToken(name string) lex.Action {
	return func(scanner *lex.Scanner, match *machines.Match) (interface{}, error) {
		inner := unwrap(string(match.Bytes))
		return scanner.Token(TokenIds[name], inner, match), nil
	}
}
// numberToken returns a lexer action that parses the matched text as a base-10
// 64-bit integer and emits it as the value of a token of the type registered
// for name in TokenIds. When wrapped is true the first and last byte of the
// match are removed by unwrap before parsing. A parse failure is returned as
// the action's error with a nil token.
func numberToken(name string, wrapped bool) lex.Action {
	return func(scanner *lex.Scanner, match *machines.Match) (interface{}, error) {
		digits := string(match.Bytes)
		if wrapped {
			digits = unwrap(digits)
		}

		parsed, err := strconv.ParseInt(digits, 10, 64) // nolint
		if err != nil {
			return nil, err
		}
		return scanner.Token(TokenIds[name], parsed, match), nil
	}
}
// initLexer creates the path lexer, registers its patterns and compiles it.
//
// One pattern is added per entry of Literals, followed by the operator,
// array-index, whitespace, path-key and dot patterns. If compilation fails the
// error is returned together with a nil lexer.
func initLexer() (*lex.Lexer, error) {
	lexer := lex.NewLexer()

	// Every character of a literal is prefixed with a backslash to build its
	// pattern.
	for _, literal := range Literals {
		escaped := "\\" + strings.Join(strings.Split(literal, ""), "\\")
		lexer.Add([]byte(escaped), token(literal))
	}

	// NOTE(review): registration order is believed to decide which pattern wins
	// when two match the same text in lexmachine — keep this order unchanged.
	rules := []struct {
		pattern string
		action  lex.Action
	}{
		{`([Oo][Rr])`, token("OR_OPERATOR")},
		{`([Aa][Nn][Dd])`, token("AND_OPERATOR")},
		{`(==)`, token("EQUALS_OPERATOR")},
		{`\[-?[0-9]+\]`, numberToken("ARRAY_INDEX", true)},
		{`-?[0-9]+`, numberToken("ARRAY_INDEX", false)},
		{"( |\t|\n|\r)+", skip},
		{`"[^ "]+"`, wrappedToken("PATH_KEY")},
		{`[^ \.\[\(\)=]+`, token("PATH_KEY")},
		{`\.`, skip},
	}
	for _, rule := range rules {
		lexer.Add([]byte(rule.pattern), rule.action)
	}

	if err := lexer.Compile(); err != nil {
		return nil, err
	}
	return lexer, nil
}
// PathTokeniser turns a path expression into a sequence of lexer tokens.
type PathTokeniser interface {
// Tokenise lexes path and returns its tokens, or an error if lexing fails.
Tokenise(path string) ([]*lex.Token, error)
}
// pathTokeniser is the PathTokeniser returned by NewPathTokeniser.
type pathTokeniser struct {
// lexer is the lexer built by initLexer; Tokenise creates a scanner from it.
lexer *lex.Lexer
}
// NewPathTokeniser reinitialises the package-level token tables via
// initTokens, builds the lexer via initLexer and returns a tokeniser wrapping
// it. It panics if the lexer cannot be built.
func NewPathTokeniser() PathTokeniser {
	initTokens()

	lexer, err := initLexer()
	if err != nil {
		panic(err)
	}
	return &pathTokeniser{lexer: lexer}
}
// Tokenise lexes path and returns the tokens produced, in scan order.
//
// An error from creating the scanner or from any scan step is returned with a
// nil token slice. Each token is debug-logged with its value and type name as
// it is collected.
func (p *pathTokeniser) Tokenise(path string) ([]*lex.Token, error) {
	scanner, err := p.lexer.Scanner([]byte(path))
	if err != nil {
		return nil, err
	}

	var tokens []*lex.Token
	for {
		raw, scanErr, eof := scanner.Next()
		if eof {
			break
		}
		// A step may yield no token at all; only non-nil results are kept.
		if raw != nil {
			lexed := raw.(*lex.Token)
			log.Debugf("Processing %v - %v", lexed.Value, Tokens[lexed.Type])
			tokens = append(tokens, lexed)
		}
		if scanErr != nil {
			return nil, scanErr
		}
	}
	return tokens, nil
}