yq/pkg/yqlib/treeops/path_tokeniser.go

228 lines
6.8 KiB
Go
Raw Normal View History

2020-10-08 23:59:03 +00:00
package treeops
2020-09-17 11:58:01 +00:00
import (
2020-09-17 12:12:56 +00:00
"strconv"
2020-09-17 11:58:01 +00:00
lex "github.com/timtadh/lexmachine"
"github.com/timtadh/lexmachine/machines"
)
2020-10-11 00:24:22 +00:00
// skip is a lexer action that produces no token; it is registered for input
// that should be consumed and discarded.
func skip(_ *lex.Scanner, _ *machines.Match) (interface{}, error) {
	var nothing interface{}
	return nothing, nil
}
2020-09-17 11:58:01 +00:00
2020-10-11 00:24:22 +00:00
type Token struct {
PathElementType PathElementType
2020-10-11 23:44:33 +00:00
OperationType *OperationType
2020-10-11 00:24:22 +00:00
Value interface{}
StringValue string
2020-10-11 23:09:13 +00:00
PrefixSelf bool
2020-09-20 12:40:09 +00:00
2020-10-16 01:29:26 +00:00
CheckForPostTraverse bool // e.g. [1]cat should really be [1].cat
2020-09-17 11:58:01 +00:00
}
2020-10-11 00:24:22 +00:00
func pathToken(wrapped bool) lex.Action {
2020-09-17 11:58:01 +00:00
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
2020-10-11 00:24:22 +00:00
value := string(m.Bytes)
2020-10-16 01:29:26 +00:00
value = value[1:len(value)]
2020-10-11 00:24:22 +00:00
if wrapped {
value = unwrap(value)
}
2020-10-16 01:29:26 +00:00
return &Token{PathElementType: PathKey, OperationType: None, Value: value, StringValue: value, CheckForPostTraverse: true}, nil
2020-09-17 11:58:01 +00:00
}
}
2020-10-19 09:05:38 +00:00
// documentToken returns a lexer action that builds a DocumentKey token from a
// match consisting of a one-byte prefix followed by a base-10 integer. The
// token's Value is the parsed int64; a parse failure is returned as the
// action's error.
func documentToken() lex.Action {
	return func(_ *lex.Scanner, match *machines.Match) (interface{}, error) {
		// Drop the single prefix byte so only the digits remain.
		digits := string(match.Bytes)[1:]
		index, err := strconv.ParseInt(digits, 10, 64) // nolint
		if err != nil {
			return nil, err
		}
		tok := &Token{
			PathElementType:      DocumentKey,
			OperationType:        None,
			Value:                index,
			StringValue:          digits,
			CheckForPostTraverse: true,
		}
		return tok, nil
	}
}
2020-10-16 01:29:26 +00:00
func opToken(op *OperationType) lex.Action {
2020-10-11 00:24:22 +00:00
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
value := string(m.Bytes)
2020-10-16 01:29:26 +00:00
return &Token{PathElementType: Operation, OperationType: op, Value: op.Type, StringValue: value}, nil
2020-10-11 00:24:22 +00:00
}
2020-09-17 12:12:56 +00:00
}
2020-10-16 01:29:26 +00:00
func literalToken(pType PathElementType, literal string, checkForPost bool) lex.Action {
2020-09-17 12:12:56 +00:00
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
2020-10-16 01:29:26 +00:00
return &Token{PathElementType: pType, OperationType: None, Value: literal, StringValue: literal, CheckForPostTraverse: checkForPost}, nil
2020-09-17 12:12:56 +00:00
}
}
2020-10-11 00:24:22 +00:00
// unwrap strips the first and last byte from value, which callers use to
// remove a pair of surrounding quote characters.
//
// Inputs shorter than two bytes cannot carry a pair of delimiters; they are
// returned unchanged instead of triggering a slice-bounds panic.
func unwrap(value string) string {
	if len(value) < 2 {
		return value
	}
	return value[1 : len(value)-1]
}
2020-10-16 01:29:26 +00:00
func arrayIndextoken(precedingDot bool) lex.Action {
2020-09-17 12:12:56 +00:00
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
var numberString = string(m.Bytes)
2020-10-16 01:29:26 +00:00
startIndex := 1
if precedingDot {
startIndex = 2
2020-09-17 12:12:56 +00:00
}
2020-10-16 01:29:26 +00:00
numberString = numberString[startIndex : len(numberString)-1]
2020-09-17 12:12:56 +00:00
var number, errParsingInt = strconv.ParseInt(numberString, 10, 64) // nolint
if errParsingInt != nil {
return nil, errParsingInt
}
2020-10-16 01:29:26 +00:00
return &Token{PathElementType: PathKey, OperationType: None, Value: number, StringValue: numberString, CheckForPostTraverse: true}, nil
}
}
// numberValue returns a lexer action that parses the whole match as a base-10
// int64 and emits it as a Value token. A parse failure is returned as the
// action's error.
func numberValue() lex.Action {
	return func(_ *lex.Scanner, match *machines.Match) (interface{}, error) {
		text := string(match.Bytes)
		parsed, err := strconv.ParseInt(text, 10, 64) // nolint
		if err != nil {
			return nil, err
		}
		tok := &Token{
			PathElementType: Value,
			OperationType:   None,
			Value:           parsed,
			StringValue:     text,
		}
		return tok, nil
	}
}
2020-10-17 11:39:01 +00:00
// floatValue returns a lexer action that parses the whole match as a float64
// and emits it as a Value token. A parse failure is returned as the action's
// error.
func floatValue() lex.Action {
	return func(_ *lex.Scanner, match *machines.Match) (interface{}, error) {
		text := string(match.Bytes)
		parsed, err := strconv.ParseFloat(text, 64) // nolint
		if err != nil {
			return nil, err
		}
		tok := &Token{
			PathElementType: Value,
			OperationType:   None,
			Value:           parsed,
			StringValue:     text,
		}
		return tok, nil
	}
}
2020-10-16 01:29:26 +00:00
// booleanValue returns a lexer action that emits a Value token carrying the
// fixed boolean val; the matched text is kept as the token's StringValue.
func booleanValue(val bool) lex.Action {
	return func(_ *lex.Scanner, match *machines.Match) (interface{}, error) {
		tok := &Token{
			PathElementType: Value,
			OperationType:   None,
			Value:           val,
			StringValue:     string(match.Bytes),
		}
		return tok, nil
	}
}
// stringValue returns a lexer action that emits the matched text as a string
// Value token. When wrapped is true the first and last bytes of the match
// (the surrounding quotes) are removed first.
func stringValue(wrapped bool) lex.Action {
	return func(_ *lex.Scanner, match *machines.Match) (interface{}, error) {
		text := string(match.Bytes)
		if wrapped {
			text = unwrap(text)
		}
		tok := &Token{
			PathElementType: Value,
			OperationType:   None,
			Value:           text,
			StringValue:     text,
		}
		return tok, nil
	}
}
func selfToken() lex.Action {
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
return &Token{PathElementType: SelfReference, OperationType: None, Value: "SELF", StringValue: "SELF"}, nil
2020-09-17 12:12:56 +00:00
}
}
2020-09-17 11:58:01 +00:00
// Creates the lexer object and compiles the NFA.
func initLexer() (*lex.Lexer, error) {
lexer := lex.NewLexer()
2020-10-16 01:29:26 +00:00
lexer.Add([]byte(`\(`), literalToken(OpenBracket, "(", false))
lexer.Add([]byte(`\)`), literalToken(CloseBracket, ")", true))
2020-10-11 00:24:22 +00:00
2020-10-16 01:29:26 +00:00
lexer.Add([]byte(`\.?\[\]`), literalToken(PathKey, "[]", true))
2020-10-18 00:31:36 +00:00
lexer.Add([]byte(`\.\.`), opToken(RecursiveDescent))
2020-10-11 00:24:22 +00:00
2020-10-17 11:10:47 +00:00
lexer.Add([]byte(`,`), opToken(Union))
2020-10-16 01:29:26 +00:00
lexer.Add([]byte(`length`), opToken(Length))
2020-10-17 11:10:47 +00:00
lexer.Add([]byte(`select`), opToken(Select))
lexer.Add([]byte(`or`), opToken(Or))
2020-10-17 11:39:01 +00:00
// lexer.Add([]byte(`and`), opToken())
2020-10-17 11:10:47 +00:00
lexer.Add([]byte(`collect`), opToken(Collect))
2020-10-11 00:24:22 +00:00
2020-10-16 01:29:26 +00:00
lexer.Add([]byte(`\s*==\s*`), opToken(Equals))
2020-10-11 00:24:22 +00:00
2020-10-16 01:29:26 +00:00
lexer.Add([]byte(`\s*.-\s*`), opToken(DeleteChild))
2020-10-11 00:24:22 +00:00
2020-10-16 01:29:26 +00:00
lexer.Add([]byte(`\s*\|=\s*`), opToken(Assign))
lexer.Add([]byte(`\[-?[0-9]+\]`), arrayIndextoken(false))
lexer.Add([]byte(`\.\[-?[0-9]+\]`), arrayIndextoken(true))
2020-10-11 00:24:22 +00:00
2020-09-17 11:58:01 +00:00
lexer.Add([]byte("( |\t|\n|\r)+"), skip)
2020-10-11 00:24:22 +00:00
2020-10-19 09:05:38 +00:00
lexer.Add([]byte(`d[0-9]+`), documentToken()) // $0
2020-10-16 01:29:26 +00:00
lexer.Add([]byte(`\."[^ "]+"`), pathToken(true))
lexer.Add([]byte(`\.[^ \[\],\|\.\[\(\)=]+`), pathToken(false))
lexer.Add([]byte(`\.`), selfToken())
lexer.Add([]byte(`\|`), opToken(Pipe))
2020-10-17 11:39:01 +00:00
lexer.Add([]byte(`-?\d+(\.\d+)`), floatValue())
lexer.Add([]byte(`-?[1-9](\.\d+)?[Ee][-+]?\d+`), floatValue())
lexer.Add([]byte(`-?\d+`), numberValue())
2020-10-16 01:29:26 +00:00
lexer.Add([]byte(`[Tt][Rr][Uu][Ee]`), booleanValue(true))
lexer.Add([]byte(`[Ff][Aa][Ll][Ss][Ee]`), booleanValue(false))
2020-10-11 00:24:22 +00:00
2020-10-16 01:29:26 +00:00
lexer.Add([]byte(`"[^ "]+"`), stringValue(true))
lexer.Add([]byte(`\[`), literalToken(OpenCollect, "[", false))
lexer.Add([]byte(`\]`), literalToken(CloseCollect, "]", true))
2020-10-18 21:36:33 +00:00
lexer.Add([]byte(`\*`), opToken(Multiply))
2020-10-16 01:29:26 +00:00
// lexer.Add([]byte(`[^ \,\|\.\[\(\)=]+`), stringValue(false))
2020-09-17 11:58:01 +00:00
err := lexer.Compile()
if err != nil {
return nil, err
}
return lexer, nil
}
type PathTokeniser interface {
2020-10-11 00:24:22 +00:00
Tokenise(path string) ([]*Token, error)
2020-09-17 11:58:01 +00:00
}
// pathTokeniser is the PathTokeniser implementation returned by
// NewPathTokeniser.
type pathTokeniser struct {
	// lexer is the lexer built by initLexer; Tokenise creates one scanner
	// from it per call.
	lexer *lex.Lexer
}
// NewPathTokeniser builds a PathTokeniser backed by a freshly initialised
// lexer. It panics if the lexer cannot be initialised.
func NewPathTokeniser() PathTokeniser {
	compiled, err := initLexer()
	if err != nil {
		panic(err)
	}
	return &pathTokeniser{lexer: compiled}
}
2020-10-11 00:24:22 +00:00
func (p *pathTokeniser) Tokenise(path string) ([]*Token, error) {
2020-09-17 11:58:01 +00:00
scanner, err := p.lexer.Scanner([]byte(path))
if err != nil {
return nil, err
}
2020-10-11 00:24:22 +00:00
var tokens []*Token
2020-09-17 11:58:01 +00:00
for tok, err, eof := scanner.Next(); !eof; tok, err, eof = scanner.Next() {
if tok != nil {
2020-10-11 00:24:22 +00:00
token := tok.(*Token)
2020-10-11 23:44:33 +00:00
log.Debugf("Tokenising %v - %v", token.Value, token.OperationType.Type)
2020-09-17 11:58:01 +00:00
tokens = append(tokens, token)
}
if err != nil {
return nil, err
}
}
2020-10-11 00:24:22 +00:00
var postProcessedTokens = make([]*Token, 0)
2020-09-24 00:52:45 +00:00
for index, token := range tokens {
postProcessedTokens = append(postProcessedTokens, token)
2020-10-11 00:24:22 +00:00
if index != len(tokens)-1 && token.CheckForPostTraverse &&
tokens[index+1].PathElementType == PathKey {
2020-10-16 01:29:26 +00:00
postProcessedTokens = append(postProcessedTokens, &Token{PathElementType: Operation, OperationType: Pipe, Value: "PIPE"})
2020-09-24 00:52:45 +00:00
}
}
2020-09-17 11:58:01 +00:00
2020-09-24 00:52:45 +00:00
return postProcessedTokens, nil
2020-09-17 11:58:01 +00:00
}