2020-11-03 23:48:43 +00:00
|
|
|
package yqlib
|
2020-09-17 11:58:01 +00:00
|
|
|
|
|
|
|
import (
|
2020-09-17 12:12:56 +00:00
|
|
|
"strconv"
|
2020-09-17 11:58:01 +00:00
|
|
|
|
|
|
|
lex "github.com/timtadh/lexmachine"
|
|
|
|
"github.com/timtadh/lexmachine/machines"
|
|
|
|
)
|
|
|
|
|
2020-10-11 00:24:22 +00:00
|
|
|
func skip(*lex.Scanner, *machines.Match) (interface{}, error) {
|
|
|
|
return nil, nil
|
|
|
|
}
|
2020-09-17 11:58:01 +00:00
|
|
|
|
2020-10-20 02:53:26 +00:00
|
|
|
// TokenType identifies the kind of a lexed Token. It is compared against the
// OperationToken, OpenBracket, CloseBracket, ... constants.
type TokenType uint32
|
|
|
|
|
|
|
|
// Token kinds stored in Token.TokenType. The constants are untyped and each
// one occupies its own bit (1, 2, 4, ...), assigned in declaration order.
const (
	// OperationToken marks a token that carries an Operation.
	OperationToken = 1 << iota
	// OpenBracket is rendered as "(".
	OpenBracket
	// CloseBracket is rendered as ")".
	CloseBracket
	// OpenCollect is rendered as "[".
	OpenCollect
	// CloseCollect is rendered as "]".
	CloseCollect
	// OpenCollectObject is rendered as "{".
	OpenCollectObject
	// CloseCollectObject is rendered as "}".
	CloseCollectObject
)
|
|
|
|
|
2020-10-11 00:24:22 +00:00
|
|
|
type Token struct {
|
2020-11-19 05:45:05 +00:00
|
|
|
TokenType TokenType
|
|
|
|
Operation *Operation
|
|
|
|
AssignOperation *Operation // e.g. tag (GetTag) op becomes AssignTag if '=' follows it
|
|
|
|
CheckForPostTraverse bool // e.g. [1]cat should really be [1].cat
|
2020-09-20 12:40:09 +00:00
|
|
|
|
2020-09-17 11:58:01 +00:00
|
|
|
}
|
|
|
|
|
2020-10-20 04:33:20 +00:00
|
|
|
func (t *Token) toString() string {
|
|
|
|
if t.TokenType == OperationToken {
|
|
|
|
return t.Operation.toString()
|
|
|
|
} else if t.TokenType == OpenBracket {
|
|
|
|
return "("
|
|
|
|
} else if t.TokenType == CloseBracket {
|
|
|
|
return ")"
|
|
|
|
} else if t.TokenType == OpenCollect {
|
|
|
|
return "["
|
|
|
|
} else if t.TokenType == CloseCollect {
|
|
|
|
return "]"
|
2020-10-21 01:54:58 +00:00
|
|
|
} else if t.TokenType == OpenCollectObject {
|
|
|
|
return "{"
|
|
|
|
} else if t.TokenType == CloseCollectObject {
|
|
|
|
return "}"
|
2020-10-20 04:33:20 +00:00
|
|
|
} else {
|
2020-11-13 03:07:11 +00:00
|
|
|
return "NFI"
|
2020-10-20 04:33:20 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-10-11 00:24:22 +00:00
|
|
|
func pathToken(wrapped bool) lex.Action {
|
2020-09-17 11:58:01 +00:00
|
|
|
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
|
2020-10-11 00:24:22 +00:00
|
|
|
value := string(m.Bytes)
|
2020-11-13 03:07:11 +00:00
|
|
|
value = value[1:]
|
2020-10-11 00:24:22 +00:00
|
|
|
if wrapped {
|
|
|
|
value = unwrap(value)
|
|
|
|
}
|
2020-10-20 04:33:20 +00:00
|
|
|
op := &Operation{OperationType: TraversePath, Value: value, StringValue: value}
|
|
|
|
return &Token{TokenType: OperationToken, Operation: op, CheckForPostTraverse: true}, nil
|
2020-10-20 02:53:26 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func literalPathToken(value string) lex.Action {
|
|
|
|
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
|
2020-10-20 04:33:20 +00:00
|
|
|
op := &Operation{OperationType: TraversePath, Value: value, StringValue: value}
|
|
|
|
return &Token{TokenType: OperationToken, Operation: op, CheckForPostTraverse: true}, nil
|
2020-09-17 11:58:01 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-10-19 09:05:38 +00:00
|
|
|
func documentToken() lex.Action {
|
|
|
|
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
|
|
|
|
var numberString = string(m.Bytes)
|
2020-11-13 03:07:11 +00:00
|
|
|
numberString = numberString[1:]
|
2020-10-19 09:05:38 +00:00
|
|
|
var number, errParsingInt = strconv.ParseInt(numberString, 10, 64) // nolint
|
|
|
|
if errParsingInt != nil {
|
|
|
|
return nil, errParsingInt
|
|
|
|
}
|
2020-10-20 04:33:20 +00:00
|
|
|
op := &Operation{OperationType: DocumentFilter, Value: number, StringValue: numberString}
|
|
|
|
return &Token{TokenType: OperationToken, Operation: op, CheckForPostTraverse: true}, nil
|
2020-10-19 09:05:38 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-10-16 01:29:26 +00:00
|
|
|
func opToken(op *OperationType) lex.Action {
|
2020-11-19 05:45:05 +00:00
|
|
|
return opTokenWithPrefs(op, nil, nil)
|
2020-11-06 00:23:26 +00:00
|
|
|
}
|
|
|
|
|
2020-11-19 05:45:05 +00:00
|
|
|
func opAssignableToken(opType *OperationType, assignOpType *OperationType) lex.Action {
|
|
|
|
return opTokenWithPrefs(opType, assignOpType, nil)
|
|
|
|
}
|
|
|
|
|
|
|
|
func opTokenWithPrefs(op *OperationType, assignOpType *OperationType, preferences interface{}) lex.Action {
|
2020-10-11 00:24:22 +00:00
|
|
|
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
|
|
|
|
value := string(m.Bytes)
|
2020-11-06 00:23:26 +00:00
|
|
|
op := &Operation{OperationType: op, Value: op.Type, StringValue: value, Preferences: preferences}
|
2020-11-19 05:45:05 +00:00
|
|
|
var assign *Operation
|
|
|
|
if assignOpType != nil {
|
|
|
|
assign = &Operation{OperationType: assignOpType, Value: assignOpType.Type, StringValue: value, Preferences: preferences}
|
|
|
|
}
|
|
|
|
return &Token{TokenType: OperationToken, Operation: op, AssignOperation: assign}, nil
|
2020-10-11 00:24:22 +00:00
|
|
|
}
|
2020-09-17 12:12:56 +00:00
|
|
|
}
|
|
|
|
|
2020-10-20 04:33:20 +00:00
|
|
|
func literalToken(pType TokenType, checkForPost bool) lex.Action {
|
2020-09-17 12:12:56 +00:00
|
|
|
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
|
2020-10-20 04:33:20 +00:00
|
|
|
return &Token{TokenType: pType, CheckForPostTraverse: checkForPost}, nil
|
2020-09-17 12:12:56 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-10-11 00:24:22 +00:00
|
|
|
// unwrap strips the first and last byte of value. Callers use it to remove
// the surrounding double quotes from a matched string.
//
// A value shorter than two bytes has no surrounding pair to strip, so it is
// returned unchanged rather than panicking on an out-of-range slice.
func unwrap(value string) string {
	if len(value) < 2 {
		return value
	}
	return value[1 : len(value)-1]
}
|
|
|
|
|
2020-10-16 01:29:26 +00:00
|
|
|
func arrayIndextoken(precedingDot bool) lex.Action {
|
2020-09-17 12:12:56 +00:00
|
|
|
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
|
|
|
|
var numberString = string(m.Bytes)
|
2020-10-16 01:29:26 +00:00
|
|
|
startIndex := 1
|
|
|
|
if precedingDot {
|
|
|
|
startIndex = 2
|
2020-09-17 12:12:56 +00:00
|
|
|
}
|
2020-10-16 01:29:26 +00:00
|
|
|
numberString = numberString[startIndex : len(numberString)-1]
|
2020-09-17 12:12:56 +00:00
|
|
|
var number, errParsingInt = strconv.ParseInt(numberString, 10, 64) // nolint
|
|
|
|
if errParsingInt != nil {
|
|
|
|
return nil, errParsingInt
|
|
|
|
}
|
2020-10-20 04:33:20 +00:00
|
|
|
op := &Operation{OperationType: TraversePath, Value: number, StringValue: numberString}
|
|
|
|
return &Token{TokenType: OperationToken, Operation: op, CheckForPostTraverse: true}, nil
|
2020-10-16 01:29:26 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func numberValue() lex.Action {
|
|
|
|
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
|
|
|
|
var numberString = string(m.Bytes)
|
|
|
|
var number, errParsingInt = strconv.ParseInt(numberString, 10, 64) // nolint
|
|
|
|
if errParsingInt != nil {
|
|
|
|
return nil, errParsingInt
|
|
|
|
}
|
2020-10-20 04:33:20 +00:00
|
|
|
|
|
|
|
return &Token{TokenType: OperationToken, Operation: CreateValueOperation(number, numberString)}, nil
|
2020-10-16 01:29:26 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-10-17 11:39:01 +00:00
|
|
|
func floatValue() lex.Action {
|
|
|
|
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
|
|
|
|
var numberString = string(m.Bytes)
|
|
|
|
var number, errParsingInt = strconv.ParseFloat(numberString, 64) // nolint
|
|
|
|
if errParsingInt != nil {
|
|
|
|
return nil, errParsingInt
|
|
|
|
}
|
2020-10-20 04:33:20 +00:00
|
|
|
return &Token{TokenType: OperationToken, Operation: CreateValueOperation(number, numberString)}, nil
|
2020-10-17 11:39:01 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-10-16 01:29:26 +00:00
|
|
|
func booleanValue(val bool) lex.Action {
|
|
|
|
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
|
2020-10-20 04:33:20 +00:00
|
|
|
return &Token{TokenType: OperationToken, Operation: CreateValueOperation(val, string(m.Bytes))}, nil
|
2020-10-16 01:29:26 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func stringValue(wrapped bool) lex.Action {
|
|
|
|
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
|
|
|
|
value := string(m.Bytes)
|
|
|
|
if wrapped {
|
|
|
|
value = unwrap(value)
|
|
|
|
}
|
2020-10-20 04:33:20 +00:00
|
|
|
return &Token{TokenType: OperationToken, Operation: CreateValueOperation(value, value)}, nil
|
2020-10-16 01:29:26 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-10-20 04:40:11 +00:00
|
|
|
func nullValue() lex.Action {
|
|
|
|
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
|
|
|
|
return &Token{TokenType: OperationToken, Operation: CreateValueOperation(nil, string(m.Bytes))}, nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-10-16 01:29:26 +00:00
|
|
|
func selfToken() lex.Action {
|
|
|
|
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
|
2020-10-20 04:33:20 +00:00
|
|
|
op := &Operation{OperationType: SelfReference}
|
|
|
|
return &Token{TokenType: OperationToken, Operation: op}, nil
|
2020-09-17 12:12:56 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-09-17 11:58:01 +00:00
|
|
|
func initLexer() (*lex.Lexer, error) {
|
|
|
|
lexer := lex.NewLexer()
|
2020-10-20 04:33:20 +00:00
|
|
|
lexer.Add([]byte(`\(`), literalToken(OpenBracket, false))
|
|
|
|
lexer.Add([]byte(`\)`), literalToken(CloseBracket, true))
|
2020-10-11 00:24:22 +00:00
|
|
|
|
2020-10-20 02:53:26 +00:00
|
|
|
lexer.Add([]byte(`\.?\[\]`), literalPathToken("[]"))
|
2020-10-18 00:31:36 +00:00
|
|
|
lexer.Add([]byte(`\.\.`), opToken(RecursiveDescent))
|
2020-10-11 00:24:22 +00:00
|
|
|
|
2020-10-17 11:10:47 +00:00
|
|
|
lexer.Add([]byte(`,`), opToken(Union))
|
2020-10-21 01:54:58 +00:00
|
|
|
lexer.Add([]byte(`:\s*`), opToken(CreateMap))
|
2020-10-16 01:29:26 +00:00
|
|
|
lexer.Add([]byte(`length`), opToken(Length))
|
2020-10-17 11:10:47 +00:00
|
|
|
lexer.Add([]byte(`select`), opToken(Select))
|
2020-11-02 00:20:38 +00:00
|
|
|
lexer.Add([]byte(`explode`), opToken(Explode))
|
2020-10-17 11:10:47 +00:00
|
|
|
lexer.Add([]byte(`or`), opToken(Or))
|
2020-10-20 05:27:30 +00:00
|
|
|
lexer.Add([]byte(`not`), opToken(Not))
|
2020-11-02 00:20:38 +00:00
|
|
|
|
2020-11-06 01:11:38 +00:00
|
|
|
lexer.Add([]byte(`documentIndex`), opToken(GetDocumentIndex))
|
|
|
|
|
2020-11-19 05:45:05 +00:00
|
|
|
lexer.Add([]byte(`style`), opAssignableToken(GetStyle, AssignStyle))
|
2020-11-02 00:20:38 +00:00
|
|
|
|
2020-11-19 05:45:05 +00:00
|
|
|
lexer.Add([]byte(`tag`), opAssignableToken(GetTag, AssignTag))
|
2020-11-06 00:45:18 +00:00
|
|
|
|
2020-11-19 05:45:05 +00:00
|
|
|
lexer.Add([]byte(`lineComment`), opTokenWithPrefs(GetComment, AssignComment, &CommentOpPreferences{LineComment: true}))
|
2020-11-06 00:45:18 +00:00
|
|
|
|
2020-11-19 05:45:05 +00:00
|
|
|
lexer.Add([]byte(`headComment`), opTokenWithPrefs(GetComment, AssignComment, &CommentOpPreferences{HeadComment: true}))
|
2020-11-06 00:45:18 +00:00
|
|
|
|
2020-11-19 05:45:05 +00:00
|
|
|
lexer.Add([]byte(`footComment`), opTokenWithPrefs(GetComment, AssignComment, &CommentOpPreferences{FootComment: true}))
|
|
|
|
|
|
|
|
lexer.Add([]byte(`comments\s*=`), opTokenWithPrefs(AssignComment, nil, &CommentOpPreferences{LineComment: true, HeadComment: true, FootComment: true}))
|
2020-11-06 00:23:26 +00:00
|
|
|
|
2020-10-17 11:10:47 +00:00
|
|
|
lexer.Add([]byte(`collect`), opToken(Collect))
|
2020-10-11 00:24:22 +00:00
|
|
|
|
2020-10-16 01:29:26 +00:00
|
|
|
lexer.Add([]byte(`\s*==\s*`), opToken(Equals))
|
2020-11-19 06:08:13 +00:00
|
|
|
lexer.Add([]byte(`\s*=\s*`), opTokenWithPrefs(Assign, nil, &AssignOpPreferences{false}))
|
2020-10-11 00:24:22 +00:00
|
|
|
|
2020-11-14 02:38:44 +00:00
|
|
|
lexer.Add([]byte(`del`), opToken(DeleteChild))
|
2020-10-11 00:24:22 +00:00
|
|
|
|
2020-11-19 06:08:13 +00:00
|
|
|
lexer.Add([]byte(`\s*\|=\s*`), opTokenWithPrefs(Assign, nil, &AssignOpPreferences{true}))
|
2020-10-16 01:29:26 +00:00
|
|
|
|
|
|
|
lexer.Add([]byte(`\[-?[0-9]+\]`), arrayIndextoken(false))
|
|
|
|
lexer.Add([]byte(`\.\[-?[0-9]+\]`), arrayIndextoken(true))
|
2020-10-11 00:24:22 +00:00
|
|
|
|
2020-09-17 11:58:01 +00:00
|
|
|
lexer.Add([]byte("( |\t|\n|\r)+"), skip)
|
2020-10-11 00:24:22 +00:00
|
|
|
|
2020-11-13 02:19:54 +00:00
|
|
|
lexer.Add([]byte(`d[0-9]+`), documentToken())
|
2020-10-16 01:29:26 +00:00
|
|
|
lexer.Add([]byte(`\."[^ "]+"`), pathToken(true))
|
2020-10-21 01:54:58 +00:00
|
|
|
lexer.Add([]byte(`\.[^ \}\{\:\[\],\|\.\[\(\)=]+`), pathToken(false))
|
2020-10-16 01:29:26 +00:00
|
|
|
lexer.Add([]byte(`\.`), selfToken())
|
|
|
|
|
|
|
|
lexer.Add([]byte(`\|`), opToken(Pipe))
|
|
|
|
|
2020-10-17 11:39:01 +00:00
|
|
|
lexer.Add([]byte(`-?\d+(\.\d+)`), floatValue())
|
|
|
|
lexer.Add([]byte(`-?[1-9](\.\d+)?[Ee][-+]?\d+`), floatValue())
|
|
|
|
lexer.Add([]byte(`-?\d+`), numberValue())
|
2020-10-16 01:29:26 +00:00
|
|
|
|
|
|
|
lexer.Add([]byte(`[Tt][Rr][Uu][Ee]`), booleanValue(true))
|
|
|
|
lexer.Add([]byte(`[Ff][Aa][Ll][Ss][Ee]`), booleanValue(false))
|
2020-10-11 00:24:22 +00:00
|
|
|
|
2020-10-20 04:40:11 +00:00
|
|
|
lexer.Add([]byte(`[Nn][Uu][Ll][Ll]`), nullValue())
|
|
|
|
lexer.Add([]byte(`~`), nullValue())
|
|
|
|
|
2020-11-02 00:20:38 +00:00
|
|
|
lexer.Add([]byte(`"[^ "]*"`), stringValue(true))
|
2020-10-16 01:29:26 +00:00
|
|
|
|
2020-10-20 04:33:20 +00:00
|
|
|
lexer.Add([]byte(`\[`), literalToken(OpenCollect, false))
|
|
|
|
lexer.Add([]byte(`\]`), literalToken(CloseCollect, true))
|
2020-10-21 01:54:58 +00:00
|
|
|
lexer.Add([]byte(`\{`), literalToken(OpenCollectObject, false))
|
|
|
|
lexer.Add([]byte(`\}`), literalToken(CloseCollectObject, true))
|
2020-10-18 21:36:33 +00:00
|
|
|
lexer.Add([]byte(`\*`), opToken(Multiply))
|
2020-10-16 01:29:26 +00:00
|
|
|
|
2020-09-17 11:58:01 +00:00
|
|
|
err := lexer.Compile()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return lexer, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
type PathTokeniser interface {
|
2020-10-11 00:24:22 +00:00
|
|
|
Tokenise(path string) ([]*Token, error)
|
2020-09-17 11:58:01 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
type pathTokeniser struct {
|
|
|
|
lexer *lex.Lexer
|
|
|
|
}
|
|
|
|
|
|
|
|
func NewPathTokeniser() PathTokeniser {
|
|
|
|
var lexer, err = initLexer()
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
return &pathTokeniser{lexer}
|
|
|
|
}
|
|
|
|
|
2020-10-11 00:24:22 +00:00
|
|
|
func (p *pathTokeniser) Tokenise(path string) ([]*Token, error) {
|
2020-09-17 11:58:01 +00:00
|
|
|
scanner, err := p.lexer.Scanner([]byte(path))
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2020-10-11 00:24:22 +00:00
|
|
|
var tokens []*Token
|
2020-09-17 11:58:01 +00:00
|
|
|
for tok, err, eof := scanner.Next(); !eof; tok, err, eof = scanner.Next() {
|
|
|
|
|
|
|
|
if tok != nil {
|
2020-10-11 00:24:22 +00:00
|
|
|
token := tok.(*Token)
|
2020-10-20 04:33:20 +00:00
|
|
|
log.Debugf("Tokenising %v", token.toString())
|
2020-09-17 11:58:01 +00:00
|
|
|
tokens = append(tokens, token)
|
|
|
|
}
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
}
|
2020-10-11 00:24:22 +00:00
|
|
|
var postProcessedTokens = make([]*Token, 0)
|
2020-09-24 00:52:45 +00:00
|
|
|
|
2020-11-19 05:45:05 +00:00
|
|
|
skipNextToken := false
|
2020-10-11 00:24:22 +00:00
|
|
|
|
2020-11-19 05:45:05 +00:00
|
|
|
for index, token := range tokens {
|
|
|
|
if skipNextToken {
|
|
|
|
skipNextToken = false
|
|
|
|
} else {
|
|
|
|
|
|
|
|
if index != len(tokens)-1 && token.AssignOperation != nil &&
|
|
|
|
tokens[index+1].TokenType == OperationToken &&
|
2020-11-19 06:08:13 +00:00
|
|
|
tokens[index+1].Operation.OperationType == Assign {
|
2020-11-19 05:45:05 +00:00
|
|
|
token.Operation = token.AssignOperation
|
|
|
|
skipNextToken = true
|
|
|
|
}
|
|
|
|
|
|
|
|
postProcessedTokens = append(postProcessedTokens, token)
|
|
|
|
|
|
|
|
if index != len(tokens)-1 && token.CheckForPostTraverse &&
|
|
|
|
tokens[index+1].TokenType == OperationToken &&
|
|
|
|
tokens[index+1].Operation.OperationType == TraversePath {
|
|
|
|
op := &Operation{OperationType: Pipe, Value: "PIPE"}
|
|
|
|
postProcessedTokens = append(postProcessedTokens, &Token{TokenType: OperationToken, Operation: op})
|
|
|
|
}
|
2020-09-24 00:52:45 +00:00
|
|
|
}
|
|
|
|
}
|
2020-09-17 11:58:01 +00:00
|
|
|
|
2020-09-24 00:52:45 +00:00
|
|
|
return postProcessedTokens, nil
|
2020-09-17 11:58:01 +00:00
|
|
|
}
|