package yqlib import ( "fmt" "strconv" "strings" lex "github.com/timtadh/lexmachine" "github.com/timtadh/lexmachine/machines" ) func skip(*lex.Scanner, *machines.Match) (interface{}, error) { return nil, nil } type tokenType uint32 const ( operationToken = 1 << iota openBracket closeBracket openCollect closeCollect openCollectObject closeCollectObject traverseArrayCollect ) type token struct { TokenType tokenType Operation *Operation AssignOperation *Operation // e.g. tag (GetTag) op becomes AssignTag if '=' follows it CheckForPostTraverse bool // e.g. [1]cat should really be [1].cat Match *machines.Match // match that created this token } func (t *token) toString(detail bool) string { if t.TokenType == operationToken { if detail { return fmt.Sprintf("%v (%v)", t.Operation.toString(), t.Operation.OperationType.Precedence) } return t.Operation.toString() } else if t.TokenType == openBracket { return "(" } else if t.TokenType == closeBracket { return ")" } else if t.TokenType == openCollect { return "[" } else if t.TokenType == closeCollect { return "]" } else if t.TokenType == openCollectObject { return "{" } else if t.TokenType == closeCollectObject { return "}" } else if t.TokenType == traverseArrayCollect { return ".[" } else { return "NFI" } } func pathToken(wrapped bool) lex.Action { return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { value := string(m.Bytes) prefs := traversePreferences{} if value[len(value)-1:] == "?" { prefs.OptionalTraverse = true value = value[:len(value)-1] } value = value[1:] if wrapped { value = unwrap(value) } log.Debug("PathToken %v", value) op := &Operation{OperationType: traversePathOpType, Value: value, StringValue: value, Preferences: prefs} return &token{TokenType: operationToken, Operation: op, CheckForPostTraverse: true}, nil } } func opToken(op *operationType) lex.Action { return opTokenWithPrefs(op, nil, nil) } func opAssignableToken(opType *operationType, assignOpType *operationType) lex.Action { return opTokenWithPrefs(opType, assignOpType, nil) } func assignOpToken(updateAssign bool) lex.Action { return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { log.Debug("assignOpToken %v", string(m.Bytes)) value := string(m.Bytes) op := &Operation{OperationType: assignOpType, Value: assignOpType.Type, StringValue: value, UpdateAssign: updateAssign} return &token{TokenType: operationToken, Operation: op}, nil } } func multiplyWithPrefs() lex.Action { return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { prefs := multiplyPreferences{} options := string(m.Bytes) if strings.Contains(options, "+") { prefs.AppendArrays = true } if strings.Contains(options, "?") { prefs.TraversePrefs = traversePreferences{DontAutoCreate: true} } if strings.Contains(options, "d") { prefs.DeepMergeArrays = true } prefs.TraversePrefs.DontFollowAlias = true op := &Operation{OperationType: multiplyOpType, Value: multiplyOpType.Type, StringValue: options, Preferences: prefs} return &token{TokenType: operationToken, Operation: op}, nil } } func opTokenWithPrefs(op *operationType, assignOpType *operationType, preferences interface{}) lex.Action { return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { log.Debug("opTokenWithPrefs %v", string(m.Bytes)) value := string(m.Bytes) op := &Operation{OperationType: op, Value: op.Type, StringValue: value, Preferences: preferences} var assign *Operation if assignOpType != nil { assign = &Operation{OperationType: assignOpType, Value: assignOpType.Type, StringValue: value, Preferences: preferences} } return &token{TokenType: operationToken, Operation: op, AssignOperation: assign}, nil } } func assignAllCommentsOp(updateAssign bool) lex.Action { return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { log.Debug("assignAllCommentsOp %v", string(m.Bytes)) value := string(m.Bytes) op := &Operation{ OperationType: assignCommentOpType, Value: assignCommentOpType.Type, StringValue: value, UpdateAssign: updateAssign, Preferences: commentOpPreferences{LineComment: true, HeadComment: true, FootComment: true}, } return &token{TokenType: operationToken, Operation: op}, nil } } func literalToken(pType tokenType, checkForPost bool) lex.Action { return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { return &token{TokenType: pType, CheckForPostTraverse: checkForPost, Match: m}, nil } } func unwrap(value string) string { return value[1 : len(value)-1] } func numberValue() lex.Action { return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { var numberString = string(m.Bytes) var number, errParsingInt = strconv.ParseInt(numberString, 10, 64) // nolint if errParsingInt != nil { return nil, errParsingInt } return &token{TokenType: operationToken, Operation: createValueOperation(number, numberString)}, nil } } func hexValue() lex.Action { return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { var originalString = string(m.Bytes) var numberString = originalString[2:] log.Debugf("numberString: %v", numberString) var number, errParsingInt = strconv.ParseInt(numberString, 16, 64) // nolint if errParsingInt != nil { return nil, errParsingInt } return &token{TokenType: operationToken, Operation: createValueOperation(number, originalString)}, nil } } func floatValue() lex.Action { return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { var numberString = string(m.Bytes) var number, errParsingInt = strconv.ParseFloat(numberString, 64) // nolint if errParsingInt != nil { return nil, errParsingInt } return &token{TokenType: operationToken, Operation: createValueOperation(number, numberString)}, nil } } func booleanValue(val bool) lex.Action { return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { return &token{TokenType: operationToken, Operation: createValueOperation(val, string(m.Bytes))}, nil } } func stringValue(wrapped bool) lex.Action { return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { value := string(m.Bytes) if wrapped { value = unwrap(value) } value = strings.ReplaceAll(value, "\\\"", "\"") return &token{TokenType: operationToken, Operation: createValueOperation(value, value)}, nil } } func getVariableOpToken() lex.Action { return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { value := string(m.Bytes) value = value[1:] getVarOperation := createValueOperation(value, value) getVarOperation.OperationType = getVariableOpType return &token{TokenType: operationToken, Operation: getVarOperation, CheckForPostTraverse: true}, nil } } func envOp(strenv bool) lex.Action { return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { value := string(m.Bytes) preferences := envOpPreferences{} if strenv { // strenv( ) value = value[7 : len(value)-1] preferences.StringValue = true } else { //env( ) value = value[4 : len(value)-1] } envOperation := createValueOperation(value, value) envOperation.OperationType = envOpType envOperation.Preferences = preferences return &token{TokenType: operationToken, Operation: envOperation}, nil } } func nullValue() lex.Action { return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { return &token{TokenType: operationToken, Operation: createValueOperation(nil, string(m.Bytes))}, nil } } func selfToken() lex.Action { return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { op := &Operation{OperationType: selfReferenceOpType} return &token{TokenType: operationToken, Operation: op}, nil } } func initLexer() (*lex.Lexer, error) { lexer := lex.NewLexer() lexer.Add([]byte(`\(`), literalToken(openBracket, false)) lexer.Add([]byte(`\)`), literalToken(closeBracket, true)) lexer.Add([]byte(`\.\[`), literalToken(traverseArrayCollect, false)) lexer.Add([]byte(`\.\.`), opTokenWithPrefs(recursiveDescentOpType, nil, recursiveDescentPreferences{RecurseArray: true, TraversePreferences: traversePreferences{DontFollowAlias: true, IncludeMapKeys: false}})) lexer.Add([]byte(`\.\.\.`), opTokenWithPrefs(recursiveDescentOpType, nil, recursiveDescentPreferences{RecurseArray: true, TraversePreferences: traversePreferences{DontFollowAlias: true, IncludeMapKeys: true}})) lexer.Add([]byte(`,`), opToken(unionOpType)) lexer.Add([]byte(`:\s*`), opToken(createMapOpType)) lexer.Add([]byte(`length`), opToken(lengthOpType)) lexer.Add([]byte(`sortKeys`), opToken(sortKeysOpType)) lexer.Add([]byte(`select`), opToken(selectOpType)) lexer.Add([]byte(`has`), opToken(hasOpType)) lexer.Add([]byte(`unique`), opToken(uniqueOpType)) lexer.Add([]byte(`unique_by`), opToken(uniqueByOpType)) lexer.Add([]byte(`explode`), opToken(explodeOpType)) lexer.Add([]byte(`or`), opToken(orOpType)) lexer.Add([]byte(`and`), opToken(andOpType)) lexer.Add([]byte(`not`), opToken(notOpType)) lexer.Add([]byte(`ireduce`), opToken(reduceOpType)) lexer.Add([]byte(`;`), opToken(blockOpType)) lexer.Add([]byte(`\/\/`), opToken(alternativeOpType)) lexer.Add([]byte(`documentIndex`), opToken(getDocumentIndexOpType)) lexer.Add([]byte(`di`), opToken(getDocumentIndexOpType)) lexer.Add([]byte(`splitDoc`), opToken(splitDocumentOpType)) lexer.Add([]byte(`join`), opToken(joinStringOpType)) lexer.Add([]byte(`sub`), opToken(subStringOpType)) lexer.Add([]byte(`match`), opToken(matchOpType)) lexer.Add([]byte(`capture`), opToken(captureOpType)) lexer.Add([]byte(`test`), opToken(testOpType)) lexer.Add([]byte(`any`), opToken(anyOpType)) lexer.Add([]byte(`any_c`), opToken(anyConditionOpType)) lexer.Add([]byte(`all`), opToken(allOpType)) lexer.Add([]byte(`all_c`), opToken(allConditionOpType)) lexer.Add([]byte(`contains`), opToken(containsOpType)) lexer.Add([]byte(`split`), opToken(splitStringOpType)) lexer.Add([]byte(`keys`), opToken(keysOpType)) lexer.Add([]byte(`style`), opAssignableToken(getStyleOpType, assignStyleOpType)) lexer.Add([]byte(`tag`), opAssignableToken(getTagOpType, assignTagOpType)) lexer.Add([]byte(`anchor`), opAssignableToken(getAnchorOpType, assignAnchorOpType)) lexer.Add([]byte(`alias`), opAssignableToken(getAliasOptype, assignAliasOpType)) lexer.Add([]byte(`filename`), opToken(getFilenameOpType)) lexer.Add([]byte(`fileIndex`), opToken(getFileIndexOpType)) lexer.Add([]byte(`fi`), opToken(getFileIndexOpType)) lexer.Add([]byte(`path`), opToken(getPathOpType)) lexer.Add([]byte(`to_entries`), opToken(toEntriesOpType)) lexer.Add([]byte(`from_entries`), opToken(fromEntriesOpType)) lexer.Add([]byte(`with_entries`), opToken(withEntriesOpType)) lexer.Add([]byte(`with`), opToken(withOpType)) lexer.Add([]byte(`lineComment`), opTokenWithPrefs(getCommentOpType, assignCommentOpType, commentOpPreferences{LineComment: true})) lexer.Add([]byte(`headComment`), opTokenWithPrefs(getCommentOpType, assignCommentOpType, commentOpPreferences{HeadComment: true})) lexer.Add([]byte(`footComment`), opTokenWithPrefs(getCommentOpType, assignCommentOpType, commentOpPreferences{FootComment: true})) lexer.Add([]byte(`comments\s*=`), assignAllCommentsOp(false)) lexer.Add([]byte(`comments\s*\|=`), assignAllCommentsOp(true)) lexer.Add([]byte(`collect`), opToken(collectOpType)) lexer.Add([]byte(`\s*==\s*`), opToken(equalsOpType)) lexer.Add([]byte(`\s*!=\s*`), opToken(notEqualsOpType)) lexer.Add([]byte(`\s*=\s*`), assignOpToken(false)) lexer.Add([]byte(`del`), opToken(deleteChildOpType)) lexer.Add([]byte(`\s*\|=\s*`), assignOpToken(true)) lexer.Add([]byte("( |\t|\n|\r)+"), skip) lexer.Add([]byte(`\."[^ "]+"\??`), pathToken(true)) lexer.Add([]byte(`\.[^ ;\}\{\:\[\],\|\.\[\(\)=\n]+\??`), pathToken(false)) lexer.Add([]byte(`\.`), selfToken()) lexer.Add([]byte(`\|`), opToken(pipeOpType)) lexer.Add([]byte(`0[xX][0-9A-Fa-f]+`), hexValue()) lexer.Add([]byte(`-?\d+(\.\d+)`), floatValue()) lexer.Add([]byte(`-?[1-9](\.\d+)?[Ee][-+]?\d+`), floatValue()) lexer.Add([]byte(`-?\d+`), numberValue()) lexer.Add([]byte(`[Tt][Rr][Uu][Ee]`), booleanValue(true)) lexer.Add([]byte(`[Ff][Aa][Ll][Ss][Ee]`), booleanValue(false)) lexer.Add([]byte(`[Nn][Uu][Ll][Ll]`), nullValue()) lexer.Add([]byte(`~`), nullValue()) lexer.Add([]byte(`"([^"\\]*(\\.[^"\\]*)*)"`), stringValue(true)) lexer.Add([]byte(`strenv\([^\)]+\)`), envOp(true)) lexer.Add([]byte(`env\([^\)]+\)`), envOp(false)) lexer.Add([]byte(`\[`), literalToken(openCollect, false)) lexer.Add([]byte(`\]\??`), literalToken(closeCollect, true)) lexer.Add([]byte(`\{`), literalToken(openCollectObject, false)) lexer.Add([]byte(`\}`), literalToken(closeCollectObject, true)) lexer.Add([]byte(`\*[\+|\?d]*`), multiplyWithPrefs()) lexer.Add([]byte(`\+`), opToken(addOpType)) lexer.Add([]byte(`\+=`), opToken(addAssignOpType)) lexer.Add([]byte(`\-`), opToken(subtractOpType)) lexer.Add([]byte(`\-=`), opToken(subtractAssignOpType)) lexer.Add([]byte(`\$[a-zA-Z_-0-9]+`), getVariableOpToken()) lexer.Add([]byte(`as`), opTokenWithPrefs(assignVariableOpType, nil, assignVarPreferences{})) lexer.Add([]byte(`ref`), opTokenWithPrefs(assignVariableOpType, nil, assignVarPreferences{IsReference: true})) err := lexer.CompileNFA() if err != nil { return nil, err } return lexer, nil } type expressionTokeniser interface { Tokenise(expression string) ([]*token, error) } type expressionTokeniserImpl struct { lexer *lex.Lexer } func newExpressionTokeniser() expressionTokeniser { var lexer, err = initLexer() if err != nil { panic(err) } return &expressionTokeniserImpl{lexer} } func (p *expressionTokeniserImpl) Tokenise(expression string) ([]*token, error) { scanner, err := p.lexer.Scanner([]byte(expression)) if err != nil { return nil, fmt.Errorf("Parsing expression: %v", err) } var tokens []*token for tok, err, eof := scanner.Next(); !eof; tok, err, eof = scanner.Next() { if tok != nil { currentToken := tok.(*token) log.Debugf("Tokenising %v", currentToken.toString(true)) tokens = append(tokens, currentToken) } if err != nil { return nil, fmt.Errorf("Parsing expression: %v", err) } } var postProcessedTokens = make([]*token, 0) skipNextToken := false for index := range tokens { if skipNextToken { skipNextToken = false } else { postProcessedTokens, skipNextToken = p.handleToken(tokens, index, postProcessedTokens) } } return postProcessedTokens, nil } func (p *expressionTokeniserImpl) handleToken(tokens []*token, index int, postProcessedTokens []*token) (tokensAccum []*token, skipNextToken bool) { skipNextToken = false currentToken := tokens[index] log.Debug("processing %v", currentToken.toString(true)) if currentToken.TokenType == traverseArrayCollect { // `.[exp]`` works by creating a traversal array of [self, exp] and piping that into the traverse array operator //need to put a traverse array then a collect currentToken // do this by adding traverse then converting currentToken to collect log.Debug(" adding self") op := &Operation{OperationType: selfReferenceOpType, StringValue: "SELF"} postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: op}) log.Debug(" adding traverse array") op = &Operation{OperationType: traverseArrayOpType, StringValue: "TRAVERSE_ARRAY"} postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: op}) currentToken = &token{TokenType: openCollect} } if index != len(tokens)-1 && currentToken.AssignOperation != nil && tokens[index+1].TokenType == operationToken && tokens[index+1].Operation.OperationType == assignOpType { log.Debug(" its an update assign") currentToken.Operation = currentToken.AssignOperation currentToken.Operation.UpdateAssign = tokens[index+1].Operation.UpdateAssign skipNextToken = true } log.Debug(" adding token to the fixed list") postProcessedTokens = append(postProcessedTokens, currentToken) if index != len(tokens)-1 && ((currentToken.TokenType == openCollect && tokens[index+1].TokenType == closeCollect) || (currentToken.TokenType == openCollectObject && tokens[index+1].TokenType == closeCollectObject)) { log.Debug(" adding empty") op := &Operation{OperationType: emptyOpType, StringValue: "EMPTY"} postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: op}) } if index != len(tokens)-1 && currentToken.CheckForPostTraverse && ((tokens[index+1].TokenType == operationToken && (tokens[index+1].Operation.OperationType == traversePathOpType)) || (tokens[index+1].TokenType == traverseArrayCollect)) { log.Debug(" adding pipe because the next thing is traverse") op := &Operation{OperationType: shortPipeOpType, Value: "PIPE", StringValue: "."} postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: op}) } if index != len(tokens)-1 && currentToken.CheckForPostTraverse && tokens[index+1].TokenType == openCollect { log.Debug(" adding traverArray because next is opencollect") op := &Operation{OperationType: traverseArrayOpType} postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: op}) } return postProcessedTokens, skipNextToken }