From 0c24ba23f0ff600e040e1f7f82052a9346a348f4 Mon Sep 17 00:00:00 2001 From: Mike Farah Date: Wed, 13 Jul 2022 14:30:35 +1000 Subject: [PATCH] Using new lexer by alecthomas! --- go.mod | 4 +- go.sum | 10 +- pkg/yqlib/expression_parser.go | 2 +- pkg/yqlib/expression_postfix.go | 2 +- pkg/yqlib/expression_processing_test.go | 2 +- pkg/yqlib/expression_tokeniser.go | 691 ------------------------ pkg/yqlib/lexer.go | 159 ++++++ pkg/yqlib/lexer_participle.go | 530 ++++++++++++++++++ pkg/yqlib/lexer_participle_test.go | 538 ++++++++++++++++++ pkg/yqlib/lib.go | 2 +- 10 files changed, 1239 insertions(+), 701 deletions(-) delete mode 100644 pkg/yqlib/expression_tokeniser.go create mode 100644 pkg/yqlib/lexer.go create mode 100644 pkg/yqlib/lexer_participle.go create mode 100644 pkg/yqlib/lexer_participle_test.go diff --git a/go.mod b/go.mod index 678eefc9..c5601a35 100644 --- a/go.mod +++ b/go.mod @@ -2,6 +2,8 @@ module github.com/mikefarah/yq/v4 require ( github.com/a8m/envsubst v1.3.0 + github.com/alecthomas/participle/v2 v2.0.0-beta.4 + github.com/alecthomas/repr v0.1.0 github.com/elliotchance/orderedmap v1.4.0 github.com/fatih/color v1.13.0 github.com/goccy/go-yaml v1.9.5 @@ -9,7 +11,6 @@ require ( github.com/magiconair/properties v1.8.6 github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e github.com/spf13/cobra v1.5.0 - github.com/timtadh/lexmachine v0.2.2 golang.org/x/net v0.0.0-20220708220712-1185a9018129 gopkg.in/op/go-logging.v1 v1.0.0-20160211212156-b2cb9fa56473 gopkg.in/yaml.v3 v3.0.1 @@ -20,7 +21,6 @@ require ( github.com/mattn/go-colorable v0.1.12 // indirect github.com/mattn/go-isatty v0.0.14 // indirect github.com/spf13/pflag v1.0.5 // indirect - github.com/timtadh/data-structures v0.5.3 // indirect golang.org/x/sys v0.0.0-20220712014510-0a85c31ab51e // indirect golang.org/x/text v0.3.7 // indirect golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f // indirect diff --git a/go.sum b/go.sum index 4607c98d..5f3be960 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,10 @@ github.com/a8m/envsubst v1.3.0 h1:GmXKmVssap0YtlU3E230W98RWtWCyIZzjtf1apWWyAg= github.com/a8m/envsubst v1.3.0/go.mod h1:MVUTQNGQ3tsjOOtKCNd+fl8RzhsXcDvvAEzkhGtlsbY= +github.com/alecthomas/assert/v2 v2.0.3 h1:WKqJODfOiQG0nEJKFKzDIG3E29CN2/4zR9XGJzKIkbg= +github.com/alecthomas/participle/v2 v2.0.0-beta.4 h1:ublfGBm+x+p2j7KotHhrUMbKtejT7M0Gv1Mt1u3absw= +github.com/alecthomas/participle/v2 v2.0.0-beta.4/go.mod h1:RC764t6n4L8D8ITAJv0qdokritYSNR3wV5cVwmIEaMM= +github.com/alecthomas/repr v0.1.0 h1:ENn2e1+J3k09gyj2shc0dHr/yjaWSHRlrJ4DPMevDqE= +github.com/alecthomas/repr v0.1.0/go.mod h1:2kn6fqh/zIyPLmm3ugklbEi5hg5wS435eygvNfaDQL8= github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -14,6 +19,7 @@ github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+ github.com/go-playground/validator/v10 v10.4.1/go.mod h1:nlOn6nFhuKACm19sB/8EGNn9GlaMV7XkbRSipzJ0Ii4= github.com/goccy/go-yaml v1.9.5 h1:Eh/+3uk9kLxG4koCX6lRMAPS1OaMSAi+FJcya0INdB0= github.com/goccy/go-yaml v1.9.5/go.mod h1:U/jl18uSupI5rdI2jmuCswEA2htH9eXfferR3KfscvA= +github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= 
github.com/jinzhu/copier v0.3.5 h1:GlvfUwHk62RokgqVNvYsku0TATCF7bAHVwEXoBh3iJg= @@ -41,10 +47,6 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/timtadh/data-structures v0.5.3 h1:F2tEjoG9qWIyUjbvXVgJqEOGJPMIiYn7U5W5mE+i/vQ= -github.com/timtadh/data-structures v0.5.3/go.mod h1:9R4XODhJ8JdWFEI8P/HJKqxuJctfBQw6fDibMQny2oU= -github.com/timtadh/lexmachine v0.2.2 h1:g55RnjdYazm5wnKv59pwFcBJHOyvTPfDEoz21s4PHmY= -github.com/timtadh/lexmachine v0.2.2/go.mod h1:GBJvD5OAfRn/gnp92zb9KTgHLB7akKyxmVivoYCcjQI= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= diff --git a/pkg/yqlib/expression_parser.go b/pkg/yqlib/expression_parser.go index 26caf933..28ac4f18 100644 --- a/pkg/yqlib/expression_parser.go +++ b/pkg/yqlib/expression_parser.go @@ -21,7 +21,7 @@ type expressionParserImpl struct { } func newExpressionParser() ExpressionParserInterface { - return &expressionParserImpl{newExpressionTokeniser(), newExpressionPostFixer()} + return &expressionParserImpl{newParticipleLexer(), newExpressionPostFixer()} } func (p *expressionParserImpl) ParseExpression(expression string) (*ExpressionNode, error) { diff --git a/pkg/yqlib/expression_postfix.go b/pkg/yqlib/expression_postfix.go index 6b82e153..e875d8f9 100644 --- a/pkg/yqlib/expression_postfix.go +++ b/pkg/yqlib/expression_postfix.go @@ -76,7 +76,7 @@ func (p *expressionPostFixerImpl) ConvertToPostfix(infixTokens []*token) ([]*Ope // on the close op - move it to the traverse array op // allows for .["cat"]? prefs := traversePreferences{} - closeTokenMatch := string(currentToken.Match.Bytes) + closeTokenMatch := currentToken.Match if closeTokenMatch[len(closeTokenMatch)-1:] == "?" { prefs.OptionalTraverse = true } diff --git a/pkg/yqlib/expression_processing_test.go b/pkg/yqlib/expression_processing_test.go index 18b13a9b..ffc6700b 100644 --- a/pkg/yqlib/expression_processing_test.go +++ b/pkg/yqlib/expression_processing_test.go @@ -312,7 +312,7 @@ var pathTests = []struct { }, } -var tokeniser = newExpressionTokeniser() +var tokeniser = newParticipleLexer() var postFixer = newExpressionPostFixer() func TestPathParsing(t *testing.T) { diff --git a/pkg/yqlib/expression_tokeniser.go b/pkg/yqlib/expression_tokeniser.go deleted file mode 100644 index 860032d1..00000000 --- a/pkg/yqlib/expression_tokeniser.go +++ /dev/null @@ -1,691 +0,0 @@ -package yqlib - -import ( - "fmt" - "regexp" - "strconv" - "strings" - - lex "github.com/timtadh/lexmachine" - "github.com/timtadh/lexmachine/machines" -) - -func skip(*lex.Scanner, *machines.Match) (interface{}, error) { - return nil, nil -} - -type tokenType uint32 - -const ( - operationToken = 1 << iota - openBracket - closeBracket - openCollect - closeCollect - openCollectObject - closeCollectObject - traverseArrayCollect -) - -type token struct { - TokenType tokenType - Operation *Operation - AssignOperation *Operation // e.g. tag (GetTag) op becomes AssignTag if '=' follows it - CheckForPostTraverse bool // e.g. 
[1]cat should really be [1].cat - Match *machines.Match // match that created this token - -} - -func (t *token) toString(detail bool) string { - if t.TokenType == operationToken { - if detail { - return fmt.Sprintf("%v (%v)", t.Operation.toString(), t.Operation.OperationType.Precedence) - } - return t.Operation.toString() - } else if t.TokenType == openBracket { - return "(" - } else if t.TokenType == closeBracket { - return ")" - } else if t.TokenType == openCollect { - return "[" - } else if t.TokenType == closeCollect { - return "]" - } else if t.TokenType == openCollectObject { - return "{" - } else if t.TokenType == closeCollectObject { - return "}" - } else if t.TokenType == traverseArrayCollect { - return ".[" - - } else { - return "NFI" - } -} - -func pathToken(wrapped bool) lex.Action { - return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { - value := string(m.Bytes) - prefs := traversePreferences{} - - if value[len(value)-1:] == "?" { - prefs.OptionalTraverse = true - value = value[:len(value)-1] - } - - value = value[1:] - if wrapped { - value = unwrap(value) - } - log.Debug("PathToken %v", value) - op := &Operation{OperationType: traversePathOpType, Value: value, StringValue: value, Preferences: prefs} - return &token{TokenType: operationToken, Operation: op, CheckForPostTraverse: true}, nil - } -} - -func opToken(op *operationType) lex.Action { - return opTokenWithPrefs(op, nil, nil) -} - -func opAssignableToken(opType *operationType, assignOpType *operationType) lex.Action { - return opTokenWithPrefs(opType, assignOpType, nil) -} - -func assignOpToken(updateAssign bool) lex.Action { - return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { - log.Debug("assignOpToken %v", string(m.Bytes)) - value := string(m.Bytes) - prefs := assignPreferences{DontOverWriteAnchor: true} - op := &Operation{OperationType: assignOpType, Value: assignOpType.Type, StringValue: value, UpdateAssign: updateAssign, Preferences: prefs} - return &token{TokenType: operationToken, Operation: op}, nil - } -} - -func multiplyWithPrefs(op *operationType) lex.Action { - return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { - prefs := multiplyPreferences{} - options := string(m.Bytes) - if strings.Contains(options, "+") { - prefs.AppendArrays = true - } - if strings.Contains(options, "?") { - prefs.TraversePrefs = traversePreferences{DontAutoCreate: true} - } - if strings.Contains(options, "n") { - prefs.AssignPrefs = assignPreferences{OnlyWriteNull: true} - } - if strings.Contains(options, "d") { - prefs.DeepMergeArrays = true - } - prefs.TraversePrefs.DontFollowAlias = true - op := &Operation{OperationType: op, Value: multiplyOpType.Type, StringValue: options, Preferences: prefs} - return &token{TokenType: operationToken, Operation: op}, nil - } -} - -func opTokenWithPrefs(op *operationType, assignOpType *operationType, preferences interface{}) lex.Action { - return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { - log.Debug("opTokenWithPrefs %v", string(m.Bytes)) - value := string(m.Bytes) - op := &Operation{OperationType: op, Value: op.Type, StringValue: value, Preferences: preferences} - var assign *Operation - if assignOpType != nil { - assign = &Operation{OperationType: assignOpType, Value: assignOpType.Type, StringValue: value, Preferences: preferences} - } - return &token{TokenType: operationToken, Operation: op, AssignOperation: assign}, nil - } -} - -func hasOptionParameter(value string, option string) bool { - parameterParser := 
regexp.MustCompile(`.*\([^\)]*\)`) - matches := parameterParser.FindStringSubmatch(value) - if len(matches) == 0 { - return false - } - parameterString := matches[0] - optionParser := regexp.MustCompile(fmt.Sprintf("\\b%v\\b", option)) - return len(optionParser.FindStringSubmatch(parameterString)) > 0 -} - -func extractNumberParameter(value string) (int, error) { - parameterParser := regexp.MustCompile(`.*\(([0-9]+)\)`) - matches := parameterParser.FindStringSubmatch(value) - var indent, errParsingInt = strconv.ParseInt(matches[1], 10, 32) - if errParsingInt != nil { - return 0, errParsingInt - } - return int(indent), nil -} - -func envSubstWithOptions() lex.Action { - return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { - value := string(m.Bytes) - noEmpty := hasOptionParameter(value, "ne") - noUnset := hasOptionParameter(value, "nu") - failFast := hasOptionParameter(value, "ff") - envsubstOpType.Type = "ENVSUBST" - prefs := envOpPreferences{NoUnset: noUnset, NoEmpty: noEmpty, FailFast: failFast} - if noEmpty { - envsubstOpType.Type = envsubstOpType.Type + "_NO_EMPTY" - } - if noUnset { - envsubstOpType.Type = envsubstOpType.Type + "_NO_UNSET" - } - - op := &Operation{OperationType: envsubstOpType, Value: envsubstOpType.Type, StringValue: value, Preferences: prefs} - return &token{TokenType: operationToken, Operation: op}, nil - } -} - -func flattenWithDepth() lex.Action { - return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { - value := string(m.Bytes) - var depth, errParsingInt = extractNumberParameter(value) - if errParsingInt != nil { - return nil, errParsingInt - } - - prefs := flattenPreferences{depth: depth} - op := &Operation{OperationType: flattenOpType, Value: flattenOpType.Type, StringValue: value, Preferences: prefs} - return &token{TokenType: operationToken, Operation: op}, nil - } -} - -func encodeWithIndent(outputFormat PrinterOutputFormat) lex.Action { - return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { - value := string(m.Bytes) - var indent, errParsingInt = extractNumberParameter(value) - if errParsingInt != nil { - return nil, errParsingInt - } - - prefs := encoderPreferences{format: outputFormat, indent: indent} - op := &Operation{OperationType: encodeOpType, Value: encodeOpType.Type, StringValue: value, Preferences: prefs} - return &token{TokenType: operationToken, Operation: op}, nil - } -} - -func assignAllCommentsOp(updateAssign bool) lex.Action { - return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { - log.Debug("assignAllCommentsOp %v", string(m.Bytes)) - value := string(m.Bytes) - op := &Operation{ - OperationType: assignCommentOpType, - Value: assignCommentOpType.Type, - StringValue: value, - UpdateAssign: updateAssign, - Preferences: commentOpPreferences{LineComment: true, HeadComment: true, FootComment: true}, - } - return &token{TokenType: operationToken, Operation: op}, nil - } -} - -func literalToken(pType tokenType, checkForPost bool) lex.Action { - return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { - return &token{TokenType: pType, CheckForPostTraverse: checkForPost, Match: m}, nil - } -} - -func unwrap(value string) string { - return value[1 : len(value)-1] -} - -func numberValue() lex.Action { - return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { - var numberString = string(m.Bytes) - var number, errParsingInt = strconv.ParseInt(numberString, 10, 64) - if errParsingInt != nil { - return nil, errParsingInt - } - - return &token{TokenType: 
operationToken, Operation: createValueOperation(number, numberString)}, nil - } -} - -func hexValue() lex.Action { - return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { - var originalString = string(m.Bytes) - var numberString = originalString[2:] - log.Debugf("numberString: %v", numberString) - var number, errParsingInt = strconv.ParseInt(numberString, 16, 64) - if errParsingInt != nil { - return nil, errParsingInt - } - - return &token{TokenType: operationToken, Operation: createValueOperation(number, originalString)}, nil - } -} - -func floatValue() lex.Action { - return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { - var numberString = string(m.Bytes) - var number, errParsingInt = strconv.ParseFloat(numberString, 64) - if errParsingInt != nil { - return nil, errParsingInt - } - return &token{TokenType: operationToken, Operation: createValueOperation(number, numberString)}, nil - } -} - -func booleanValue(val bool) lex.Action { - return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { - return &token{TokenType: operationToken, Operation: createValueOperation(val, string(m.Bytes))}, nil - } -} - -func stringValue(wrapped bool) lex.Action { - return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { - value := string(m.Bytes) - if wrapped { - value = unwrap(value) - } - value = strings.ReplaceAll(value, "\\\"", "\"") - return &token{TokenType: operationToken, Operation: createValueOperation(value, value)}, nil - } -} - -func getVariableOpToken() lex.Action { - return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { - value := string(m.Bytes) - - value = value[1:] - - getVarOperation := createValueOperation(value, value) - getVarOperation.OperationType = getVariableOpType - - return &token{TokenType: operationToken, Operation: getVarOperation, CheckForPostTraverse: true}, nil - } -} - -func envOp(strenv bool) lex.Action { - return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { - value := string(m.Bytes) - preferences := envOpPreferences{} - - if strenv { - // strenv( ) - value = value[7 : len(value)-1] - preferences.StringValue = true - } else { - //env( ) - value = value[4 : len(value)-1] - } - - envOperation := createValueOperation(value, value) - envOperation.OperationType = envOpType - envOperation.Preferences = preferences - - return &token{TokenType: operationToken, Operation: envOperation}, nil - } -} - -func nullValue() lex.Action { - return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { - return &token{TokenType: operationToken, Operation: createValueOperation(nil, string(m.Bytes))}, nil - } -} - -func selfToken() lex.Action { - return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { - op := &Operation{OperationType: selfReferenceOpType} - return &token{TokenType: operationToken, Operation: op}, nil - } -} - -func initLexer() (*lex.Lexer, error) { - lexer := lex.NewLexer() - lexer.Add([]byte(`\(`), literalToken(openBracket, false)) - lexer.Add([]byte(`\)`), literalToken(closeBracket, true)) - - lexer.Add([]byte(`\.\[`), literalToken(traverseArrayCollect, false)) - lexer.Add([]byte(`\.\.`), opTokenWithPrefs(recursiveDescentOpType, nil, recursiveDescentPreferences{RecurseArray: true, - TraversePreferences: traversePreferences{DontFollowAlias: true, IncludeMapKeys: false}})) - - lexer.Add([]byte(`\.\.\.`), opTokenWithPrefs(recursiveDescentOpType, nil, recursiveDescentPreferences{RecurseArray: true, - TraversePreferences: traversePreferences{DontFollowAlias: true, 
IncludeMapKeys: true}})) - - lexer.Add([]byte(`,`), opToken(unionOpType)) - lexer.Add([]byte(`:\s*`), opToken(createMapOpType)) - lexer.Add([]byte(`length`), opToken(lengthOpType)) - lexer.Add([]byte(`line`), opToken(lineOpType)) - lexer.Add([]byte(`column`), opToken(columnOpType)) - - lexer.Add([]byte(`eval`), opToken(evalOpType)) - - lexer.Add([]byte(`map`), opToken(mapOpType)) - lexer.Add([]byte(`map_values`), opToken(mapValuesOpType)) - lexer.Add([]byte(`pick`), opToken(pickOpType)) - - lexer.Add([]byte(`flatten\([0-9]+\)`), flattenWithDepth()) - lexer.Add([]byte(`flatten`), opTokenWithPrefs(flattenOpType, nil, flattenPreferences{depth: -1})) - - lexer.Add([]byte(`format_datetime`), opToken(formatDateTimeOpType)) - lexer.Add([]byte(`now`), opToken(nowOpType)) - lexer.Add([]byte(`tz`), opToken(tzOpType)) - lexer.Add([]byte(`with_dtf`), opToken(withDtFormatOpType)) - - lexer.Add([]byte(`error`), opToken(errorOpType)) - - lexer.Add([]byte(`toyaml\([0-9]+\)`), encodeWithIndent(YamlOutputFormat)) - lexer.Add([]byte(`to_yaml\([0-9]+\)`), encodeWithIndent(YamlOutputFormat)) - - lexer.Add([]byte(`toxml\([0-9]+\)`), encodeWithIndent(XMLOutputFormat)) - lexer.Add([]byte(`to_xml\([0-9]+\)`), encodeWithIndent(XMLOutputFormat)) - - lexer.Add([]byte(`tojson\([0-9]+\)`), encodeWithIndent(JSONOutputFormat)) - lexer.Add([]byte(`to_json\([0-9]+\)`), encodeWithIndent(JSONOutputFormat)) - - lexer.Add([]byte(`toyaml`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: YamlOutputFormat, indent: 2})) - lexer.Add([]byte(`to_yaml`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: YamlOutputFormat, indent: 2})) - // 0 indent doesn't work with yaml. - lexer.Add([]byte(`@yaml`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: YamlOutputFormat, indent: 2})) - - lexer.Add([]byte(`tojson`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: JSONOutputFormat, indent: 2})) - lexer.Add([]byte(`to_json`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: JSONOutputFormat, indent: 2})) - lexer.Add([]byte(`@json`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: JSONOutputFormat, indent: 0})) - - lexer.Add([]byte(`toprops`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: PropsOutputFormat, indent: 2})) - lexer.Add([]byte(`to_props`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: PropsOutputFormat, indent: 2})) - lexer.Add([]byte(`@props`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: PropsOutputFormat, indent: 2})) - - lexer.Add([]byte(`tocsv`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: CSVOutputFormat})) - lexer.Add([]byte(`to_csv`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: CSVOutputFormat})) - lexer.Add([]byte(`@csv`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: CSVOutputFormat})) - - lexer.Add([]byte(`totsv`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: TSVOutputFormat})) - lexer.Add([]byte(`to_tsv`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: TSVOutputFormat})) - lexer.Add([]byte(`@tsv`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: TSVOutputFormat})) - - lexer.Add([]byte(`toxml`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: XMLOutputFormat})) - lexer.Add([]byte(`to_xml`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: XMLOutputFormat, indent: 2})) - lexer.Add([]byte(`@xml`), opTokenWithPrefs(encodeOpType, nil, 
encoderPreferences{format: XMLOutputFormat, indent: 0})) - - lexer.Add([]byte(`@base64`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: Base64OutputFormat})) - lexer.Add([]byte(`@base64d`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: Base64InputFormat})) - - lexer.Add([]byte(`fromyaml`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: YamlInputFormat})) - lexer.Add([]byte(`fromjson`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: YamlInputFormat})) - lexer.Add([]byte(`fromxml`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: XMLInputFormat})) - lexer.Add([]byte(`fromprops`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: PropertiesInputFormat})) - - lexer.Add([]byte(`from_yaml`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: YamlInputFormat})) - lexer.Add([]byte(`from_json`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: YamlInputFormat})) - lexer.Add([]byte(`from_xml`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: XMLInputFormat})) - lexer.Add([]byte(`from_props`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: PropertiesInputFormat})) - - lexer.Add([]byte(`@yamld`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: YamlInputFormat})) - lexer.Add([]byte(`@jsond`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: YamlInputFormat})) - lexer.Add([]byte(`@xmld`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: XMLInputFormat})) - lexer.Add([]byte(`@propsd`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: PropertiesInputFormat})) - - lexer.Add([]byte(`sortKeys`), opToken(sortKeysOpType)) - lexer.Add([]byte(`sort_keys`), opToken(sortKeysOpType)) - - lexer.Add([]byte(`load`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewYamlDecoder()})) - - lexer.Add([]byte(`xmlload`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewXMLDecoder(XMLPreferences.AttributePrefix, XMLPreferences.ContentName, XMLPreferences.StrictMode, XMLPreferences.KeepNamespace, XMLPreferences.UseRawToken)})) - lexer.Add([]byte(`load_xml`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewXMLDecoder(XMLPreferences.AttributePrefix, XMLPreferences.ContentName, XMLPreferences.StrictMode, XMLPreferences.KeepNamespace, XMLPreferences.UseRawToken)})) - lexer.Add([]byte(`loadxml`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewXMLDecoder(XMLPreferences.AttributePrefix, XMLPreferences.ContentName, XMLPreferences.StrictMode, XMLPreferences.KeepNamespace, XMLPreferences.UseRawToken)})) - - lexer.Add([]byte(`load_base64`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewBase64Decoder()})) - - lexer.Add([]byte(`load_props`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewPropertiesDecoder()})) - lexer.Add([]byte(`loadprops`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewPropertiesDecoder()})) - - lexer.Add([]byte(`strload`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: true})) - lexer.Add([]byte(`load_str`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: true})) - lexer.Add([]byte(`loadstr`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: true})) - - lexer.Add([]byte(`select`), opToken(selectOpType)) - lexer.Add([]byte(`has`), opToken(hasOpType)) - lexer.Add([]byte(`unique`), 
opToken(uniqueOpType)) - lexer.Add([]byte(`unique_by`), opToken(uniqueByOpType)) - lexer.Add([]byte(`group_by`), opToken(groupByOpType)) - lexer.Add([]byte(`explode`), opToken(explodeOpType)) - lexer.Add([]byte(`or`), opToken(orOpType)) - lexer.Add([]byte(`and`), opToken(andOpType)) - lexer.Add([]byte(`not`), opToken(notOpType)) - lexer.Add([]byte(`ireduce`), opToken(reduceOpType)) - lexer.Add([]byte(`;`), opToken(blockOpType)) - lexer.Add([]byte(`\/\/`), opToken(alternativeOpType)) - - lexer.Add([]byte(`documentIndex`), opToken(getDocumentIndexOpType)) - lexer.Add([]byte(`document_index`), opToken(getDocumentIndexOpType)) - - lexer.Add([]byte(`di`), opToken(getDocumentIndexOpType)) - - lexer.Add([]byte(`splitDoc`), opToken(splitDocumentOpType)) - lexer.Add([]byte(`split_doc`), opToken(splitDocumentOpType)) - - lexer.Add([]byte(`join`), opToken(joinStringOpType)) - lexer.Add([]byte(`sub`), opToken(subStringOpType)) - lexer.Add([]byte(`match`), opToken(matchOpType)) - lexer.Add([]byte(`capture`), opToken(captureOpType)) - lexer.Add([]byte(`test`), opToken(testOpType)) - - lexer.Add([]byte(`upcase`), opTokenWithPrefs(changeCaseOpType, nil, changeCasePrefs{ToUpperCase: true})) - lexer.Add([]byte(`ascii_upcase`), opTokenWithPrefs(changeCaseOpType, nil, changeCasePrefs{ToUpperCase: true})) - - lexer.Add([]byte(`downcase`), opTokenWithPrefs(changeCaseOpType, nil, changeCasePrefs{ToUpperCase: false})) - lexer.Add([]byte(`ascii_downcase`), opTokenWithPrefs(changeCaseOpType, nil, changeCasePrefs{ToUpperCase: false})) - - lexer.Add([]byte(`sort`), opToken(sortOpType)) - lexer.Add([]byte(`sort_by`), opToken(sortByOpType)) - lexer.Add([]byte(`reverse`), opToken(reverseOpType)) - - lexer.Add([]byte(`any`), opToken(anyOpType)) - lexer.Add([]byte(`any_c`), opToken(anyConditionOpType)) - lexer.Add([]byte(`all`), opToken(allOpType)) - lexer.Add([]byte(`all_c`), opToken(allConditionOpType)) - lexer.Add([]byte(`contains`), opToken(containsOpType)) - - lexer.Add([]byte(`split`), opToken(splitStringOpType)) - - lexer.Add([]byte(`parent`), opToken(getParentOpType)) - lexer.Add([]byte(`key`), opToken(getKeyOpType)) - lexer.Add([]byte(`keys`), opToken(keysOpType)) - - lexer.Add([]byte(`style`), opAssignableToken(getStyleOpType, assignStyleOpType)) - - lexer.Add([]byte(`tag|type`), opAssignableToken(getTagOpType, assignTagOpType)) - - lexer.Add([]byte(`anchor`), opAssignableToken(getAnchorOpType, assignAnchorOpType)) - lexer.Add([]byte(`alias`), opAssignableToken(getAliasOptype, assignAliasOpType)) - lexer.Add([]byte(`filename`), opToken(getFilenameOpType)) - - lexer.Add([]byte(`fileIndex`), opToken(getFileIndexOpType)) - lexer.Add([]byte(`file_index`), opToken(getFileIndexOpType)) - - lexer.Add([]byte(`fi`), opToken(getFileIndexOpType)) - lexer.Add([]byte(`path`), opToken(getPathOpType)) - lexer.Add([]byte(`to_entries`), opToken(toEntriesOpType)) - lexer.Add([]byte(`from_entries`), opToken(fromEntriesOpType)) - lexer.Add([]byte(`with_entries`), opToken(withEntriesOpType)) - - lexer.Add([]byte(`with`), opToken(withOpType)) - - lexer.Add([]byte(`lineComment`), opTokenWithPrefs(getCommentOpType, assignCommentOpType, commentOpPreferences{LineComment: true})) - lexer.Add([]byte(`line_comment`), opTokenWithPrefs(getCommentOpType, assignCommentOpType, commentOpPreferences{LineComment: true})) - - lexer.Add([]byte(`headComment`), opTokenWithPrefs(getCommentOpType, assignCommentOpType, commentOpPreferences{HeadComment: true})) - lexer.Add([]byte(`head_comment`), opTokenWithPrefs(getCommentOpType, assignCommentOpType, 
commentOpPreferences{HeadComment: true})) - - lexer.Add([]byte(`footComment`), opTokenWithPrefs(getCommentOpType, assignCommentOpType, commentOpPreferences{FootComment: true})) - lexer.Add([]byte(`foot_comment`), opTokenWithPrefs(getCommentOpType, assignCommentOpType, commentOpPreferences{FootComment: true})) - - lexer.Add([]byte(`comments\s*=`), assignAllCommentsOp(false)) - lexer.Add([]byte(`comments\s*\|=`), assignAllCommentsOp(true)) - - lexer.Add([]byte(`collect`), opToken(collectOpType)) - - lexer.Add([]byte(`\s*==\s*`), opToken(equalsOpType)) - lexer.Add([]byte(`\s*!=\s*`), opToken(notEqualsOpType)) - - lexer.Add([]byte(`\s*>=\s*`), opTokenWithPrefs(compareOpType, nil, compareTypePref{OrEqual: true, Greater: true})) - lexer.Add([]byte(`\s*>\s*`), opTokenWithPrefs(compareOpType, nil, compareTypePref{OrEqual: false, Greater: true})) - - lexer.Add([]byte(`\s*<=\s*`), opTokenWithPrefs(compareOpType, nil, compareTypePref{OrEqual: true, Greater: false})) - lexer.Add([]byte(`\s*<\s*`), opTokenWithPrefs(compareOpType, nil, compareTypePref{OrEqual: false, Greater: false})) - - lexer.Add([]byte(`\s*=\s*`), assignOpToken(false)) - - lexer.Add([]byte(`del`), opToken(deleteChildOpType)) - - lexer.Add([]byte(`\s*\|=\s*`), assignOpToken(true)) - - lexer.Add([]byte("( |\t|\n|\r)+"), skip) - - lexer.Add([]byte(`\."[^ "]+"\??`), pathToken(true)) - lexer.Add([]byte(`\.[^ ;\}\{\:\[\],\|\.\[\(\)=\n]+\??`), pathToken(false)) - lexer.Add([]byte(`\.`), selfToken()) - - lexer.Add([]byte(`\|`), opToken(pipeOpType)) - - lexer.Add([]byte(`0[xX][0-9A-Fa-f]+`), hexValue()) - lexer.Add([]byte(`-?\d+(\.\d+)`), floatValue()) - lexer.Add([]byte(`-?[1-9](\.\d+)?[Ee][-+]?\d+`), floatValue()) - lexer.Add([]byte(`-?\d+`), numberValue()) - - lexer.Add([]byte(`[Tt][Rr][Uu][Ee]`), booleanValue(true)) - lexer.Add([]byte(`[Ff][Aa][Ll][Ss][Ee]`), booleanValue(false)) - - lexer.Add([]byte(`[Nn][Uu][Ll][Ll]`), nullValue()) - lexer.Add([]byte(`~`), nullValue()) - - lexer.Add([]byte(`"([^"\\]*(\\.[^"\\]*)*)"`), stringValue(true)) - lexer.Add([]byte(`strenv\([^\)]+\)`), envOp(true)) - lexer.Add([]byte(`env\([^\)]+\)`), envOp(false)) - - lexer.Add([]byte(`envsubst\((ne|nu|ff| |,)+\)`), envSubstWithOptions()) - lexer.Add([]byte(`envsubst`), opToken(envsubstOpType)) - - lexer.Add([]byte(`\[`), literalToken(openCollect, false)) - lexer.Add([]byte(`\]\??`), literalToken(closeCollect, true)) - lexer.Add([]byte(`\{`), literalToken(openCollectObject, false)) - lexer.Add([]byte(`\}`), literalToken(closeCollectObject, true)) - lexer.Add([]byte(`\*=[\+|\?dn]*`), multiplyWithPrefs(multiplyAssignOpType)) - lexer.Add([]byte(`\*[\+|\?dn]*`), multiplyWithPrefs(multiplyOpType)) - - lexer.Add([]byte(`\+`), opToken(addOpType)) - lexer.Add([]byte(`\+=`), opToken(addAssignOpType)) - - lexer.Add([]byte(`\-`), opToken(subtractOpType)) - lexer.Add([]byte(`\-=`), opToken(subtractAssignOpType)) - lexer.Add([]byte(`\$[a-zA-Z_-0-9]+`), getVariableOpToken()) - lexer.Add([]byte(`as`), opTokenWithPrefs(assignVariableOpType, nil, assignVarPreferences{})) - lexer.Add([]byte(`ref`), opTokenWithPrefs(assignVariableOpType, nil, assignVarPreferences{IsReference: true})) - - err := lexer.CompileNFA() - if err != nil { - return nil, err - } - return lexer, nil -} - -type expressionTokeniser interface { - Tokenise(expression string) ([]*token, error) -} - -type expressionTokeniserImpl struct { - lexer *lex.Lexer -} - -func newExpressionTokeniser() expressionTokeniser { - var lexer, err = initLexer() - if err != nil { - panic(err) - } - return 
&expressionTokeniserImpl{lexer} -} - -func (p *expressionTokeniserImpl) Tokenise(expression string) ([]*token, error) { - scanner, err := p.lexer.Scanner([]byte(expression)) - - if err != nil { - return nil, fmt.Errorf("parsing expression: %w", err) - } - var tokens []*token - for tok, err, eof := scanner.Next(); !eof; tok, err, eof = scanner.Next() { - - if tok != nil { - currentToken := tok.(*token) - log.Debugf("Tokenising %v", currentToken.toString(true)) - tokens = append(tokens, currentToken) - } - if err != nil { - return nil, fmt.Errorf("parsing expression: %w", err) - } - } - var postProcessedTokens = make([]*token, 0) - - skipNextToken := false - - for index := range tokens { - if skipNextToken { - skipNextToken = false - } else { - postProcessedTokens, skipNextToken = p.handleToken(tokens, index, postProcessedTokens) - } - } - - return postProcessedTokens, nil -} - -func (p *expressionTokeniserImpl) handleToken(tokens []*token, index int, postProcessedTokens []*token) (tokensAccum []*token, skipNextToken bool) { - skipNextToken = false - currentToken := tokens[index] - - log.Debug("processing %v", currentToken.toString(true)) - - if currentToken.TokenType == traverseArrayCollect { - // `.[exp]`` works by creating a traversal array of [self, exp] and piping that into the traverse array operator - //need to put a traverse array then a collect currentToken - // do this by adding traverse then converting currentToken to collect - - log.Debug(" adding self") - op := &Operation{OperationType: selfReferenceOpType, StringValue: "SELF"} - postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: op}) - - log.Debug(" adding traverse array") - op = &Operation{OperationType: traverseArrayOpType, StringValue: "TRAVERSE_ARRAY"} - postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: op}) - - currentToken = &token{TokenType: openCollect} - - } - - if index != len(tokens)-1 && currentToken.AssignOperation != nil && - tokens[index+1].TokenType == operationToken && - tokens[index+1].Operation.OperationType == assignOpType { - log.Debug(" its an update assign") - currentToken.Operation = currentToken.AssignOperation - currentToken.Operation.UpdateAssign = tokens[index+1].Operation.UpdateAssign - skipNextToken = true - } - - log.Debug(" adding token to the fixed list") - postProcessedTokens = append(postProcessedTokens, currentToken) - - if index != len(tokens)-1 && - ((currentToken.TokenType == openCollect && tokens[index+1].TokenType == closeCollect) || - (currentToken.TokenType == openCollectObject && tokens[index+1].TokenType == closeCollectObject)) { - log.Debug(" adding empty") - op := &Operation{OperationType: emptyOpType, StringValue: "EMPTY"} - postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: op}) - } - - if index != len(tokens)-1 && currentToken.CheckForPostTraverse && - ((tokens[index+1].TokenType == operationToken && (tokens[index+1].Operation.OperationType == traversePathOpType)) || - (tokens[index+1].TokenType == traverseArrayCollect)) { - log.Debug(" adding pipe because the next thing is traverse") - op := &Operation{OperationType: shortPipeOpType, Value: "PIPE", StringValue: "."} - postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: op}) - } - if index != len(tokens)-1 && currentToken.CheckForPostTraverse && - tokens[index+1].TokenType == openCollect { - - log.Debug(" adding traverArray because next is opencollect") - 
op := &Operation{OperationType: traverseArrayOpType} - postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: op}) - } - return postProcessedTokens, skipNextToken -} diff --git a/pkg/yqlib/lexer.go b/pkg/yqlib/lexer.go new file mode 100644 index 00000000..d4ab3409 --- /dev/null +++ b/pkg/yqlib/lexer.go @@ -0,0 +1,159 @@ +package yqlib + +import ( + "fmt" + "regexp" + "strconv" +) + +type expressionTokeniser interface { + Tokenise(expression string) ([]*token, error) +} + +type tokenType uint32 + +const ( + operationToken = 1 << iota + openBracket + closeBracket + openCollect + closeCollect + openCollectObject + closeCollectObject + traverseArrayCollect +) + +type token struct { + TokenType tokenType + Operation *Operation + AssignOperation *Operation // e.g. tag (GetTag) op becomes AssignTag if '=' follows it + CheckForPostTraverse bool // e.g. [1]cat should really be [1].cat + Match string +} + +func (t *token) toString(detail bool) string { + if t.TokenType == operationToken { + if detail { + return fmt.Sprintf("%v (%v)", t.Operation.toString(), t.Operation.OperationType.Precedence) + } + return t.Operation.toString() + } else if t.TokenType == openBracket { + return "(" + } else if t.TokenType == closeBracket { + return ")" + } else if t.TokenType == openCollect { + return "[" + } else if t.TokenType == closeCollect { + return "]" + } else if t.TokenType == openCollectObject { + return "{" + } else if t.TokenType == closeCollectObject { + return "}" + } else if t.TokenType == traverseArrayCollect { + return ".[" + + } else { + return "NFI" + } +} + +func unwrap(value string) string { + return value[1 : len(value)-1] +} + +func extractNumberParameter(value string) (int, error) { + parameterParser := regexp.MustCompile(`.*\(([0-9]+)\)`) + matches := parameterParser.FindStringSubmatch(value) + var indent, errParsingInt = strconv.ParseInt(matches[1], 10, 32) + if errParsingInt != nil { + return 0, errParsingInt + } + return int(indent), nil +} + +func hasOptionParameter(value string, option string) bool { + parameterParser := regexp.MustCompile(`.*\([^\)]*\)`) + matches := parameterParser.FindStringSubmatch(value) + if len(matches) == 0 { + return false + } + parameterString := matches[0] + optionParser := regexp.MustCompile(fmt.Sprintf("\\b%v\\b", option)) + return len(optionParser.FindStringSubmatch(parameterString)) > 0 +} + +func postProcessTokens(tokens []*token) []*token { + var postProcessedTokens = make([]*token, 0) + + skipNextToken := false + + for index := range tokens { + if skipNextToken { + skipNextToken = false + } else { + postProcessedTokens, skipNextToken = handleToken(tokens, index, postProcessedTokens) + } + } + + return postProcessedTokens +} + +func handleToken(tokens []*token, index int, postProcessedTokens []*token) (tokensAccum []*token, skipNextToken bool) { + skipNextToken = false + currentToken := tokens[index] + + log.Debug("processing %v", currentToken.toString(true)) + + if currentToken.TokenType == traverseArrayCollect { + // `.[exp]` works by creating a traversal array of [self, exp] and piping that into the traverse array operator + // need to put a traverse array then a collect currentToken + // do this by adding traverse then converting currentToken to collect + + log.Debug(" adding self") + op := &Operation{OperationType: selfReferenceOpType, StringValue: "SELF"} + postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: op}) + + log.Debug(" adding traverse array") + op = 
&Operation{OperationType: traverseArrayOpType, StringValue: "TRAVERSE_ARRAY"} + postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: op}) + + currentToken = &token{TokenType: openCollect} + + } + + if index != len(tokens)-1 && currentToken.AssignOperation != nil && + tokens[index+1].TokenType == operationToken && + tokens[index+1].Operation.OperationType == assignOpType { + log.Debug(" it's an update assign") + currentToken.Operation = currentToken.AssignOperation + currentToken.Operation.UpdateAssign = tokens[index+1].Operation.UpdateAssign + skipNextToken = true + } + + log.Debug(" adding token to the fixed list") + postProcessedTokens = append(postProcessedTokens, currentToken) + + if index != len(tokens)-1 && + ((currentToken.TokenType == openCollect && tokens[index+1].TokenType == closeCollect) || + (currentToken.TokenType == openCollectObject && tokens[index+1].TokenType == closeCollectObject)) { + log.Debug(" adding empty") + op := &Operation{OperationType: emptyOpType, StringValue: "EMPTY"} + postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: op}) + } + + if index != len(tokens)-1 && currentToken.CheckForPostTraverse && + ((tokens[index+1].TokenType == operationToken && (tokens[index+1].Operation.OperationType == traversePathOpType)) || + (tokens[index+1].TokenType == traverseArrayCollect)) { + log.Debug(" adding pipe because the next thing is traverse") + op := &Operation{OperationType: shortPipeOpType, Value: "PIPE", StringValue: "."} + postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: op}) + } + if index != len(tokens)-1 && currentToken.CheckForPostTraverse && + tokens[index+1].TokenType == openCollect { + + log.Debug(" adding traverseArray because next is openCollect") + op := &Operation{OperationType: traverseArrayOpType} + postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: op}) + } + return postProcessedTokens, skipNextToken +} diff --git a/pkg/yqlib/lexer_participle.go b/pkg/yqlib/lexer_participle.go new file mode 100644 index 00000000..09e13844 --- /dev/null +++ b/pkg/yqlib/lexer_participle.go @@ -0,0 +1,530 @@ +package yqlib + +import ( + "strconv" + "strings" + + "github.com/alecthomas/participle/v2/lexer" +) + +var participleYqRules = []*participleYqRule{ + {"LINE_COMMENT", `line_?comment|lineComment`, opTokenWithPrefs(getCommentOpType, assignCommentOpType, commentOpPreferences{LineComment: true}), 0}, + {"HEAD_COMMENT", `head_?comment|headComment`, opTokenWithPrefs(getCommentOpType, assignCommentOpType, commentOpPreferences{HeadComment: true}), 0}, + {"FOOT_COMMENT", `foot_?comment|footComment`, opTokenWithPrefs(getCommentOpType, assignCommentOpType, commentOpPreferences{FootComment: true}), 0}, + + {"OpenBracket", `\(`, literalToken(openBracket, false), 0}, + {"CloseBracket", `\)`, literalToken(closeBracket, true), 0}, + {"OpenTraverseArrayCollect", `\.\[`, literalToken(traverseArrayCollect, false), 0}, + + {"OpenCollect", `\[`, literalToken(openCollect, false), 0}, + {"CloseCollect", `\]\??`, literalToken(closeCollect, true), 0}, + + {"OpenCollectObject", `\{`, literalToken(openCollectObject, false), 0}, + {"CloseCollectObject", `\}`, literalToken(closeCollectObject, true), 0}, + + {"RecursiveDescentIncludingKeys", `\.\.\.`, recursiveDescentOpToken(true), 0}, + {"RecursiveDescent", `\.\.`, recursiveDescentOpToken(false), 0}, + + {"GetVariable", `\$[a-zA-Z_\-0-9]+`, getVariableOpToken(), 0}, + 
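// Note: participle's simple lexer tries these rules in declaration order, so longer patterns (e.g. `\.\.\.`, `\+=`) must come before their shorter prefixes (`\.\.`, `\+`). +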
{"AsignAsVariable", `as`, opTokenWithPrefs(assignVariableOpType, nil, assignVarPreferences{}), 0}, + {"AsignRefVariable", `ref`, opTokenWithPrefs(assignVariableOpType, nil, assignVarPreferences{IsReference: true}), 0}, + + {"CreateMap", `:\s*`, opToken(createMapOpType), 0}, + simpleOp("length", lengthOpType), + simpleOp("line", lineOpType), + simpleOp("column", columnOpType), + simpleOp("eval", evalOpType), + + {"MapValues", `map_?values`, opToken(mapValuesOpType), 0}, + simpleOp("map", mapOpType), + simpleOp("pick", pickOpType), + + {"FlattenWithDepth", `flatten\([0-9]+\)`, flattenWithDepth(), 0}, + {"Flatten", `flatten`, opTokenWithPrefs(flattenOpType, nil, flattenPreferences{depth: -1}), 0}, + + simpleOp("format_datetime", formatDateTimeOpType), + simpleOp("now", nowOpType), + simpleOp("tz", tzOpType), + simpleOp("with_dtf", withDtFormatOpType), + simpleOp("error", errorOpType), + simpleOp("sortKeys", sortKeysOpType), + simpleOp("sort_?keys", sortKeysOpType), + + {"YamlEncodeWithIndent", `to_?yaml\([0-9]+\)`, encodeParseIndent(YamlOutputFormat), 0}, + {"XMLEncodeWithIndent", `to_?xml\([0-9]+\)`, encodeParseIndent(XMLOutputFormat), 0}, + {"JSONEncodeWithIndent", `to_?json\([0-9]+\)`, encodeParseIndent(JSONOutputFormat), 0}, + + {"YamlDecode", `from_?yaml|@yamld|from_?json|@jsond`, decodeOp(YamlInputFormat), 0}, + {"YamlEncode", `to_?yaml|@yaml`, encodeWithIndent(YamlOutputFormat, 2), 0}, + + {"JSONEncode", `to_?json`, encodeWithIndent(JSONOutputFormat, 2), 0}, + {"JSONEncodeNoIndent", `@json`, encodeWithIndent(JSONOutputFormat, 0), 0}, + + {"PropertiesDecode", `from_?props|@propsd`, decodeOp(PropertiesInputFormat), 0}, + {"PropsEncode", `to_?props|@props`, encodeWithIndent(PropsOutputFormat, 2), 0}, + + {"XmlDecode", `from_?xml|@xmld`, decodeOp(XMLInputFormat), 0}, + {"XMLEncode", `to_?xml`, encodeWithIndent(XMLOutputFormat, 2), 0}, + {"XMLEncodeNoIndent", `@xml`, encodeWithIndent(XMLOutputFormat, 0), 0}, + + {"CSVEncode", `to_?csv|@csv`, encodeWithIndent(CSVOutputFormat, 0), 0}, + {"TSVEncode", `to_?tsv|@tsv`, encodeWithIndent(TSVOutputFormat, 0), 0}, + + {"Base64d", `@base64d`, decodeOp(Base64InputFormat), 0}, + {"Base64", `@base64`, encodeWithIndent(Base64OutputFormat, 0), 0}, + + {"LoadXML", `load_?xml|xml_?load`, loadOp(NewXMLDecoder(XMLPreferences.AttributePrefix, XMLPreferences.ContentName, XMLPreferences.StrictMode, XMLPreferences.KeepNamespace, XMLPreferences.UseRawToken), false), 0}, + + {"LoadBase64", `load_?base64`, loadOp(NewBase64Decoder(), false), 0}, + + {"LoadProperties", `load_?props`, loadOp(NewPropertiesDecoder(), false), 0}, + + {"LoadString", `load_?str|str_?load`, loadOp(nil, true), 0}, + + {"LoadYaml", `load`, loadOp(NewYamlDecoder(), false), 0}, + + {"SplitDocument", `splitDoc|split_?doc`, opToken(splitDocumentOpType), 0}, + + simpleOp("select", selectOpType), + simpleOp("has", hasOpType), + simpleOp("unique_?by", uniqueByOpType), + simpleOp("unique", uniqueOpType), + + simpleOp("group_?by", groupByOpType), + simpleOp("explode", explodeOpType), + simpleOp("or", orOpType), + simpleOp("and", andOpType), + simpleOp("not", notOpType), + simpleOp("ireduce", reduceOpType), + + simpleOp("join", joinStringOpType), + simpleOp("sub", subStringOpType), + simpleOp("match", matchOpType), + simpleOp("capture", captureOpType), + simpleOp("test", testOpType), + + simpleOp("sort_?by", sortByOpType), + simpleOp("sort", sortOpType), + + simpleOp("reverse", reverseOpType), + + simpleOp("any_c", anyConditionOpType), + simpleOp("any", anyOpType), + + simpleOp("all_c", 
allConditionOpType), + simpleOp("all", allOpType), + + simpleOp("contains", containsOpType), + simpleOp("split", splitStringOpType), + simpleOp("parent", getParentOpType), + + simpleOp("keys", keysOpType), + simpleOp("key", getKeyOpType), + + simpleOp("file_?name|fileName", getFilenameOpType), + simpleOp("file_?index|fileIndex|fi", getFileIndexOpType), + simpleOp("path", getPathOpType), + + simpleOp("to_?entries|toEntries", toEntriesOpType), + simpleOp("from_?entries|fromEntries", fromEntriesOpType), + simpleOp("with_?entries|withEntries", withEntriesOpType), + + simpleOp("with", withOpType), + + simpleOp("collect", collectOpType), + simpleOp("del", deleteChildOpType), + + assignableOp("style", getStyleOpType, assignStyleOpType), + assignableOp("tag|type", getTagOpType, assignTagOpType), + assignableOp("anchor", getAnchorOpType, assignAnchorOpType), + assignableOp("alias", getAliasOpType, assignAliasOpType), + + {"ALL_COMMENTS", `comments\s*=`, assignAllCommentsOp(false), 0}, + {"ALL_COMMENTS_ASSIGN_RELATIVE", `comments\s*\|=`, assignAllCommentsOp(true), 0}, + + {"Block", `;`, opToken(blockOpType), 0}, + {"Alternative", `\/\/`, opToken(alternativeOpType), 0}, + + {"DocumentIndex", `documentIndex|document_?index|di`, opToken(getDocumentIndexOpType), 0}, + + {"Uppercase", `upcase|ascii_?upcase`, opTokenWithPrefs(changeCaseOpType, nil, changeCasePrefs{ToUpperCase: true}), 0}, + {"Downcase", `downcase|ascii_?downcase`, opTokenWithPrefs(changeCaseOpType, nil, changeCasePrefs{ToUpperCase: false}), 0}, + + {"HexValue", `0[xX][0-9A-Fa-f]+`, hexValue(), 0}, + {"FloatValueScientific", `-?[1-9](\.\d+)?[Ee][-+]?\d+`, floatValue(), 0}, + {"FloatValue", `-?\d+(\.\d+)`, floatValue(), 0}, + + {"NumberValue", `-?\d+`, numberValue(), 0}, + + {"TrueBooleanValue", `[Tt][Rr][Uu][Ee]`, booleanValue(true), 0}, + {"FalseBooleanValue", `[Ff][Aa][Ll][Ss][Ee]`, booleanValue(false), 0}, + + {"NullValue", `[Nn][Uu][Ll][Ll]|~`, nullValue(), 0}, + + {"QuotedStringValue", `"([^"\\]*(\\.[^"\\]*)*)"`, stringValue(), 0}, + + {"StrEnvOp", `strenv\([^\)]+\)`, envOp(true), 0}, + {"EnvOp", `env\([^\)]+\)`, envOp(false), 0}, + + {"EnvSubstWithOptions", `envsubst\((ne|nu|ff| |,)+\)`, envSubstWithOptions(), 0}, + simpleOp("envsubst", envsubstOpType), + + {"Equals", `\s*==\s*`, opToken(equalsOpType), 0}, + {"NotEquals", `\s*!=\s*`, opToken(notEqualsOpType), 0}, + + {"GreaterThanEquals", `\s*>=\s*`, opTokenWithPrefs(compareOpType, nil, compareTypePref{OrEqual: true, Greater: true}), 0}, + {"LessThanEquals", `\s*<=\s*`, opTokenWithPrefs(compareOpType, nil, compareTypePref{OrEqual: true, Greater: false}), 0}, + + {"GreaterThan", `\s*>\s*`, opTokenWithPrefs(compareOpType, nil, compareTypePref{OrEqual: false, Greater: true}), 0}, + {"LessThan", `\s*<\s*`, opTokenWithPrefs(compareOpType, nil, compareTypePref{OrEqual: false, Greater: false}), 0}, + + {"AssignRelative", `\|=`, assignOpToken(true), 0}, + {"Assign", `=`, assignOpToken(false), 0}, + + {`whitespace`, `[ \t\n]+`, nil, 0}, + + {"WrappedPathElement", `\."[^ "]+"\??`, pathToken(true), 0}, + {"PathElement", `\.[^ ;\}\{\:\[\],\|\.\[\(\)=\n]+\??`, pathToken(false), 0}, + {"Pipe", `\|`, opToken(pipeOpType), 0}, + {"Self", `\.`, opToken(selfReferenceOpType), 0}, + + {"Union", `,`, opToken(unionOpType), 0}, + + {"MultiplyAssign", `\*=[\+|\?dn]*`, multiplyWithPrefs(multiplyAssignOpType), 0}, + {"Multiply", `\*[\+|\?dn]*`, multiplyWithPrefs(multiplyOpType), 0}, + + {"AddAssign", `\+=`, opToken(addAssignOpType), 0}, + {"Add", `\+`, opToken(addOpType), 0}, + + {"SubtractAssign", `\-=`, 
opToken(subtractAssignOpType), 0}, + {"Subtract", `\-`, opToken(subtractOpType), 0}, +} + +type yqAction func(lexer.Token) (*token, error) + +type participleYqRule struct { + Name string + Pattern string + CreateYqToken yqAction + ParticipleTokenType lexer.TokenType +} + +type participleLexer struct { + lexerDefinition lexer.StringDefinition +} + +func simpleOp(name string, opType *operationType) *participleYqRule { + return &participleYqRule{strings.ToUpper(string(name[0])) + name[1:], name, opToken(opType), 0} +} + +func assignableOp(name string, opType *operationType, assignOpType *operationType) *participleYqRule { + return &participleYqRule{strings.ToUpper(string(name[0])) + name[1:], name, opTokenWithPrefs(opType, assignOpType, nil), 0} +} + +func newParticipleLexer() expressionTokeniser { + simpleRules := make([]lexer.SimpleRule, len(participleYqRules)) + for i, yqRule := range participleYqRules { + simpleRules[i] = lexer.SimpleRule{Name: yqRule.Name, Pattern: yqRule.Pattern} + } + lexerDefinition := lexer.MustSimple(simpleRules) + symbols := lexerDefinition.Symbols() + + for _, yqRule := range participleYqRules { + yqRule.ParticipleTokenType = symbols[yqRule.Name] + } + + return &participleLexer{lexerDefinition} +} + +func pathToken(wrapped bool) yqAction { + return func(rawToken lexer.Token) (*token, error) { + value := rawToken.Value + prefs := traversePreferences{} + + if value[len(value)-1:] == "?" { + prefs.OptionalTraverse = true + value = value[:len(value)-1] + } + + value = value[1:] + if wrapped { + value = unwrap(value) + } + log.Debug("PathToken %v", value) + op := &Operation{OperationType: traversePathOpType, Value: value, StringValue: value, Preferences: prefs} + return &token{TokenType: operationToken, Operation: op, CheckForPostTraverse: true}, nil + } +} + +func recursiveDescentOpToken(includeMapKeys bool) yqAction { + prefs := recursiveDescentPreferences{ + RecurseArray: true, + TraversePreferences: traversePreferences{ + DontFollowAlias: true, + IncludeMapKeys: includeMapKeys, + }, + } + return opTokenWithPrefs(recursiveDescentOpType, nil, prefs) +} + +func opTokenWithPrefs(opType *operationType, assignOpType *operationType, preferences interface{}) yqAction { + return func(rawToken lexer.Token) (*token, error) { + value := rawToken.Value + op := &Operation{OperationType: opType, Value: opType.Type, StringValue: value, Preferences: preferences} + var assign *Operation + if assignOpType != nil { + assign = &Operation{OperationType: assignOpType, Value: assignOpType.Type, StringValue: value, Preferences: preferences} + } + return &token{TokenType: operationToken, Operation: op, AssignOperation: assign}, nil + } +} + +func flattenWithDepth() yqAction { + return func(rawToken lexer.Token) (*token, error) { + value := rawToken.Value + var depth, errParsingInt = extractNumberParameter(value) + if errParsingInt != nil { + return nil, errParsingInt + } + + prefs := flattenPreferences{depth: depth} + op := &Operation{OperationType: flattenOpType, Value: flattenOpType.Type, StringValue: value, Preferences: prefs} + return &token{TokenType: operationToken, Operation: op}, nil + } +} + +func assignAllCommentsOp(updateAssign bool) yqAction { + return func(rawToken lexer.Token) (*token, error) { + log.Debug("assignAllCommentsOp %v", rawToken.Value) + value := rawToken.Value + op := &Operation{ + OperationType: assignCommentOpType, + Value: assignCommentOpType.Type, + StringValue: value, + UpdateAssign: updateAssign, + Preferences: commentOpPreferences{LineComment: true, 
HeadComment: true, FootComment: true}, + } + return &token{TokenType: operationToken, Operation: op}, nil + } +} + +func assignOpToken(updateAssign bool) yqAction { + return func(rawToken lexer.Token) (*token, error) { + log.Debug("assignOpToken %v", rawToken.Value) + value := rawToken.Value + prefs := assignPreferences{DontOverWriteAnchor: true} + op := &Operation{OperationType: assignOpType, Value: assignOpType.Type, StringValue: value, UpdateAssign: updateAssign, Preferences: prefs} + return &token{TokenType: operationToken, Operation: op}, nil + } +} + +func booleanValue(val bool) yqAction { + return func(rawToken lexer.Token) (*token, error) { + return &token{TokenType: operationToken, Operation: createValueOperation(val, rawToken.Value)}, nil + } +} + +func nullValue() yqAction { + return func(rawToken lexer.Token) (*token, error) { + return &token{TokenType: operationToken, Operation: createValueOperation(nil, rawToken.Value)}, nil + } +} + +func stringValue() yqAction { + return func(rawToken lexer.Token) (*token, error) { + value := unwrap(rawToken.Value) + value = strings.ReplaceAll(value, "\\\"", "\"") + return &token{TokenType: operationToken, Operation: createValueOperation(value, value)}, nil + } +} + +func envOp(strenv bool) yqAction { + return func(rawToken lexer.Token) (*token, error) { + value := rawToken.Value + preferences := envOpPreferences{} + + if strenv { + // strenv( ) + value = value[7 : len(value)-1] + preferences.StringValue = true + } else { + //env( ) + value = value[4 : len(value)-1] + } + + envOperation := createValueOperation(value, value) + envOperation.OperationType = envOpType + envOperation.Preferences = preferences + + return &token{TokenType: operationToken, Operation: envOperation}, nil + } +} + +func envSubstWithOptions() yqAction { + return func(rawToken lexer.Token) (*token, error) { + value := rawToken.Value + noEmpty := hasOptionParameter(value, "ne") + noUnset := hasOptionParameter(value, "nu") + failFast := hasOptionParameter(value, "ff") + envsubstOpType.Type = "ENVSUBST" + prefs := envOpPreferences{NoUnset: noUnset, NoEmpty: noEmpty, FailFast: failFast} + if noEmpty { + envsubstOpType.Type = envsubstOpType.Type + "_NO_EMPTY" + } + if noUnset { + envsubstOpType.Type = envsubstOpType.Type + "_NO_UNSET" + } + + op := &Operation{OperationType: envsubstOpType, Value: envsubstOpType.Type, StringValue: value, Preferences: prefs} + return &token{TokenType: operationToken, Operation: op}, nil + } +} + +func multiplyWithPrefs(op *operationType) yqAction { + return func(rawToken lexer.Token) (*token, error) { + prefs := multiplyPreferences{} + options := rawToken.Value + if strings.Contains(options, "+") { + prefs.AppendArrays = true + } + if strings.Contains(options, "?") { + prefs.TraversePrefs = traversePreferences{DontAutoCreate: true} + } + if strings.Contains(options, "n") { + prefs.AssignPrefs = assignPreferences{OnlyWriteNull: true} + } + if strings.Contains(options, "d") { + prefs.DeepMergeArrays = true + } + prefs.TraversePrefs.DontFollowAlias = true + op := &Operation{OperationType: op, Value: multiplyOpType.Type, StringValue: options, Preferences: prefs} + return &token{TokenType: operationToken, Operation: op}, nil + } + +} + +func getVariableOpToken() yqAction { + return func(rawToken lexer.Token) (*token, error) { + value := rawToken.Value + + value = value[1:] + + getVarOperation := createValueOperation(value, value) + getVarOperation.OperationType = getVariableOpType + + return &token{TokenType: operationToken, Operation: 
getVarOperation, CheckForPostTraverse: true}, nil + } +} + +func hexValue() yqAction { + return func(rawToken lexer.Token) (*token, error) { + var originalString = rawToken.Value + var numberString = originalString[2:] + log.Debugf("numberString: %v", numberString) + var number, errParsingInt = strconv.ParseInt(numberString, 16, 64) + if errParsingInt != nil { + return nil, errParsingInt + } + + return &token{TokenType: operationToken, Operation: createValueOperation(number, originalString)}, nil + } +} + +func floatValue() yqAction { + return func(rawToken lexer.Token) (*token, error) { + var numberString = rawToken.Value + var number, errParsingInt = strconv.ParseFloat(numberString, 64) + if errParsingInt != nil { + return nil, errParsingInt + } + return &token{TokenType: operationToken, Operation: createValueOperation(number, numberString)}, nil + } +} + +func numberValue() yqAction { + return func(rawToken lexer.Token) (*token, error) { + var numberString = rawToken.Value + var number, errParsingInt = strconv.ParseInt(numberString, 10, 64) + if errParsingInt != nil { + return nil, errParsingInt + } + + return &token{TokenType: operationToken, Operation: createValueOperation(number, numberString)}, nil + } +} + +func encodeParseIndent(outputFormat PrinterOutputFormat) yqAction { + return func(rawToken lexer.Token) (*token, error) { + value := rawToken.Value + var indent, errParsingInt = extractNumberParameter(value) + if errParsingInt != nil { + return nil, errParsingInt + } + + prefs := encoderPreferences{format: outputFormat, indent: indent} + op := &Operation{OperationType: encodeOpType, Value: encodeOpType.Type, StringValue: value, Preferences: prefs} + return &token{TokenType: operationToken, Operation: op}, nil + } +} + +func encodeWithIndent(outputFormat PrinterOutputFormat, indent int) yqAction { + prefs := encoderPreferences{format: outputFormat, indent: indent} + return opTokenWithPrefs(encodeOpType, nil, prefs) +} + +func decodeOp(inputFormat InputFormat) yqAction { + prefs := decoderPreferences{format: inputFormat} + return opTokenWithPrefs(decodeOpType, nil, prefs) +} + +func loadOp(decoder Decoder, loadAsString bool) yqAction { + prefs := loadPrefs{decoder: decoder, loadAsString: loadAsString} + return opTokenWithPrefs(loadOpType, nil, prefs) +} + +func opToken(op *operationType) yqAction { + return opTokenWithPrefs(op, nil, nil) +} + +func literalToken(tt tokenType, checkForPost bool) yqAction { + return func(rawToken lexer.Token) (*token, error) { + return &token{TokenType: tt, CheckForPostTraverse: checkForPost, Match: rawToken.Value}, nil + } +} + +func (p *participleLexer) getYqDefinition(rawToken lexer.Token) *participleYqRule { + for _, yqRule := range participleYqRules { + if yqRule.ParticipleTokenType == rawToken.Type { + return yqRule + } + } + return &participleYqRule{} +} + +func (p *participleLexer) Tokenise(expression string) ([]*token, error) { + myLexer, err := p.lexerDefinition.LexString("", expression) + if err != nil { + return nil, err + } + tokens := make([]*token, 0) + + for { + rawToken, e := myLexer.Next() + if e != nil { + return nil, e + } else if rawToken.Type == lexer.EOF { + return postProcessTokens(tokens), nil + } + + definition := p.getYqDefinition(rawToken) + if definition.CreateYqToken != nil { + token, e := definition.CreateYqToken(rawToken) + if e != nil { + return nil, e + } + tokens = append(tokens, token) + } + + } + +} diff --git a/pkg/yqlib/lexer_participle_test.go b/pkg/yqlib/lexer_participle_test.go new file mode 100644 index 
diff --git a/pkg/yqlib/lexer_participle_test.go b/pkg/yqlib/lexer_participle_test.go
new file mode 100644
index 00000000..7dfa0f60
--- /dev/null
+++ b/pkg/yqlib/lexer_participle_test.go
@@ -0,0 +1,538 @@
+package yqlib
+
+import (
+	"testing"
+
+	"github.com/alecthomas/repr"
+	"github.com/mikefarah/yq/v4/test"
+	"gopkg.in/yaml.v3"
+)
+
+type participleLexerScenario struct {
+	expression string
+	tokens     []*token
+}
+
+var participleLexerScenarios = []participleLexerScenario{
+	{
+		expression: ".a",
+		tokens: []*token{
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: traversePathOpType,
+					Value:         "a",
+					StringValue:   "a",
+					Preferences:   traversePreferences{},
+				},
+				CheckForPostTraverse: true,
+			},
+		},
+	},
+	{
+		expression: ".a.b",
+		tokens: []*token{
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: traversePathOpType,
+					Value:         "a",
+					StringValue:   "a",
+					Preferences:   traversePreferences{},
+				},
+				CheckForPostTraverse: true,
+			},
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: shortPipeOpType,
+					Value:         "PIPE",
+					StringValue:   ".",
+					Preferences:   nil,
+				},
+			},
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: traversePathOpType,
+					Value:         "b",
+					StringValue:   "b",
+					Preferences:   traversePreferences{},
+				},
+				CheckForPostTraverse: true,
+			},
+		},
+	},
+	{
+		expression: ".a.b?",
+		tokens: []*token{
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: traversePathOpType,
+					Value:         "a",
+					StringValue:   "a",
+					Preferences:   traversePreferences{},
+				},
+				CheckForPostTraverse: true,
+			},
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: shortPipeOpType,
+					Value:         "PIPE",
+					StringValue:   ".",
+					Preferences:   nil,
+				},
+			},
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: traversePathOpType,
+					Value:         "b",
+					StringValue:   "b",
+					Preferences: traversePreferences{
+						OptionalTraverse: true,
+					},
+				},
+				CheckForPostTraverse: true,
+			},
+		},
+	},
+	{
+		expression: `.a."b?"`,
+		tokens: []*token{
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: traversePathOpType,
+					Value:         "a",
+					StringValue:   "a",
+					Preferences:   traversePreferences{},
+				},
+				CheckForPostTraverse: true,
+			},
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: shortPipeOpType,
+					Value:         "PIPE",
+					StringValue:   ".",
+					Preferences:   nil,
+				},
+			},
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: traversePathOpType,
+					Value:         "b?",
+					StringValue:   "b?",
+					Preferences:   traversePreferences{},
+				},
+				CheckForPostTraverse: true,
+			},
+		},
+	},
+	{
+		expression: ` .a ."b?"`,
+		tokens: []*token{
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: traversePathOpType,
+					Value:         "a",
+					StringValue:   "a",
+					Preferences:   traversePreferences{},
+				},
+				CheckForPostTraverse: true,
+			},
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: shortPipeOpType,
+					Value:         "PIPE",
+					StringValue:   ".",
+					Preferences:   nil,
+				},
+			},
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: traversePathOpType,
+					Value:         "b?",
+					StringValue:   "b?",
+					Preferences:   traversePreferences{},
+				},
+				CheckForPostTraverse: true,
+			},
+		},
+	},
+	{
+		expression: `.a | .b`,
+		tokens: []*token{
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: traversePathOpType,
+					Value:         "a",
+					StringValue:   "a",
+					Preferences:   traversePreferences{},
+				},
+				CheckForPostTraverse: true,
+			},
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: pipeOpType,
+					Value:         "PIPE",
+					StringValue:   "|",
+					Preferences:   nil,
+				},
+			},
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: traversePathOpType,
+					Value:         "b",
+					StringValue:   "b",
+					Preferences:   traversePreferences{},
+				},
+				CheckForPostTraverse: true,
+			},
+		},
+	},
+	{
+		expression: "(.a)",
+		tokens: []*token{
+			{
+				TokenType: openBracket,
+				Match:     "(",
+			},
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: traversePathOpType,
+					Value:         "a",
+					StringValue:   "a",
+					Preferences:   traversePreferences{},
+				},
+				CheckForPostTraverse: true,
+			},
+			{
+				TokenType:            closeBracket,
+				Match:                ")",
+				CheckForPostTraverse: true,
+			},
+		},
+	},
+	{
+		expression: "..",
+		tokens: []*token{
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: recursiveDescentOpType,
+					Value:         "RECURSIVE_DESCENT",
+					StringValue:   "..",
+					Preferences: recursiveDescentPreferences{
+						RecurseArray: true,
+						TraversePreferences: traversePreferences{
+							DontFollowAlias: true,
+							IncludeMapKeys:  false,
+						},
+					},
+				},
+			},
+		},
+	},
+	{
+		expression: "...",
+		tokens: []*token{
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: recursiveDescentOpType,
+					Value:         "RECURSIVE_DESCENT",
+					StringValue:   "...",
+					Preferences: recursiveDescentPreferences{
+						RecurseArray: true,
+						TraversePreferences: traversePreferences{
+							DontFollowAlias: true,
+							IncludeMapKeys:  true,
+						},
+					},
+				},
+			},
+		},
+	},
+	{
+		expression: ".a,.b",
+		tokens: []*token{
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: traversePathOpType,
+					Value:         "a",
+					StringValue:   "a",
+					Preferences:   traversePreferences{},
+				},
+				CheckForPostTraverse: true,
+			},
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: unionOpType,
+					Value:         "UNION",
+					StringValue:   ",",
+					Preferences:   nil,
+				},
+			},
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: traversePathOpType,
+					Value:         "b",
+					StringValue:   "b",
+					Preferences:   traversePreferences{},
+				},
+				CheckForPostTraverse: true,
+			},
+		},
+	},
+	{
+		expression: "map_values",
+		tokens: []*token{
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: mapValuesOpType,
+					Value:         "MAP_VALUES",
+					StringValue:   "map_values",
+					Preferences:   nil,
+				},
+			},
+		},
+	},
+	{
+		expression: "mapvalues",
+		tokens: []*token{
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: mapValuesOpType,
+					Value:         "MAP_VALUES",
+					StringValue:   "mapvalues",
+					Preferences:   nil,
+				},
+			},
+		},
+	},
+	{
+		expression: "flatten(3)",
+		tokens: []*token{
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: flattenOpType,
+					Value:         "FLATTEN_BY",
+					StringValue:   "flatten(3)",
+					Preferences:   flattenPreferences{depth: 3},
+				},
+			},
+		},
+	},
+	{
+		expression: "flatten",
+		tokens: []*token{
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: flattenOpType,
+					Value:         "FLATTEN_BY",
+					StringValue:   "flatten",
+					Preferences:   flattenPreferences{depth: -1},
+				},
+			},
+		},
+	},
+	{
+		expression: "length",
+		tokens: []*token{
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: lengthOpType,
+					Value:         "LENGTH",
+					StringValue:   "length",
+					Preferences:   nil,
+				},
+			},
+		},
+	},
+	{
+		expression: "format_datetime",
+		tokens: []*token{
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: formatDateTimeOpType,
+					Value:         "FORMAT_DATE_TIME",
+					StringValue:   "format_datetime",
+					Preferences:   nil,
+				},
+			},
+		},
+	},
+	{
+		expression: "to_yaml(3)",
+		tokens: []*token{
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: encodeOpType,
+					Value:         "ENCODE",
+					StringValue:   "to_yaml(3)",
+					Preferences: encoderPreferences{
+						format: YamlOutputFormat,
+						indent: 3,
+					},
+				},
+			},
+		},
+	},
+	{
+		expression: "tojson(2)",
+		tokens: []*token{
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: encodeOpType,
+					Value:         "ENCODE",
+					StringValue:   "tojson(2)",
+					Preferences: encoderPreferences{
+						format: JSONOutputFormat,
+						indent: 2,
+					},
+				},
+			},
+		},
+	},
+	{
+		expression: "@yaml",
+		tokens: []*token{
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: encodeOpType,
+					Value:         "ENCODE",
+					StringValue:   "@yaml",
+					Preferences: encoderPreferences{
+						format: YamlOutputFormat,
+						indent: 2,
+					},
+				},
+			},
+		},
+	},
+	{
+		expression: "to_props",
+		tokens: []*token{
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: encodeOpType,
+					Value:         "ENCODE",
+					StringValue:   "to_props",
+					Preferences: encoderPreferences{
+						format: PropsOutputFormat,
+						indent: 2,
+					},
+				},
+			},
+		},
+	},
+	{
+		expression: "@base64d",
+		tokens: []*token{
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: decodeOpType,
+					Value:         "DECODE",
+					StringValue:   "@base64d",
+					Preferences: decoderPreferences{
+						format: Base64InputFormat,
+					},
+				},
+			},
+		},
+	},
+	{
+		expression: "@base64",
+		tokens: []*token{
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: encodeOpType,
+					Value:         "ENCODE",
+					StringValue:   "@base64",
+					Preferences: encoderPreferences{
+						format: Base64OutputFormat,
+					},
+				},
+			},
+		},
+	},
+	{
+		expression: "@yamld",
+		tokens: []*token{
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: decodeOpType,
+					Value:         "DECODE",
+					StringValue:   "@yamld",
+					Preferences: decoderPreferences{
+						format: YamlInputFormat,
+					},
+				},
+			},
+		},
+	},
+	{
+		expression: `"string with a \""`,
+		tokens: []*token{
+			{
+				TokenType: operationToken,
+				Operation: &Operation{
+					OperationType: valueOpType,
+					Value:         `string with a "`,
+					StringValue:   `string with a "`,
+					Preferences:   nil,
+					CandidateNode: &CandidateNode{
+						Node: &yaml.Node{
+							Kind:  yaml.ScalarNode,
+							Tag:   "!!str",
+							Value: `string with a "`,
+						},
+					},
+				},
+			},
+		},
+	},
+}
+
+func TestParticipleLexer(t *testing.T) {
+	log.Debugf("TestParticipleLexer")
+	lexer := newParticipleLexer()
+
+	for _, scenario := range participleLexerScenarios {
+		actual, err := lexer.Tokenise(scenario.expression)
+		if err != nil {
+			t.Error(err)
+		} else {
+			test.AssertResultWithContext(t, repr.String(scenario.tokens, repr.Indent(" ")), repr.String(actual, repr.Indent(" ")), scenario.expression)
+		}
+
+	}
+}
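The scenarios above are asserted by rendering both the expected and actual token slices through alecthomas/repr and comparing the resulting strings, which turns deep structural equality into a readable text diff on failure. A small sketch of that idiom with a throwaway struct (hypothetical types, not yq's):

package main

import (
	"fmt"

	"github.com/alecthomas/repr"
)

type operation struct {
	Name        string
	StringValue string
}

func main() {
	expected := []*operation{{Name: "TRAVERSE", StringValue: "a"}}
	actual := []*operation{{Name: "TRAVERSE", StringValue: "a"}}

	// repr.String renders values deterministically, so comparing the
	// rendered strings doubles as a deep-equality check whose failure
	// output is a human-readable dump of both sides.
	left := repr.String(expected, repr.Indent(" "))
	right := repr.String(actual, repr.Indent(" "))
	fmt.Println(left == right) // true
}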
diff --git a/pkg/yqlib/lib.go b/pkg/yqlib/lib.go
index 6753a9a1..bbacb374 100644
--- a/pkg/yqlib/lib.go
+++ b/pkg/yqlib/lib.go
@@ -126,7 +126,7 @@ var getParentOpType = &operationType{Type: "GET_PARENT", NumArgs: 0, Precedence:
 var getCommentOpType = &operationType{Type: "GET_COMMENT", NumArgs: 0, Precedence: 50, Handler: getCommentsOperator}
 var getAnchorOpType = &operationType{Type: "GET_ANCHOR", NumArgs: 0, Precedence: 50, Handler: getAnchorOperator}
-var getAliasOptype = &operationType{Type: "GET_ALIAS", NumArgs: 0, Precedence: 50, Handler: getAliasOperator}
+var getAliasOpType = &operationType{Type: "GET_ALIAS", NumArgs: 0, Precedence: 50, Handler: getAliasOperator}
 var getDocumentIndexOpType = &operationType{Type: "GET_DOCUMENT_INDEX", NumArgs: 0, Precedence: 50, Handler: getDocumentIndexOperator}
 var getFilenameOpType = &operationType{Type: "GET_FILENAME", NumArgs: 0, Precedence: 50, Handler: getFilenameOperator}
 var getFileIndexOpType = &operationType{Type: "GET_FILE_INDEX", NumArgs: 0, Precedence: 50, Handler: getFileIndexOperator}
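As a closing note on the lexer actions in lexer_participle.go: hexValue, numberValue and floatValue all reduce to plain strconv calls on the matched text. A quick standalone sketch of those same conversions (the input literals are arbitrary examples):

package main

import (
	"fmt"
	"strconv"
)

func main() {
	// Mirrors hexValue: strip the leading "0x", then parse base-16.
	hex, err := strconv.ParseInt("0xff"[2:], 16, 64)
	if err != nil {
		panic(err)
	}

	// Mirrors numberValue (base-10 integers) and floatValue.
	integer, _ := strconv.ParseInt("42", 10, 64)
	floatVal, _ := strconv.ParseFloat("3.14", 64)

	fmt.Println(hex, integer, floatVal) // 255 42 3.14
}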