Using new lexer by alecthomas!

This commit is contained in:
Mike Farah 2022-07-13 14:30:35 +10:00
parent f393cf37c2
commit 0c24ba23f0
10 changed files with 1239 additions and 701 deletions

4
go.mod
View File

@ -2,6 +2,8 @@ module github.com/mikefarah/yq/v4
require ( require (
github.com/a8m/envsubst v1.3.0 github.com/a8m/envsubst v1.3.0
github.com/alecthomas/participle/v2 v2.0.0-beta.4
github.com/alecthomas/repr v0.1.0
github.com/elliotchance/orderedmap v1.4.0 github.com/elliotchance/orderedmap v1.4.0
github.com/fatih/color v1.13.0 github.com/fatih/color v1.13.0
github.com/goccy/go-yaml v1.9.5 github.com/goccy/go-yaml v1.9.5
@ -9,7 +11,6 @@ require (
github.com/magiconair/properties v1.8.6 github.com/magiconair/properties v1.8.6
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e
github.com/spf13/cobra v1.5.0 github.com/spf13/cobra v1.5.0
github.com/timtadh/lexmachine v0.2.2
golang.org/x/net v0.0.0-20220708220712-1185a9018129 golang.org/x/net v0.0.0-20220708220712-1185a9018129
gopkg.in/op/go-logging.v1 v1.0.0-20160211212156-b2cb9fa56473 gopkg.in/op/go-logging.v1 v1.0.0-20160211212156-b2cb9fa56473
gopkg.in/yaml.v3 v3.0.1 gopkg.in/yaml.v3 v3.0.1
@ -20,7 +21,6 @@ require (
github.com/mattn/go-colorable v0.1.12 // indirect github.com/mattn/go-colorable v0.1.12 // indirect
github.com/mattn/go-isatty v0.0.14 // indirect github.com/mattn/go-isatty v0.0.14 // indirect
github.com/spf13/pflag v1.0.5 // indirect github.com/spf13/pflag v1.0.5 // indirect
github.com/timtadh/data-structures v0.5.3 // indirect
golang.org/x/sys v0.0.0-20220712014510-0a85c31ab51e // indirect golang.org/x/sys v0.0.0-20220712014510-0a85c31ab51e // indirect
golang.org/x/text v0.3.7 // indirect golang.org/x/text v0.3.7 // indirect
golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f // indirect golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f // indirect

10
go.sum
View File

@ -1,5 +1,10 @@
github.com/a8m/envsubst v1.3.0 h1:GmXKmVssap0YtlU3E230W98RWtWCyIZzjtf1apWWyAg= github.com/a8m/envsubst v1.3.0 h1:GmXKmVssap0YtlU3E230W98RWtWCyIZzjtf1apWWyAg=
github.com/a8m/envsubst v1.3.0/go.mod h1:MVUTQNGQ3tsjOOtKCNd+fl8RzhsXcDvvAEzkhGtlsbY= github.com/a8m/envsubst v1.3.0/go.mod h1:MVUTQNGQ3tsjOOtKCNd+fl8RzhsXcDvvAEzkhGtlsbY=
github.com/alecthomas/assert/v2 v2.0.3 h1:WKqJODfOiQG0nEJKFKzDIG3E29CN2/4zR9XGJzKIkbg=
github.com/alecthomas/participle/v2 v2.0.0-beta.4 h1:ublfGBm+x+p2j7KotHhrUMbKtejT7M0Gv1Mt1u3absw=
github.com/alecthomas/participle/v2 v2.0.0-beta.4/go.mod h1:RC764t6n4L8D8ITAJv0qdokritYSNR3wV5cVwmIEaMM=
github.com/alecthomas/repr v0.1.0 h1:ENn2e1+J3k09gyj2shc0dHr/yjaWSHRlrJ4DPMevDqE=
github.com/alecthomas/repr v0.1.0/go.mod h1:2kn6fqh/zIyPLmm3ugklbEi5hg5wS435eygvNfaDQL8=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@ -14,6 +19,7 @@ github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+
github.com/go-playground/validator/v10 v10.4.1/go.mod h1:nlOn6nFhuKACm19sB/8EGNn9GlaMV7XkbRSipzJ0Ii4= github.com/go-playground/validator/v10 v10.4.1/go.mod h1:nlOn6nFhuKACm19sB/8EGNn9GlaMV7XkbRSipzJ0Ii4=
github.com/goccy/go-yaml v1.9.5 h1:Eh/+3uk9kLxG4koCX6lRMAPS1OaMSAi+FJcya0INdB0= github.com/goccy/go-yaml v1.9.5 h1:Eh/+3uk9kLxG4koCX6lRMAPS1OaMSAi+FJcya0INdB0=
github.com/goccy/go-yaml v1.9.5/go.mod h1:U/jl18uSupI5rdI2jmuCswEA2htH9eXfferR3KfscvA= github.com/goccy/go-yaml v1.9.5/go.mod h1:U/jl18uSupI5rdI2jmuCswEA2htH9eXfferR3KfscvA=
github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM=
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
github.com/jinzhu/copier v0.3.5 h1:GlvfUwHk62RokgqVNvYsku0TATCF7bAHVwEXoBh3iJg= github.com/jinzhu/copier v0.3.5 h1:GlvfUwHk62RokgqVNvYsku0TATCF7bAHVwEXoBh3iJg=
@ -41,10 +47,6 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/timtadh/data-structures v0.5.3 h1:F2tEjoG9qWIyUjbvXVgJqEOGJPMIiYn7U5W5mE+i/vQ=
github.com/timtadh/data-structures v0.5.3/go.mod h1:9R4XODhJ8JdWFEI8P/HJKqxuJctfBQw6fDibMQny2oU=
github.com/timtadh/lexmachine v0.2.2 h1:g55RnjdYazm5wnKv59pwFcBJHOyvTPfDEoz21s4PHmY=
github.com/timtadh/lexmachine v0.2.2/go.mod h1:GBJvD5OAfRn/gnp92zb9KTgHLB7akKyxmVivoYCcjQI=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=

View File

@ -21,7 +21,7 @@ type expressionParserImpl struct {
} }
func newExpressionParser() ExpressionParserInterface { func newExpressionParser() ExpressionParserInterface {
return &expressionParserImpl{newExpressionTokeniser(), newExpressionPostFixer()} return &expressionParserImpl{newParticipleLexer(), newExpressionPostFixer()}
} }
func (p *expressionParserImpl) ParseExpression(expression string) (*ExpressionNode, error) { func (p *expressionParserImpl) ParseExpression(expression string) (*ExpressionNode, error) {

View File

@ -76,7 +76,7 @@ func (p *expressionPostFixerImpl) ConvertToPostfix(infixTokens []*token) ([]*Ope
// on the close op - move it to the traverse array op // on the close op - move it to the traverse array op
// allows for .["cat"]? // allows for .["cat"]?
prefs := traversePreferences{} prefs := traversePreferences{}
closeTokenMatch := string(currentToken.Match.Bytes) closeTokenMatch := currentToken.Match
if closeTokenMatch[len(closeTokenMatch)-1:] == "?" { if closeTokenMatch[len(closeTokenMatch)-1:] == "?" {
prefs.OptionalTraverse = true prefs.OptionalTraverse = true
} }

View File

@ -312,7 +312,7 @@ var pathTests = []struct {
}, },
} }
var tokeniser = newExpressionTokeniser() var tokeniser = newParticipleLexer()
var postFixer = newExpressionPostFixer() var postFixer = newExpressionPostFixer()
func TestPathParsing(t *testing.T) { func TestPathParsing(t *testing.T) {

View File

@ -1,691 +0,0 @@
package yqlib
import (
"fmt"
"regexp"
"strconv"
"strings"
lex "github.com/timtadh/lexmachine"
"github.com/timtadh/lexmachine/machines"
)
// skip is a lexer action that produces nothing: it returns a nil token and a
// nil error. initLexer registers it for runs of spaces, tabs and newlines.
func skip(*lex.Scanner, *machines.Match) (interface{}, error) {
return nil, nil
}
// tokenType identifies the kind of a lexed token. Each kind has its own bit,
// so the values are 1, 2, 4, ... 128.
type tokenType uint32

// Token kinds. The first constant is explicitly typed so that every constant
// in the group is a tokenType rather than an untyped integer.
const (
	operationToken       tokenType = 1 << iota // token carrying an Operation
	openBracket                                // "("
	closeBracket                               // ")"
	openCollect                                // "["
	closeCollect                               // "]"
	openCollectObject                          // "{"
	closeCollectObject                         // "}"
	traverseArrayCollect                       // ".["
)
// token is one lexed element of an expression: either an operation
// (TokenType == operationToken, with Operation set) or a structural literal
// such as a bracket.
type token struct {
TokenType tokenType
Operation *Operation
AssignOperation *Operation // e.g. tag (GetTag) op becomes AssignTag if '=' follows it
CheckForPostTraverse bool // e.g. [1]cat should really be [1].cat
// Of the actions in this file, only literalToken populates Match.
Match *machines.Match // match that created this token
}
// toString renders the token as text for logging. Operation tokens delegate
// to the operation's own toString and, when detail is true, append the
// operation type's precedence in parentheses. Structural tokens render as
// their literal symbol; an unknown token type renders as "NFI".
func (t *token) toString(detail bool) string {
	switch t.TokenType {
	case operationToken:
		if !detail {
			return t.Operation.toString()
		}
		return fmt.Sprintf("%v (%v)", t.Operation.toString(), t.Operation.OperationType.Precedence)
	case openBracket:
		return "("
	case closeBracket:
		return ")"
	case openCollect:
		return "["
	case closeCollect:
		return "]"
	case openCollectObject:
		return "{"
	case closeCollectObject:
		return "}"
	case traverseArrayCollect:
		return ".["
	default:
		return "NFI"
	}
}
// pathToken returns the lexer action for path traversal tokens.
//
// The action takes the matched text, records a trailing "?" as an optional
// traverse (and removes it), drops the first character of the match, and,
// when wrapped is true, also strips the surrounding pair of characters via
// unwrap. The remaining text becomes the Value and StringValue of a
// traversePathOpType operation. The resulting token has CheckForPostTraverse
// set.
func pathToken(wrapped bool) lex.Action {
	return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
		value := string(m.Bytes)
		prefs := traversePreferences{}
		if strings.HasSuffix(value, "?") {
			prefs.OptionalTraverse = true
			value = strings.TrimSuffix(value, "?")
		}
		value = value[1:]
		if wrapped {
			value = unwrap(value)
		}
		// Debugf, not Debug: the message carries a format verb.
		log.Debugf("PathToken %v", value)
		op := &Operation{OperationType: traversePathOpType, Value: value, StringValue: value, Preferences: prefs}
		return &token{TokenType: operationToken, Operation: op, CheckForPostTraverse: true}, nil
	}
}
// opToken returns the lexer action for an operator that has no assign
// variant and no preferences; it delegates to opTokenWithPrefs with nil for
// both.
func opToken(op *operationType) lex.Action {
return opTokenWithPrefs(op, nil, nil)
}
// opAssignableToken returns the lexer action for an operator that also has
// an assign variant (assignOpType); it delegates to opTokenWithPrefs with
// nil preferences.
func opAssignableToken(opType *operationType, assignOpType *operationType) lex.Action {
return opTokenWithPrefs(opType, assignOpType, nil)
}
// assignOpToken returns the lexer action for the assignment operators.
//
// updateAssign is copied onto the produced operation's UpdateAssign field
// (initLexer passes true for "|=" and false for "="). The operation always
// carries assignPreferences with DontOverWriteAnchor set, and the matched
// text as its StringValue.
func assignOpToken(updateAssign bool) lex.Action {
	return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
		value := string(m.Bytes)
		// Debugf, not Debug: the message carries a format verb.
		log.Debugf("assignOpToken %v", value)
		prefs := assignPreferences{DontOverWriteAnchor: true}
		op := &Operation{OperationType: assignOpType, Value: assignOpType.Type, StringValue: value, UpdateAssign: updateAssign, Preferences: prefs}
		return &token{TokenType: operationToken, Operation: op}, nil
	}
}
// multiplyWithPrefs returns the lexer action for the multiply operators.
//
// The matched text is scanned for option flags, each of which sets one
// preference:
//
//	"+"  AppendArrays
//	"?"  TraversePrefs.DontAutoCreate
//	"n"  AssignPrefs.OnlyWriteNull
//	"d"  DeepMergeArrays
//
// TraversePrefs.DontFollowAlias is always set. The operation's Value is
// multiplyOpType.Type regardless of which op is passed in.
func multiplyWithPrefs(op *operationType) lex.Action {
	return func(_ *lex.Scanner, m *machines.Match) (interface{}, error) {
		flags := string(m.Bytes)

		var prefs multiplyPreferences
		prefs.TraversePrefs.DontFollowAlias = true
		if strings.Contains(flags, "d") {
			prefs.DeepMergeArrays = true
		}
		if strings.Contains(flags, "n") {
			prefs.AssignPrefs.OnlyWriteNull = true
		}
		if strings.Contains(flags, "?") {
			prefs.TraversePrefs.DontAutoCreate = true
		}
		if strings.Contains(flags, "+") {
			prefs.AppendArrays = true
		}

		operation := &Operation{OperationType: op, Value: multiplyOpType.Type, StringValue: flags, Preferences: prefs}
		return &token{TokenType: operationToken, Operation: operation}, nil
	}
}
// opTokenWithPrefs returns the lexer action for a generic operator.
//
// The action builds an Operation of type op whose Value is op.Type and whose
// StringValue is the matched text, carrying preferences. When assignOpType
// is non-nil, a second Operation of that type (same StringValue and
// preferences) is attached to the token as AssignOperation.
func opTokenWithPrefs(op *operationType, assignOpType *operationType, preferences interface{}) lex.Action {
	return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
		value := string(m.Bytes)
		// Debugf, not Debug: the message carries a format verb.
		log.Debugf("opTokenWithPrefs %v", value)
		// Named distinctly so the operation does not shadow the op parameter.
		operation := &Operation{OperationType: op, Value: op.Type, StringValue: value, Preferences: preferences}
		var assign *Operation
		if assignOpType != nil {
			assign = &Operation{OperationType: assignOpType, Value: assignOpType.Type, StringValue: value, Preferences: preferences}
		}
		return &token{TokenType: operationToken, Operation: operation, AssignOperation: assign}, nil
	}
}
// optionParameterList matches everything up to and including a parenthesised
// parameter list, e.g. "envsubst(ne, nu)". Compiled once at package scope.
var optionParameterList = regexp.MustCompile(`.*\([^\)]*\)`)

// hasOptionParameter reports whether option appears as a whole word in the
// part of value that ends with a parenthesised parameter list. It returns
// false when value contains no such list.
//
// option is matched literally: it is escaped before being placed in the
// pattern, so regex metacharacters in it neither change the match nor cause
// a compile panic.
func hasOptionParameter(value string, option string) bool {
	parameters := optionParameterList.FindString(value)
	if parameters == "" {
		// A successful match always contains "(" and ")", so an empty
		// result means there was no parameter list.
		return false
	}
	optionPattern := regexp.MustCompile(`\b` + regexp.QuoteMeta(option) + `\b`)
	return optionPattern.MatchString(parameters)
}
// numberParameter captures the digits of a trailing "(<digits>)" parameter,
// e.g. the 3 in "flatten(3)". Compiled once at package scope.
var numberParameter = regexp.MustCompile(`.*\(([0-9]+)\)`)

// extractNumberParameter returns the integer inside a "(<digits>)" parameter
// of value.
//
// It returns an error when value contains no such parameter (rather than
// panicking on the missing submatch) or when the digits do not fit in a
// 32-bit signed integer.
func extractNumberParameter(value string) (int, error) {
	matches := numberParameter.FindStringSubmatch(value)
	if matches == nil {
		return 0, fmt.Errorf("no numeric parameter found in %q", value)
	}
	number, err := strconv.ParseInt(matches[1], 10, 32)
	if err != nil {
		return 0, err
	}
	return int(number), nil
}
// envSubstWithOptions returns the lexer action for envsubst called with an
// option list. The options "ne", "nu" and "ff" map to the NoEmpty, NoUnset
// and FailFast preferences respectively.
//
// NOTE(review): the action rewrites the Type field of the shared
// envsubstOpType value on every match ("ENVSUBST", plus "_NO_EMPTY" and/or
// "_NO_UNSET"). That is package-level state, so the last lexed envsubst
// determines it - confirm this is intended.
func envSubstWithOptions() lex.Action {
	return func(_ *lex.Scanner, m *machines.Match) (interface{}, error) {
		matched := string(m.Bytes)
		prefs := envOpPreferences{
			NoUnset:  hasOptionParameter(matched, "nu"),
			NoEmpty:  hasOptionParameter(matched, "ne"),
			FailFast: hasOptionParameter(matched, "ff"),
		}

		typeName := "ENVSUBST"
		if prefs.NoEmpty {
			typeName = typeName + "_NO_EMPTY"
		}
		if prefs.NoUnset {
			typeName = typeName + "_NO_UNSET"
		}
		envsubstOpType.Type = typeName

		operation := &Operation{OperationType: envsubstOpType, Value: envsubstOpType.Type, StringValue: matched, Preferences: prefs}
		return &token{TokenType: operationToken, Operation: operation}, nil
	}
}
// flattenWithDepth returns the lexer action for flatten called with an
// explicit depth. The depth is read from the matched text with
// extractNumberParameter and stored in flattenPreferences; a parse failure
// is returned as the action's error.
func flattenWithDepth() lex.Action {
	return func(_ *lex.Scanner, m *machines.Match) (interface{}, error) {
		matched := string(m.Bytes)
		depth, err := extractNumberParameter(matched)
		if err != nil {
			return nil, err
		}
		operation := &Operation{
			OperationType: flattenOpType,
			Value:         flattenOpType.Type,
			StringValue:   matched,
			Preferences:   flattenPreferences{depth: depth},
		}
		return &token{TokenType: operationToken, Operation: operation}, nil
	}
}
// encodeWithIndent returns the lexer action for an encode operator called
// with an explicit indent. The indent is read from the matched text with
// extractNumberParameter and stored, together with outputFormat, in
// encoderPreferences; a parse failure is returned as the action's error.
func encodeWithIndent(outputFormat PrinterOutputFormat) lex.Action {
	return func(_ *lex.Scanner, m *machines.Match) (interface{}, error) {
		matched := string(m.Bytes)
		indent, err := extractNumberParameter(matched)
		if err != nil {
			return nil, err
		}
		operation := &Operation{
			OperationType: encodeOpType,
			Value:         encodeOpType.Type,
			StringValue:   matched,
			Preferences:   encoderPreferences{format: outputFormat, indent: indent},
		}
		return &token{TokenType: operationToken, Operation: operation}, nil
	}
}
// assignAllCommentsOp returns the lexer action for assigning to "comments".
//
// The produced assignCommentOpType operation has the line, head and foot
// comment preferences all enabled. updateAssign is copied onto the
// operation's UpdateAssign field (initLexer passes true for "comments |="
// and false for "comments =").
func assignAllCommentsOp(updateAssign bool) lex.Action {
	return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
		value := string(m.Bytes)
		// Debugf, not Debug: the message carries a format verb.
		log.Debugf("assignAllCommentsOp %v", value)
		op := &Operation{
			OperationType: assignCommentOpType,
			Value:         assignCommentOpType.Type,
			StringValue:   value,
			UpdateAssign:  updateAssign,
			Preferences:   commentOpPreferences{LineComment: true, HeadComment: true, FootComment: true},
		}
		return &token{TokenType: operationToken, Operation: op}, nil
	}
}
// literalToken returns the lexer action for structural tokens (brackets and
// the like). The produced token has the given type, carries checkForPost as
// its CheckForPostTraverse flag, and keeps the match that created it.
func literalToken(pType tokenType, checkForPost bool) lex.Action {
	return func(_ *lex.Scanner, m *machines.Match) (interface{}, error) {
		literal := &token{
			TokenType:            pType,
			CheckForPostTraverse: checkForPost,
			Match:                m,
		}
		return literal, nil
	}
}
// unwrap drops the first and last byte of value, e.g. the quotes around a
// quoted string. value must be at least two bytes long.
func unwrap(value string) string {
	last := len(value) - 1
	return value[1:last]
}
// numberValue returns the lexer action for integer literals. The matched
// text is parsed as a base-10 64-bit integer and wrapped in a value
// operation; a parse failure is returned as the action's error.
func numberValue() lex.Action {
	return func(_ *lex.Scanner, m *machines.Match) (interface{}, error) {
		literal := string(m.Bytes)
		parsed, err := strconv.ParseInt(literal, 10, 64)
		if err != nil {
			return nil, err
		}
		valueOp := createValueOperation(parsed, literal)
		return &token{TokenType: operationToken, Operation: valueOp}, nil
	}
}
// hexValue returns the lexer action for hexadecimal literals. The first two
// characters of the match (the prefix) are dropped, and the rest is parsed
// as a base-16 64-bit integer. The value operation keeps the full original
// text as its string form.
func hexValue() lex.Action {
	return func(_ *lex.Scanner, m *machines.Match) (interface{}, error) {
		literal := string(m.Bytes)
		digits := literal[2:]
		log.Debugf("numberString: %v", digits)
		parsed, err := strconv.ParseInt(digits, 16, 64)
		if err != nil {
			return nil, err
		}
		valueOp := createValueOperation(parsed, literal)
		return &token{TokenType: operationToken, Operation: valueOp}, nil
	}
}
// floatValue returns the lexer action for floating point literals. The
// matched text is parsed as a 64-bit float and wrapped in a value operation;
// a parse failure is returned as the action's error.
func floatValue() lex.Action {
	return func(_ *lex.Scanner, m *machines.Match) (interface{}, error) {
		literal := string(m.Bytes)
		parsed, err := strconv.ParseFloat(literal, 64)
		if err != nil {
			return nil, err
		}
		valueOp := createValueOperation(parsed, literal)
		return &token{TokenType: operationToken, Operation: valueOp}, nil
	}
}
// booleanValue returns the lexer action for boolean literals. The produced
// value operation holds val and keeps the matched text as its string form.
func booleanValue(val bool) lex.Action {
	return func(_ *lex.Scanner, m *machines.Match) (interface{}, error) {
		valueOp := createValueOperation(val, string(m.Bytes))
		return &token{TokenType: operationToken, Operation: valueOp}, nil
	}
}
// stringValue returns the lexer action for string literals. When wrapped is
// true the surrounding pair of characters is stripped via unwrap. Escaped
// double quotes (\") are then replaced with plain double quotes, and the
// result is used as both the value and its string form.
func stringValue(wrapped bool) lex.Action {
	return func(_ *lex.Scanner, m *machines.Match) (interface{}, error) {
		text := string(m.Bytes)
		if wrapped {
			text = unwrap(text)
		}
		text = strings.ReplaceAll(text, "\\\"", "\"")
		valueOp := createValueOperation(text, text)
		return &token{TokenType: operationToken, Operation: valueOp}, nil
	}
}
// getVariableOpToken returns the lexer action for variable references. The
// first character of the match is dropped and the remainder becomes the
// variable name. The produced token is a getVariableOpType operation with
// CheckForPostTraverse set.
func getVariableOpToken() lex.Action {
	return func(_ *lex.Scanner, m *machines.Match) (interface{}, error) {
		name := string(m.Bytes)[1:]
		variableOp := createValueOperation(name, name)
		variableOp.OperationType = getVariableOpType
		return &token{
			TokenType:            operationToken,
			Operation:            variableOp,
			CheckForPostTraverse: true,
		}, nil
	}
}
// envOp returns the lexer action for env(...) and strenv(...). The text
// between the opening "env(" / "strenv(" and the final character of the
// match becomes the operation's value. For strenv, the StringValue
// preference is also set.
func envOp(strenv bool) lex.Action {
	return func(_ *lex.Scanner, m *machines.Match) (interface{}, error) {
		matched := string(m.Bytes)

		var preferences envOpPreferences
		opening := len("env(")
		if strenv {
			opening = len("strenv(")
			preferences.StringValue = true
		}
		// Keep only what sits between the opening text and the last character.
		name := matched[opening : len(matched)-1]

		envOperation := createValueOperation(name, name)
		envOperation.OperationType = envOpType
		envOperation.Preferences = preferences
		return &token{TokenType: operationToken, Operation: envOperation}, nil
	}
}
// nullValue returns the lexer action for null literals. The produced value
// operation holds nil and keeps the matched text as its string form.
func nullValue() lex.Action {
	return func(_ *lex.Scanner, m *machines.Match) (interface{}, error) {
		valueOp := createValueOperation(nil, string(m.Bytes))
		return &token{TokenType: operationToken, Operation: valueOp}, nil
	}
}
// selfToken returns the lexer action for a lone ".". It produces an
// operation token whose operation has type selfReferenceOpType and nothing
// else set.
func selfToken() lex.Action {
	return func(_ *lex.Scanner, _ *machines.Match) (interface{}, error) {
		selfOp := &Operation{OperationType: selfReferenceOpType}
		return &token{TokenType: operationToken, Operation: selfOp}, nil
	}
}
// initLexer builds the lexer for expressions: it registers one pattern and
// action pair per token kind and then compiles the lexer with CompileNFA.
// It returns the compiled lexer, or the compile error.
//
// NOTE(review): several patterns overlap (e.g. `map` / `map_values`,
// `\.\.` / `\.\.\.`); how ties are resolved depends on lexmachine - confirm
// its precedence rules before reordering any of these registrations.
func initLexer() (*lex.Lexer, error) {
lexer := lex.NewLexer()
// Brackets and recursive descent.
lexer.Add([]byte(`\(`), literalToken(openBracket, false))
lexer.Add([]byte(`\)`), literalToken(closeBracket, true))
lexer.Add([]byte(`\.\[`), literalToken(traverseArrayCollect, false))
lexer.Add([]byte(`\.\.`), opTokenWithPrefs(recursiveDescentOpType, nil, recursiveDescentPreferences{RecurseArray: true,
TraversePreferences: traversePreferences{DontFollowAlias: true, IncludeMapKeys: false}}))
lexer.Add([]byte(`\.\.\.`), opTokenWithPrefs(recursiveDescentOpType, nil, recursiveDescentPreferences{RecurseArray: true,
TraversePreferences: traversePreferences{DontFollowAlias: true, IncludeMapKeys: true}}))
lexer.Add([]byte(`,`), opToken(unionOpType))
lexer.Add([]byte(`:\s*`), opToken(createMapOpType))
lexer.Add([]byte(`length`), opToken(lengthOpType))
lexer.Add([]byte(`line`), opToken(lineOpType))
lexer.Add([]byte(`column`), opToken(columnOpType))
lexer.Add([]byte(`eval`), opToken(evalOpType))
lexer.Add([]byte(`map`), opToken(mapOpType))
lexer.Add([]byte(`map_values`), opToken(mapValuesOpType))
lexer.Add([]byte(`pick`), opToken(pickOpType))
// flatten with an explicit depth; plain flatten uses depth -1.
lexer.Add([]byte(`flatten\([0-9]+\)`), flattenWithDepth())
lexer.Add([]byte(`flatten`), opTokenWithPrefs(flattenOpType, nil, flattenPreferences{depth: -1}))
lexer.Add([]byte(`format_datetime`), opToken(formatDateTimeOpType))
lexer.Add([]byte(`now`), opToken(nowOpType))
lexer.Add([]byte(`tz`), opToken(tzOpType))
lexer.Add([]byte(`with_dtf`), opToken(withDtFormatOpType))
lexer.Add([]byte(`error`), opToken(errorOpType))
// Encoders called with an explicit indent, e.g. to_json(0).
lexer.Add([]byte(`toyaml\([0-9]+\)`), encodeWithIndent(YamlOutputFormat))
lexer.Add([]byte(`to_yaml\([0-9]+\)`), encodeWithIndent(YamlOutputFormat))
lexer.Add([]byte(`toxml\([0-9]+\)`), encodeWithIndent(XMLOutputFormat))
lexer.Add([]byte(`to_xml\([0-9]+\)`), encodeWithIndent(XMLOutputFormat))
lexer.Add([]byte(`tojson\([0-9]+\)`), encodeWithIndent(JSONOutputFormat))
lexer.Add([]byte(`to_json\([0-9]+\)`), encodeWithIndent(JSONOutputFormat))
// Encoders with their default indent.
lexer.Add([]byte(`toyaml`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: YamlOutputFormat, indent: 2}))
lexer.Add([]byte(`to_yaml`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: YamlOutputFormat, indent: 2}))
// 0 indent doesn't work with yaml.
lexer.Add([]byte(`@yaml`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: YamlOutputFormat, indent: 2}))
lexer.Add([]byte(`tojson`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: JSONOutputFormat, indent: 2}))
lexer.Add([]byte(`to_json`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: JSONOutputFormat, indent: 2}))
lexer.Add([]byte(`@json`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: JSONOutputFormat, indent: 0}))
lexer.Add([]byte(`toprops`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: PropsOutputFormat, indent: 2}))
lexer.Add([]byte(`to_props`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: PropsOutputFormat, indent: 2}))
lexer.Add([]byte(`@props`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: PropsOutputFormat, indent: 2}))
lexer.Add([]byte(`tocsv`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: CSVOutputFormat}))
lexer.Add([]byte(`to_csv`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: CSVOutputFormat}))
lexer.Add([]byte(`@csv`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: CSVOutputFormat}))
lexer.Add([]byte(`totsv`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: TSVOutputFormat}))
lexer.Add([]byte(`to_tsv`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: TSVOutputFormat}))
lexer.Add([]byte(`@tsv`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: TSVOutputFormat}))
// NOTE(review): toxml sets no indent while to_xml below uses indent: 2 -
// confirm whether this difference between the two aliases is intended.
lexer.Add([]byte(`toxml`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: XMLOutputFormat}))
lexer.Add([]byte(`to_xml`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: XMLOutputFormat, indent: 2}))
lexer.Add([]byte(`@xml`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: XMLOutputFormat, indent: 0}))
lexer.Add([]byte(`@base64`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: Base64OutputFormat}))
// Decoders.
// NOTE(review): the JSON decoders (fromjson, from_json, @jsond) use
// YamlInputFormat - presumably because JSON can be read as YAML; confirm.
lexer.Add([]byte(`@base64d`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: Base64InputFormat}))
lexer.Add([]byte(`fromyaml`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: YamlInputFormat}))
lexer.Add([]byte(`fromjson`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: YamlInputFormat}))
lexer.Add([]byte(`fromxml`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: XMLInputFormat}))
lexer.Add([]byte(`fromprops`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: PropertiesInputFormat}))
lexer.Add([]byte(`from_yaml`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: YamlInputFormat}))
lexer.Add([]byte(`from_json`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: YamlInputFormat}))
lexer.Add([]byte(`from_xml`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: XMLInputFormat}))
lexer.Add([]byte(`from_props`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: PropertiesInputFormat}))
lexer.Add([]byte(`@yamld`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: YamlInputFormat}))
lexer.Add([]byte(`@jsond`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: YamlInputFormat}))
lexer.Add([]byte(`@xmld`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: XMLInputFormat}))
lexer.Add([]byte(`@propsd`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: PropertiesInputFormat}))
lexer.Add([]byte(`sortKeys`), opToken(sortKeysOpType))
lexer.Add([]byte(`sort_keys`), opToken(sortKeysOpType))
// File loaders; the decoders are constructed here, when the lexer is built.
lexer.Add([]byte(`load`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewYamlDecoder()}))
lexer.Add([]byte(`xmlload`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewXMLDecoder(XMLPreferences.AttributePrefix, XMLPreferences.ContentName, XMLPreferences.StrictMode, XMLPreferences.KeepNamespace, XMLPreferences.UseRawToken)}))
lexer.Add([]byte(`load_xml`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewXMLDecoder(XMLPreferences.AttributePrefix, XMLPreferences.ContentName, XMLPreferences.StrictMode, XMLPreferences.KeepNamespace, XMLPreferences.UseRawToken)}))
lexer.Add([]byte(`loadxml`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewXMLDecoder(XMLPreferences.AttributePrefix, XMLPreferences.ContentName, XMLPreferences.StrictMode, XMLPreferences.KeepNamespace, XMLPreferences.UseRawToken)}))
lexer.Add([]byte(`load_base64`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewBase64Decoder()}))
lexer.Add([]byte(`load_props`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewPropertiesDecoder()}))
lexer.Add([]byte(`loadprops`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewPropertiesDecoder()}))
lexer.Add([]byte(`strload`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: true}))
lexer.Add([]byte(`load_str`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: true}))
lexer.Add([]byte(`loadstr`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: true}))
lexer.Add([]byte(`select`), opToken(selectOpType))
lexer.Add([]byte(`has`), opToken(hasOpType))
lexer.Add([]byte(`unique`), opToken(uniqueOpType))
lexer.Add([]byte(`unique_by`), opToken(uniqueByOpType))
lexer.Add([]byte(`group_by`), opToken(groupByOpType))
lexer.Add([]byte(`explode`), opToken(explodeOpType))
lexer.Add([]byte(`or`), opToken(orOpType))
lexer.Add([]byte(`and`), opToken(andOpType))
lexer.Add([]byte(`not`), opToken(notOpType))
lexer.Add([]byte(`ireduce`), opToken(reduceOpType))
lexer.Add([]byte(`;`), opToken(blockOpType))
lexer.Add([]byte(`\/\/`), opToken(alternativeOpType))
lexer.Add([]byte(`documentIndex`), opToken(getDocumentIndexOpType))
lexer.Add([]byte(`document_index`), opToken(getDocumentIndexOpType))
lexer.Add([]byte(`di`), opToken(getDocumentIndexOpType))
lexer.Add([]byte(`splitDoc`), opToken(splitDocumentOpType))
lexer.Add([]byte(`split_doc`), opToken(splitDocumentOpType))
lexer.Add([]byte(`join`), opToken(joinStringOpType))
lexer.Add([]byte(`sub`), opToken(subStringOpType))
lexer.Add([]byte(`match`), opToken(matchOpType))
lexer.Add([]byte(`capture`), opToken(captureOpType))
lexer.Add([]byte(`test`), opToken(testOpType))
lexer.Add([]byte(`upcase`), opTokenWithPrefs(changeCaseOpType, nil, changeCasePrefs{ToUpperCase: true}))
lexer.Add([]byte(`ascii_upcase`), opTokenWithPrefs(changeCaseOpType, nil, changeCasePrefs{ToUpperCase: true}))
lexer.Add([]byte(`downcase`), opTokenWithPrefs(changeCaseOpType, nil, changeCasePrefs{ToUpperCase: false}))
lexer.Add([]byte(`ascii_downcase`), opTokenWithPrefs(changeCaseOpType, nil, changeCasePrefs{ToUpperCase: false}))
lexer.Add([]byte(`sort`), opToken(sortOpType))
lexer.Add([]byte(`sort_by`), opToken(sortByOpType))
lexer.Add([]byte(`reverse`), opToken(reverseOpType))
lexer.Add([]byte(`any`), opToken(anyOpType))
lexer.Add([]byte(`any_c`), opToken(anyConditionOpType))
lexer.Add([]byte(`all`), opToken(allOpType))
lexer.Add([]byte(`all_c`), opToken(allConditionOpType))
lexer.Add([]byte(`contains`), opToken(containsOpType))
lexer.Add([]byte(`split`), opToken(splitStringOpType))
lexer.Add([]byte(`parent`), opToken(getParentOpType))
lexer.Add([]byte(`key`), opToken(getKeyOpType))
lexer.Add([]byte(`keys`), opToken(keysOpType))
// Operators that have an assign variant.
lexer.Add([]byte(`style`), opAssignableToken(getStyleOpType, assignStyleOpType))
lexer.Add([]byte(`tag|type`), opAssignableToken(getTagOpType, assignTagOpType))
lexer.Add([]byte(`anchor`), opAssignableToken(getAnchorOpType, assignAnchorOpType))
lexer.Add([]byte(`alias`), opAssignableToken(getAliasOptype, assignAliasOpType))
lexer.Add([]byte(`filename`), opToken(getFilenameOpType))
lexer.Add([]byte(`fileIndex`), opToken(getFileIndexOpType))
lexer.Add([]byte(`file_index`), opToken(getFileIndexOpType))
lexer.Add([]byte(`fi`), opToken(getFileIndexOpType))
lexer.Add([]byte(`path`), opToken(getPathOpType))
lexer.Add([]byte(`to_entries`), opToken(toEntriesOpType))
lexer.Add([]byte(`from_entries`), opToken(fromEntriesOpType))
lexer.Add([]byte(`with_entries`), opToken(withEntriesOpType))
lexer.Add([]byte(`with`), opToken(withOpType))
lexer.Add([]byte(`lineComment`), opTokenWithPrefs(getCommentOpType, assignCommentOpType, commentOpPreferences{LineComment: true}))
lexer.Add([]byte(`line_comment`), opTokenWithPrefs(getCommentOpType, assignCommentOpType, commentOpPreferences{LineComment: true}))
lexer.Add([]byte(`headComment`), opTokenWithPrefs(getCommentOpType, assignCommentOpType, commentOpPreferences{HeadComment: true}))
lexer.Add([]byte(`head_comment`), opTokenWithPrefs(getCommentOpType, assignCommentOpType, commentOpPreferences{HeadComment: true}))
lexer.Add([]byte(`footComment`), opTokenWithPrefs(getCommentOpType, assignCommentOpType, commentOpPreferences{FootComment: true}))
lexer.Add([]byte(`foot_comment`), opTokenWithPrefs(getCommentOpType, assignCommentOpType, commentOpPreferences{FootComment: true}))
lexer.Add([]byte(`comments\s*=`), assignAllCommentsOp(false))
lexer.Add([]byte(`comments\s*\|=`), assignAllCommentsOp(true))
lexer.Add([]byte(`collect`), opToken(collectOpType))
// Comparison and assignment; these patterns absorb surrounding whitespace.
lexer.Add([]byte(`\s*==\s*`), opToken(equalsOpType))
lexer.Add([]byte(`\s*!=\s*`), opToken(notEqualsOpType))
lexer.Add([]byte(`\s*>=\s*`), opTokenWithPrefs(compareOpType, nil, compareTypePref{OrEqual: true, Greater: true}))
lexer.Add([]byte(`\s*>\s*`), opTokenWithPrefs(compareOpType, nil, compareTypePref{OrEqual: false, Greater: true}))
lexer.Add([]byte(`\s*<=\s*`), opTokenWithPrefs(compareOpType, nil, compareTypePref{OrEqual: true, Greater: false}))
lexer.Add([]byte(`\s*<\s*`), opTokenWithPrefs(compareOpType, nil, compareTypePref{OrEqual: false, Greater: false}))
lexer.Add([]byte(`\s*=\s*`), assignOpToken(false))
lexer.Add([]byte(`del`), opToken(deleteChildOpType))
lexer.Add([]byte(`\s*\|=\s*`), assignOpToken(true))
// Whitespace produces no token.
lexer.Add([]byte("( |\t|\n|\r)+"), skip)
// Path traversal: quoted (.\"key\") and bare (.key), each with optional "?".
lexer.Add([]byte(`\."[^ "]+"\??`), pathToken(true))
lexer.Add([]byte(`\.[^ ;\}\{\:\[\],\|\.\[\(\)=\n]+\??`), pathToken(false))
lexer.Add([]byte(`\.`), selfToken())
lexer.Add([]byte(`\|`), opToken(pipeOpType))
// Literal values.
lexer.Add([]byte(`0[xX][0-9A-Fa-f]+`), hexValue())
lexer.Add([]byte(`-?\d+(\.\d+)`), floatValue())
lexer.Add([]byte(`-?[1-9](\.\d+)?[Ee][-+]?\d+`), floatValue())
lexer.Add([]byte(`-?\d+`), numberValue())
lexer.Add([]byte(`[Tt][Rr][Uu][Ee]`), booleanValue(true))
lexer.Add([]byte(`[Ff][Aa][Ll][Ss][Ee]`), booleanValue(false))
lexer.Add([]byte(`[Nn][Uu][Ll][Ll]`), nullValue())
lexer.Add([]byte(`~`), nullValue())
lexer.Add([]byte(`"([^"\\]*(\\.[^"\\]*)*)"`), stringValue(true))
lexer.Add([]byte(`strenv\([^\)]+\)`), envOp(true))
lexer.Add([]byte(`env\([^\)]+\)`), envOp(false))
lexer.Add([]byte(`envsubst\((ne|nu|ff| |,)+\)`), envSubstWithOptions())
lexer.Add([]byte(`envsubst`), opToken(envsubstOpType))
lexer.Add([]byte(`\[`), literalToken(openCollect, false))
lexer.Add([]byte(`\]\??`), literalToken(closeCollect, true))
lexer.Add([]byte(`\{`), literalToken(openCollectObject, false))
lexer.Add([]byte(`\}`), literalToken(closeCollectObject, true))
// Multiply, with its option flags parsed by multiplyWithPrefs.
lexer.Add([]byte(`\*=[\+|\?dn]*`), multiplyWithPrefs(multiplyAssignOpType))
lexer.Add([]byte(`\*[\+|\?dn]*`), multiplyWithPrefs(multiplyOpType))
lexer.Add([]byte(`\+`), opToken(addOpType))
lexer.Add([]byte(`\+=`), opToken(addAssignOpType))
lexer.Add([]byte(`\-`), opToken(subtractOpType))
lexer.Add([]byte(`\-=`), opToken(subtractAssignOpType))
// NOTE(review): in the class below "_-0" reads as a character range rather
// than the three literals "_", "-", "0" - confirm the intended set.
lexer.Add([]byte(`\$[a-zA-Z_-0-9]+`), getVariableOpToken())
lexer.Add([]byte(`as`), opTokenWithPrefs(assignVariableOpType, nil, assignVarPreferences{}))
lexer.Add([]byte(`ref`), opTokenWithPrefs(assignVariableOpType, nil, assignVarPreferences{IsReference: true}))
err := lexer.CompileNFA()
if err != nil {
return nil, err
}
return lexer, nil
}
// expressionTokeniser turns an expression string into a slice of tokens.
type expressionTokeniser interface {
// Tokenise lexes expression and returns its tokens, or an error.
Tokenise(expression string) ([]*token, error)
}
// expressionTokeniserImpl is the lexmachine-backed expressionTokeniser. It
// holds the lexer built by initLexer.
type expressionTokeniserImpl struct {
lexer *lex.Lexer
}
// newExpressionTokeniser builds the lexer with initLexer and wraps it in an
// expressionTokeniser. It panics if the lexer cannot be built.
func newExpressionTokeniser() expressionTokeniser {
	lexer, err := initLexer()
	if err != nil {
		panic(err)
	}
	return &expressionTokeniserImpl{lexer: lexer}
}
// Tokenise scans expression with the underlying lexer, collects every token
// it yields, and then runs each one through handleToken to produce the final
// post-processed token list. Any scanner error is wrapped and returned.
func (p *expressionTokeniserImpl) Tokenise(expression string) ([]*token, error) {
	scanner, err := p.lexer.Scanner([]byte(expression))
	if err != nil {
		return nil, fmt.Errorf("parsing expression: %w", err)
	}

	var rawTokens []*token
	for {
		matched, scanErr, done := scanner.Next()
		if done {
			break
		}
		if matched != nil {
			lexed := matched.(*token)
			log.Debugf("Tokenising %v", lexed.toString(true))
			rawTokens = append(rawTokens, lexed)
		}
		if scanErr != nil {
			return nil, fmt.Errorf("parsing expression: %w", scanErr)
		}
	}

	processed := make([]*token, 0)
	for i := 0; i < len(rawTokens); i++ {
		var skip bool
		processed, skip = p.handleToken(rawTokens, i, processed)
		if skip {
			// handleToken already consumed the following token.
			i++
		}
	}
	return processed, nil
}
// handleToken post-processes tokens[index], appending the result (plus any
// synthetic tokens it implies) to postProcessedTokens.
//
// It returns the extended slice and skipNextToken, which is true when the
// following token (an assign operator) was folded into the current one and
// must not be processed again by the caller.
func (p *expressionTokeniserImpl) handleToken(tokens []*token, index int, postProcessedTokens []*token) (tokensAccum []*token, skipNextToken bool) {
	currentToken := tokens[index]
	// Debugf rather than Debug, so that the %v verb is actually expanded.
	log.Debugf("processing %v", currentToken.toString(true))

	hasNext := index != len(tokens)-1

	if currentToken.TokenType == traverseArrayCollect {
		// `.[exp]` works by creating a traversal array of [self, exp] and
		// piping that into the traverse array operator: emit self, then
		// traverse-array, then treat the current token as a plain `[`.
		log.Debug("  adding self")
		op := &Operation{OperationType: selfReferenceOpType, StringValue: "SELF"}
		postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: op})
		log.Debug("  adding traverse array")
		op = &Operation{OperationType: traverseArrayOpType, StringValue: "TRAVERSE_ARRAY"}
		postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: op})
		currentToken = &token{TokenType: openCollect}
	}

	// An assignable operation followed by an assign operator becomes the
	// matching assign operation; the assign operator itself is consumed.
	if hasNext && currentToken.AssignOperation != nil &&
		tokens[index+1].TokenType == operationToken &&
		tokens[index+1].Operation.OperationType == assignOpType {
		log.Debug("  its an update assign")
		currentToken.Operation = currentToken.AssignOperation
		currentToken.Operation.UpdateAssign = tokens[index+1].Operation.UpdateAssign
		skipNextToken = true
	}

	log.Debug("  adding token to the fixed list")
	postProcessedTokens = append(postProcessedTokens, currentToken)

	// `[]` and `{}` get an explicit EMPTY operation between the brackets.
	if hasNext &&
		((currentToken.TokenType == openCollect && tokens[index+1].TokenType == closeCollect) ||
			(currentToken.TokenType == openCollectObject && tokens[index+1].TokenType == closeCollectObject)) {
		log.Debug("  adding empty")
		op := &Operation{OperationType: emptyOpType, StringValue: "EMPTY"}
		postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: op})
	}

	// A traversal directly after a token that allows it is joined by a short pipe.
	if hasNext && currentToken.CheckForPostTraverse &&
		((tokens[index+1].TokenType == operationToken && (tokens[index+1].Operation.OperationType == traversePathOpType)) ||
			(tokens[index+1].TokenType == traverseArrayCollect)) {
		log.Debug("  adding pipe because the next thing is traverse")
		op := &Operation{OperationType: shortPipeOpType, Value: "PIPE", StringValue: "."}
		postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: op})
	}

	// A `[` directly after such a token is an array traversal.
	if hasNext && currentToken.CheckForPostTraverse &&
		tokens[index+1].TokenType == openCollect {
		log.Debug("  adding traverArray because next is opencollect")
		op := &Operation{OperationType: traverseArrayOpType}
		postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: op})
	}
	return postProcessedTokens, skipNextToken
}

159
pkg/yqlib/lexer.go Normal file
View File

@ -0,0 +1,159 @@
package yqlib
import (
"fmt"
"regexp"
"strconv"
)
// expressionTokeniser converts an expression string into a list of tokens.
type expressionTokeniser interface {
// Tokenise returns the tokens for expression, or an error.
Tokenise(expression string) ([]*token, error)
}
// tokenType identifies the kind of a token.
type tokenType uint32
// Token kinds. Each constant is a distinct power of two (1 << iota).
const (
operationToken = 1 << iota
openBracket
closeBracket
openCollect
closeCollect
openCollectObject
closeCollectObject
traverseArrayCollect
)
// token is a single lexed element of an expression: either an operation
// (TokenType == operationToken, with Operation set) or a literal bracket.
type token struct {
TokenType tokenType
Operation *Operation
AssignOperation *Operation // e.g. tag (GetTag) op becomes AssignTag if '=' follows it
CheckForPostTraverse bool // e.g. [1]cat should really be [1].cat
Match string
}
// toString renders the token for log output. Operation tokens print their
// operation (followed by its precedence when detail is true); bracket tokens
// print their literal symbol; anything else prints "NFI".
func (t *token) toString(detail bool) string {
	switch t.TokenType {
	case operationToken:
		description := t.Operation.toString()
		if detail {
			return fmt.Sprintf("%v (%v)", description, t.Operation.OperationType.Precedence)
		}
		return description
	case openBracket:
		return "("
	case closeBracket:
		return ")"
	case openCollect:
		return "["
	case closeCollect:
		return "]"
	case openCollectObject:
		return "{"
	case closeCollectObject:
		return "}"
	case traverseArrayCollect:
		return ".["
	default:
		return "NFI"
	}
}
// unwrap strips the first and last byte of value (e.g. the surrounding
// quotes of a quoted string). Values shorter than two bytes have nothing to
// strip on both sides and are returned unchanged instead of panicking.
func unwrap(value string) string {
	if len(value) < 2 {
		return value
	}
	return value[1 : len(value)-1]
}
// numberParameterPattern captures the digits of a trailing `(<digits>)`
// parameter. It is compiled once rather than on every call.
var numberParameterPattern = regexp.MustCompile(`.*\(([0-9]+)\)`)

// extractNumberParameter returns the integer inside the parentheses of value,
// e.g. 3 for "flatten(3)".
//
// It returns an error when value contains no `(<digits>)` parameter or when
// the digits do not fit in a 32-bit integer.
func extractNumberParameter(value string) (int, error) {
	matches := numberParameterPattern.FindStringSubmatch(value)
	if len(matches) < 2 {
		// No match: report it instead of indexing into a nil slice.
		return 0, fmt.Errorf("no numeric parameter found in %q", value)
	}
	parsed, err := strconv.ParseInt(matches[1], 10, 32)
	if err != nil {
		return 0, err
	}
	return int(parsed), nil
}
// optionParameterPattern captures the text between the last pair of
// parentheses in a value such as "envsubst(ne, nu)".
var optionParameterPattern = regexp.MustCompile(`.*\(([^\)]*)\)`)

// hasOptionParameter reports whether option appears as a whole word inside
// the parenthesised parameter list of value. It returns false when value has
// no parameter list.
//
// Only the text inside the parentheses is searched, and option is matched
// literally (regular-expression metacharacters in it are escaped).
func hasOptionParameter(value string, option string) bool {
	matches := optionParameterPattern.FindStringSubmatch(value)
	if len(matches) < 2 {
		return false
	}
	// QuoteMeta keeps an option containing metacharacters from being
	// interpreted as a pattern (or making MustCompile panic).
	optionParser := regexp.MustCompile(`\b` + regexp.QuoteMeta(option) + `\b`)
	return optionParser.MatchString(matches[1])
}
// postProcessTokens runs every token through handleToken, skipping the token
// that follows whenever handleToken reports it has already been consumed,
// and returns the resulting list.
func postProcessTokens(tokens []*token) []*token {
	processed := make([]*token, 0)
	for i := 0; i < len(tokens); i++ {
		var consumedNext bool
		processed, consumedNext = handleToken(tokens, i, processed)
		if consumedNext {
			i++
		}
	}
	return processed
}
// handleToken post-processes tokens[index], appending the result (plus any
// synthetic tokens it implies) to postProcessedTokens.
//
// It returns the extended slice and skipNextToken, which is true when the
// following token (an assign operator) was folded into the current one and
// must not be processed again by the caller.
func handleToken(tokens []*token, index int, postProcessedTokens []*token) (tokensAccum []*token, skipNextToken bool) {
	currentToken := tokens[index]
	// Debugf rather than Debug, so that the %v verb is actually expanded.
	log.Debugf("processing %v", currentToken.toString(true))

	hasNext := index != len(tokens)-1

	if currentToken.TokenType == traverseArrayCollect {
		// `.[exp]` works by creating a traversal array of [self, exp] and
		// piping that into the traverse array operator: emit self, then
		// traverse-array, then treat the current token as a plain `[`.
		log.Debug("  adding self")
		op := &Operation{OperationType: selfReferenceOpType, StringValue: "SELF"}
		postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: op})
		log.Debug("  adding traverse array")
		op = &Operation{OperationType: traverseArrayOpType, StringValue: "TRAVERSE_ARRAY"}
		postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: op})
		currentToken = &token{TokenType: openCollect}
	}

	// An assignable operation followed by an assign operator becomes the
	// matching assign operation; the assign operator itself is consumed.
	if hasNext && currentToken.AssignOperation != nil &&
		tokens[index+1].TokenType == operationToken &&
		tokens[index+1].Operation.OperationType == assignOpType {
		log.Debug("  its an update assign")
		currentToken.Operation = currentToken.AssignOperation
		currentToken.Operation.UpdateAssign = tokens[index+1].Operation.UpdateAssign
		skipNextToken = true
	}

	log.Debug("  adding token to the fixed list")
	postProcessedTokens = append(postProcessedTokens, currentToken)

	// `[]` and `{}` get an explicit EMPTY operation between the brackets.
	if hasNext &&
		((currentToken.TokenType == openCollect && tokens[index+1].TokenType == closeCollect) ||
			(currentToken.TokenType == openCollectObject && tokens[index+1].TokenType == closeCollectObject)) {
		log.Debug("  adding empty")
		op := &Operation{OperationType: emptyOpType, StringValue: "EMPTY"}
		postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: op})
	}

	// A traversal directly after a token that allows it is joined by a short pipe.
	if hasNext && currentToken.CheckForPostTraverse &&
		((tokens[index+1].TokenType == operationToken && (tokens[index+1].Operation.OperationType == traversePathOpType)) ||
			(tokens[index+1].TokenType == traverseArrayCollect)) {
		log.Debug("  adding pipe because the next thing is traverse")
		op := &Operation{OperationType: shortPipeOpType, Value: "PIPE", StringValue: "."}
		postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: op})
	}

	// A `[` directly after such a token is an array traversal.
	if hasNext && currentToken.CheckForPostTraverse &&
		tokens[index+1].TokenType == openCollect {
		log.Debug("  adding traverArray because next is opencollect")
		op := &Operation{OperationType: traverseArrayOpType}
		postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: op})
	}
	return postProcessedTokens, skipNextToken
}

View File

@ -0,0 +1,530 @@
package yqlib
import (
"strconv"
"strings"
"github.com/alecthomas/participle/v2/lexer"
)
// participleYqRules is the table of lexer rules for yq expressions. Each
// entry gives the rule name, the regular expression it matches, the action
// that turns a match into a yq token (nil for the whitespace rule), and the
// participle token type, which is 0 here and filled in by newParticipleLexer.
//
// NOTE(review): longer patterns are listed before patterns that are their
// prefixes (e.g. `\.\.\.` before `\.\.`, `unique_?by` before `unique`), so
// the order looks significant - presumably the lexer tries rules in order;
// confirm against the participle documentation before reordering.
var participleYqRules = []*participleYqRule{
// comment operators
{"LINE_COMMENT", `line_?comment|lineComment`, opTokenWithPrefs(getCommentOpType, assignCommentOpType, commentOpPreferences{LineComment: true}), 0},
{"HEAD_COMMENT", `head_?comment|headComment`, opTokenWithPrefs(getCommentOpType, assignCommentOpType, commentOpPreferences{HeadComment: true}), 0},
{"FOOT_COMMENT", `foot_?comment|footComment`, opTokenWithPrefs(getCommentOpType, assignCommentOpType, commentOpPreferences{FootComment: true}), 0},
// brackets and collection delimiters
{"OpenBracket", `\(`, literalToken(openBracket, false), 0},
{"CloseBracket", `\)`, literalToken(closeBracket, true), 0},
{"OpenTraverseArrayCollect", `\.\[`, literalToken(traverseArrayCollect, false), 0},
{"OpenCollect", `\[`, literalToken(openCollect, false), 0},
{"CloseCollect", `\]\??`, literalToken(closeCollect, true), 0},
{"OpenCollectObject", `\{`, literalToken(openCollectObject, false), 0},
{"CloseCollectObject", `\}`, literalToken(closeCollectObject, true), 0},
{"RecursiveDecentIncludingKeys", `\.\.\.`, recursiveDecentOpToken(true), 0},
{"RecursiveDecent", `\.\.`, recursiveDecentOpToken(false), 0},
// variables
{"GetVariable", `\$[a-zA-Z_\-0-9]+`, getVariableOpToken(), 0},
{"AsignAsVariable", `as`, opTokenWithPrefs(assignVariableOpType, nil, assignVarPreferences{}), 0},
{"AsignRefVariable", `ref`, opTokenWithPrefs(assignVariableOpType, nil, assignVarPreferences{IsReference: true}), 0},
{"CreateMap", `:\s*`, opToken(createMapOpType), 0},
// named operators
simpleOp("length", lengthOpType),
simpleOp("line", lineOpType),
simpleOp("column", columnOpType),
simpleOp("eval", evalOpType),
{"MapValues", `map_?values`, opToken(mapValuesOpType), 0},
simpleOp("map", mapOpType),
simpleOp("pick", pickOpType),
{"FlattenWithDepth", `flatten\([0-9]+\)`, flattenWithDepth(), 0},
{"Flatten", `flatten`, opTokenWithPrefs(flattenOpType, nil, flattenPreferences{depth: -1}), 0},
simpleOp("format_datetime", formatDateTimeOpType),
simpleOp("now", nowOpType),
simpleOp("tz", tzOpType),
simpleOp("with_dtf", withDtFormatOpType),
simpleOp("error", errorOpType),
simpleOp("sortKeys", sortKeysOpType),
simpleOp("sort_?keys", sortKeysOpType),
// encoders and decoders
{"YamlEncodeWithIndent", `to_?yaml\([0-9]+\)`, encodeParseIndent(YamlOutputFormat), 0},
{"XMLEncodeWithIndent", `to_?xml\([0-9]+\)`, encodeParseIndent(XMLOutputFormat), 0},
{"JSONEncodeWithIndent", `to_?json\([0-9]+\)`, encodeParseIndent(JSONOutputFormat), 0},
{"YamlDecode", `from_?yaml|@yamld|from_?json|@jsond`, decodeOp(YamlInputFormat), 0},
{"YamlEncode", `to_?yaml|@yaml`, encodeWithIndent(YamlOutputFormat, 2), 0},
{"JSONEncode", `to_?json`, encodeWithIndent(JSONOutputFormat, 2), 0},
{"JSONEncodeNoIndent", `@json`, encodeWithIndent(JSONOutputFormat, 0), 0},
{"PropertiesDecode", `from_?props|@propsd`, decodeOp(PropertiesInputFormat), 0},
{"PropsEncode", `to_?props|@props`, encodeWithIndent(PropsOutputFormat, 2), 0},
{"XmlDecode", `from_?xml|@xmld`, decodeOp(XMLInputFormat), 0},
{"XMLEncode", `to_?xml`, encodeWithIndent(XMLOutputFormat, 2), 0},
{"XMLEncodeNoIndent", `@xml`, encodeWithIndent(XMLOutputFormat, 0), 0},
{"CSVEncode", `to_?csv|@csv`, encodeWithIndent(CSVOutputFormat, 0), 0},
{"TSVEncode", `to_?tsv|@tsv`, encodeWithIndent(TSVOutputFormat, 0), 0},
{"Base64d", `@base64d`, decodeOp(Base64InputFormat), 0},
{"Base64", `@base64`, encodeWithIndent(Base64OutputFormat, 0), 0},
// file loaders
{"LoadXML", `load_?xml|xml_?load`, loadOp(NewXMLDecoder(XMLPreferences.AttributePrefix, XMLPreferences.ContentName, XMLPreferences.StrictMode, XMLPreferences.KeepNamespace, XMLPreferences.UseRawToken), false), 0},
{"LoadBase64", `load_?base64`, loadOp(NewBase64Decoder(), false), 0},
{"LoadProperties", `load_?props`, loadOp(NewPropertiesDecoder(), false), 0},
{"LoadString", `load_?str|str_?load`, loadOp(nil, true), 0},
{"LoadYaml", `load`, loadOp(NewYamlDecoder(), false), 0},
{"SplitDocument", `splitDoc|split_?doc`, opToken(splitDocumentOpType), 0},
simpleOp("select", selectOpType),
simpleOp("has", hasOpType),
simpleOp("unique_?by", uniqueByOpType),
simpleOp("unique", uniqueOpType),
simpleOp("group_?by", groupByOpType),
simpleOp("explode", explodeOpType),
simpleOp("or", orOpType),
simpleOp("and", andOpType),
simpleOp("not", notOpType),
simpleOp("ireduce", reduceOpType),
simpleOp("join", joinStringOpType),
simpleOp("sub", subStringOpType),
simpleOp("match", matchOpType),
simpleOp("capture", captureOpType),
simpleOp("test", testOpType),
simpleOp("sort_?by", sortByOpType),
simpleOp("sort", sortOpType),
simpleOp("reverse", reverseOpType),
simpleOp("any_c", anyConditionOpType),
simpleOp("any", anyOpType),
simpleOp("all_c", allConditionOpType),
simpleOp("all", allOpType),
simpleOp("contains", containsOpType),
simpleOp("split", splitStringOpType),
simpleOp("parent", getParentOpType),
simpleOp("keys", keysOpType),
simpleOp("key", getKeyOpType),
simpleOp("file_?name|fileName", getFilenameOpType),
simpleOp("file_?index|fileIndex|fi", getFileIndexOpType),
simpleOp("path", getPathOpType),
simpleOp("to_?entries|toEntries", toEntriesOpType),
simpleOp("from_?entries|fromEntries", fromEntriesOpType),
simpleOp("with_?entries|withEntries", withEntriesOpType),
simpleOp("with", withOpType),
simpleOp("collect", collectOpType),
simpleOp("del", deleteChildOpType),
// operators that have an assign form
assignableOp("style", getStyleOpType, assignStyleOpType),
assignableOp("tag|type", getTagOpType, assignTagOpType),
assignableOp("anchor", getAnchorOpType, assignAnchorOpType),
assignableOp("alias", getAliasOpType, assignAliasOpType),
{"ALL_COMMENTS", `comments\s*=`, assignAllCommentsOp(false), 0},
{"ALL_COMMENTS_ASSIGN_RELATIVE", `comments\s*\|=`, assignAllCommentsOp(true), 0},
{"Block", `;`, opToken(blockOpType), 0},
{"Alternative", `\/\/`, opToken(alternativeOpType), 0},
{"DocumentIndex", `documentIndex|document_?index|di`, opToken(getDocumentIndexOpType), 0},
{"Uppercase", `upcase|ascii_?upcase`, opTokenWithPrefs(changeCaseOpType, nil, changeCasePrefs{ToUpperCase: true}), 0},
{"Downcase", `downcase|ascii_?downcase`, opTokenWithPrefs(changeCaseOpType, nil, changeCasePrefs{ToUpperCase: false}), 0},
// literal values
{"HexValue", `0[xX][0-9A-Fa-f]+`, hexValue(), 0},
{"FloatValueScientific", `-?[1-9](\.\d+)?[Ee][-+]?\d+`, floatValue(), 0},
{"FloatValue", `-?\d+(\.\d+)`, floatValue(), 0},
{"NumberValue", `-?\d+`, numberValue(), 0},
{"TrueBooleanValue", `[Tt][Rr][Uu][Ee]`, booleanValue(true), 0},
{"FalseBooleanValue", `[Ff][Aa][Ll][Ss][Ee]`, booleanValue(false), 0},
{"NullValue", `[Nn][Uu][Ll][Ll]|~`, nullValue(), 0},
{"QuotedStringValue", `"([^"\\]*(\\.[^"\\]*)*)"`, stringValue(), 0},
// environment variable operators
{"StrEnvOp", `strenv\([^\)]+\)`, envOp(true), 0},
{"EnvOp", `env\([^\)]+\)`, envOp(false), 0},
{"EnvSubstWithOptions", `envsubst\((ne|nu|ff| |,)+\)`, envSubstWithOptions(), 0},
simpleOp("envsubst", envsubstOpType),
// comparison and assignment
{"Equals", `\s*==\s*`, opToken(equalsOpType), 0},
{"NotEquals", `\s*!=\s*`, opToken(notEqualsOpType), 0},
{"GreaterThanEquals", `\s*>=\s*`, opTokenWithPrefs(compareOpType, nil, compareTypePref{OrEqual: true, Greater: true}), 0},
{"LessThanEquals", `\s*<=\s*`, opTokenWithPrefs(compareOpType, nil, compareTypePref{OrEqual: true, Greater: false}), 0},
{"GreaterThan", `\s*>\s*`, opTokenWithPrefs(compareOpType, nil, compareTypePref{OrEqual: false, Greater: true}), 0},
{"LessThan", `\s*<\s*`, opTokenWithPrefs(compareOpType, nil, compareTypePref{OrEqual: false, Greater: false}), 0},
{"AssignRelative", `\|=`, assignOpToken(true), 0},
{"Assign", `=`, assignOpToken(false), 0},
// whitespace has no action, so Tokenise emits no yq token for it
{`whitespace`, `[ \t\n]+`, nil, 0},
// path traversal, pipe, union and arithmetic
{"WrappedPathElement", `\."[^ "]+"\??`, pathToken(true), 0},
{"PathElement", `\.[^ ;\}\{\:\[\],\|\.\[\(\)=\n]+\??`, pathToken(false), 0},
{"Pipe", `\|`, opToken(pipeOpType), 0},
{"Self", `\.`, opToken(selfReferenceOpType), 0},
{"Union", `,`, opToken(unionOpType), 0},
{"MultiplyAssign", `\*=[\+|\?dn]*`, multiplyWithPrefs(multiplyAssignOpType), 0},
{"Multiply", `\*[\+|\?dn]*`, multiplyWithPrefs(multiplyOpType), 0},
{"AddAssign", `\+=`, opToken(addAssignOpType), 0},
{"Add", `\+`, opToken(addOpType), 0},
{"SubtractAssign", `\-=`, opToken(subtractAssignOpType), 0},
{"Subtract", `\-`, opToken(subtractOpType), 0},
}
// yqAction builds a yq token from a raw participle lexer token.
type yqAction func(lexer.Token) (*token, error)
// participleYqRule pairs a lexer rule (Name and Pattern) with the action that
// creates the yq token for a match. ParticipleTokenType is the token type the
// participle lexer assigned to the rule; newParticipleLexer fills it in.
type participleYqRule struct {
Name string
Pattern string
CreateYqToken yqAction
ParticipleTokenType lexer.TokenType
}
// participleLexer is the expressionTokeniser backed by a participle lexer
// definition.
type participleLexer struct {
lexerDefinition lexer.StringDefinition
}
// simpleOp builds a rule for an operator that has no preferences and no
// assign form. name is used as the rule's pattern, and the rule's name is
// name with its first letter upper-cased (e.g. "length" -> "Length").
func simpleOp(name string, opType *operationType) *participleYqRule {
	// Capitalise the FIRST byte; the previous code upper-cased name[1],
	// which produced rule names such as "Eength".
	ruleName := strings.ToUpper(name[:1]) + name[1:]
	return &participleYqRule{ruleName, name, opToken(opType), 0}
}
// assignableOp builds a rule for an operator that also has an assign form
// (assignOpType). name is used as the rule's pattern, and the rule's name is
// name with its first letter upper-cased (e.g. "style" -> "Style").
func assignableOp(name string, opType *operationType, assignOpType *operationType) *participleYqRule {
	// Capitalise the FIRST byte; the previous code upper-cased name[1],
	// which produced rule names such as "Ttyle".
	ruleName := strings.ToUpper(name[:1]) + name[1:]
	return &participleYqRule{ruleName, name, opTokenWithPrefs(opType, assignOpType, nil), 0}
}
// newParticipleLexer builds a participle lexer from participleYqRules and
// records, on each rule, the token type the lexer assigned to the rule's
// name. lexer.MustSimple is used to build the definition.
func newParticipleLexer() expressionTokeniser {
	simpleRules := make([]lexer.SimpleRule, 0, len(participleYqRules))
	for _, rule := range participleYqRules {
		simpleRules = append(simpleRules, lexer.SimpleRule{Name: rule.Name, Pattern: rule.Pattern})
	}

	definition := lexer.MustSimple(simpleRules)

	tokenTypes := definition.Symbols()
	for idx := range participleYqRules {
		rule := participleYqRules[idx]
		rule.ParticipleTokenType = tokenTypes[rule.Name]
	}
	return &participleLexer{lexerDefinition: definition}
}
// pathToken returns the action for path elements such as `.a`, `.a?` and
// `."a b"`. A trailing `?` marks the traversal as optional; the leading
// character (the dot) is dropped, and when wrapped is true the surrounding
// quotes are removed as well.
func pathToken(wrapped bool) yqAction {
	return func(rawToken lexer.Token) (*token, error) {
		value := rawToken.Value
		prefs := traversePreferences{}
		if strings.HasSuffix(value, "?") {
			prefs.OptionalTraverse = true
			value = strings.TrimSuffix(value, "?")
		}
		// drop the leading "."
		value = value[1:]
		if wrapped {
			value = unwrap(value)
		}
		// Debugf rather than Debug, so that the %v verb is actually expanded.
		log.Debugf("PathToken %v", value)
		op := &Operation{OperationType: traversePathOpType, Value: value, StringValue: value, Preferences: prefs}
		return &token{TokenType: operationToken, Operation: op, CheckForPostTraverse: true}, nil
	}
}
// recursiveDecentOpToken returns the action for the recursive descent
// operators. Arrays are recursed into and aliases are not followed;
// includeMapKeys is passed through to the traverse preferences.
func recursiveDecentOpToken(includeMapKeys bool) yqAction {
	traversal := traversePreferences{
		DontFollowAlias: true,
		IncludeMapKeys:  includeMapKeys,
	}
	return opTokenWithPrefs(recursiveDescentOpType, nil, recursiveDescentPreferences{
		RecurseArray:        true,
		TraversePreferences: traversal,
	})
}
// opTokenWithPrefs returns an action producing an operation token of opType
// carrying preferences. When assignOpType is non-nil the token also carries
// an AssignOperation of that type with the same preferences.
func opTokenWithPrefs(opType *operationType, assignOpType *operationType, preferences interface{}) yqAction {
	return func(rawToken lexer.Token) (*token, error) {
		matched := rawToken.Value
		result := &token{
			TokenType: operationToken,
			Operation: &Operation{OperationType: opType, Value: opType.Type, StringValue: matched, Preferences: preferences},
		}
		if assignOpType != nil {
			result.AssignOperation = &Operation{OperationType: assignOpType, Value: assignOpType.Type, StringValue: matched, Preferences: preferences}
		}
		return result, nil
	}
}
// flattenWithDepth returns the action for `flatten(N)`: N is parsed from the
// matched text and stored as the flatten depth. A parse failure is returned
// as the action's error.
func flattenWithDepth() yqAction {
	return func(rawToken lexer.Token) (*token, error) {
		matched := rawToken.Value
		depth, parseErr := extractNumberParameter(matched)
		if parseErr != nil {
			return nil, parseErr
		}
		return &token{
			TokenType: operationToken,
			Operation: &Operation{
				OperationType: flattenOpType,
				Value:         flattenOpType.Type,
				StringValue:   matched,
				Preferences:   flattenPreferences{depth: depth},
			},
		}, nil
	}
}
// assignAllCommentsOp returns the action for `comments =` / `comments |=`:
// an assign-comment operation that targets line, head and foot comments.
// updateAssign is stored on the operation's UpdateAssign field.
func assignAllCommentsOp(updateAssign bool) yqAction {
	return func(rawToken lexer.Token) (*token, error) {
		// Debugf rather than Debug, so that the %v verb is actually expanded.
		log.Debugf("assignAllCommentsOp %v", rawToken.Value)
		value := rawToken.Value
		op := &Operation{
			OperationType: assignCommentOpType,
			Value:         assignCommentOpType.Type,
			StringValue:   value,
			UpdateAssign:  updateAssign,
			Preferences:   commentOpPreferences{LineComment: true, HeadComment: true, FootComment: true},
		}
		return &token{TokenType: operationToken, Operation: op}, nil
	}
}
// assignOpToken returns the action for `=` / `|=`: an assign operation with
// DontOverWriteAnchor set. updateAssign is stored on the operation's
// UpdateAssign field.
func assignOpToken(updateAssign bool) yqAction {
	return func(rawToken lexer.Token) (*token, error) {
		// Debugf rather than Debug, so that the %v verb is actually expanded.
		log.Debugf("assignOpToken %v", rawToken.Value)
		value := rawToken.Value
		prefs := assignPreferences{DontOverWriteAnchor: true}
		op := &Operation{OperationType: assignOpType, Value: assignOpType.Type, StringValue: value, UpdateAssign: updateAssign, Preferences: prefs}
		return &token{TokenType: operationToken, Operation: op}, nil
	}
}
// booleanValue returns an action producing a value operation for the boolean
// val, keeping the matched text as its string form.
func booleanValue(val bool) yqAction {
	return func(rawToken lexer.Token) (*token, error) {
		valueOp := createValueOperation(val, rawToken.Value)
		return &token{TokenType: operationToken, Operation: valueOp}, nil
	}
}
// nullValue returns an action producing a value operation for nil, keeping
// the matched text as its string form.
func nullValue() yqAction {
	return func(rawToken lexer.Token) (*token, error) {
		valueOp := createValueOperation(nil, rawToken.Value)
		return &token{TokenType: operationToken, Operation: valueOp}, nil
	}
}
// stringValue returns the action for quoted string literals: the surrounding
// quotes are removed and every `\"` is replaced by `"`.
func stringValue() yqAction {
	return func(rawToken lexer.Token) (*token, error) {
		unquoted := strings.ReplaceAll(unwrap(rawToken.Value), "\\\"", "\"")
		valueOp := createValueOperation(unquoted, unquoted)
		return &token{TokenType: operationToken, Operation: valueOp}, nil
	}
}
// envOp returns the action for `env(NAME)` and, when strenv is true,
// `strenv(NAME)`. NAME is taken from between the parentheses and stored as
// the operation's value; strenv additionally sets StringValue in the
// preferences.
func envOp(strenv bool) yqAction {
	return func(rawToken lexer.Token) (*token, error) {
		matched := rawToken.Value
		preferences := envOpPreferences{}

		// number of leading bytes to drop: "env(" is 4, "strenv(" is 7
		prefixLen := 4
		if strenv {
			prefixLen = 7
			preferences.StringValue = true
		}
		name := matched[prefixLen : len(matched)-1]

		envOperation := createValueOperation(name, name)
		envOperation.OperationType = envOpType
		envOperation.Preferences = preferences
		return &token{TokenType: operationToken, Operation: envOperation}, nil
	}
}
// envSubstWithOptions returns the action for `envsubst(<options>)`, where the
// options are any of ne, nu and ff. The options become the operation's
// preferences.
//
// NOTE(review): each invocation rewrites the package-level
// envsubstOpType.Type ("ENVSUBST" plus "_NO_EMPTY" / "_NO_UNSET" suffixes), so
// the last lexed envsubst token determines that shared value - confirm this is
// intended.
func envSubstWithOptions() yqAction {
	return func(rawToken lexer.Token) (*token, error) {
		matched := rawToken.Value
		prefs := envOpPreferences{
			NoUnset:  hasOptionParameter(matched, "nu"),
			NoEmpty:  hasOptionParameter(matched, "ne"),
			FailFast: hasOptionParameter(matched, "ff"),
		}

		typeName := "ENVSUBST"
		if prefs.NoEmpty {
			typeName = typeName + "_NO_EMPTY"
		}
		if prefs.NoUnset {
			typeName = typeName + "_NO_UNSET"
		}
		envsubstOpType.Type = typeName

		op := &Operation{OperationType: envsubstOpType, Value: envsubstOpType.Type, StringValue: matched, Preferences: prefs}
		return &token{TokenType: operationToken, Operation: op}, nil
	}
}
// multiplyWithPrefs returns the action for `*` / `*=` followed by optional
// flag characters. The flags found in the matched text set the preferences:
// `+` append arrays, `?` don't auto-create, `n` only write null, `d` deep
// merge arrays. Aliases are never followed.
//
// NOTE(review): Value is always multiplyOpType.Type, even when op is a
// different operation type - confirm this is intended.
func multiplyWithPrefs(op *operationType) yqAction {
	return func(rawToken lexer.Token) (*token, error) {
		matched := rawToken.Value
		hasFlag := func(flag string) bool {
			return strings.Contains(matched, flag)
		}

		prefs := multiplyPreferences{}
		if hasFlag("+") {
			prefs.AppendArrays = true
		}
		if hasFlag("?") {
			prefs.TraversePrefs = traversePreferences{DontAutoCreate: true}
		}
		if hasFlag("n") {
			prefs.AssignPrefs = assignPreferences{OnlyWriteNull: true}
		}
		if hasFlag("d") {
			prefs.DeepMergeArrays = true
		}
		prefs.TraversePrefs.DontFollowAlias = true

		operation := &Operation{OperationType: op, Value: multiplyOpType.Type, StringValue: matched, Preferences: prefs}
		return &token{TokenType: operationToken, Operation: operation}, nil
	}
}
// getVariableOpToken returns the action for `$name`: the leading character
// is dropped and the rest becomes the value of a get-variable operation. The
// token allows a traversal to follow it directly.
func getVariableOpToken() yqAction {
	return func(rawToken lexer.Token) (*token, error) {
		variableName := rawToken.Value[1:]
		getVarOperation := createValueOperation(variableName, variableName)
		getVarOperation.OperationType = getVariableOpType
		return &token{
			TokenType:            operationToken,
			Operation:            getVarOperation,
			CheckForPostTraverse: true,
		}, nil
	}
}
// hexValue returns the action for hexadecimal literals: the first two
// characters (the `0x` / `0X` prefix) are dropped and the remainder is parsed
// as a base-16, 64-bit integer. The matched text is kept as the string form.
func hexValue() yqAction {
	return func(rawToken lexer.Token) (*token, error) {
		literal := rawToken.Value
		digits := literal[2:]
		log.Debugf("numberString: %v", digits)
		parsed, parseErr := strconv.ParseInt(digits, 16, 64)
		if parseErr != nil {
			return nil, parseErr
		}
		return &token{TokenType: operationToken, Operation: createValueOperation(parsed, literal)}, nil
	}
}
// floatValue returns the action for float literals: the matched text is
// parsed as a 64-bit float and also kept as the string form.
func floatValue() yqAction {
	return func(rawToken lexer.Token) (*token, error) {
		literal := rawToken.Value
		parsed, parseErr := strconv.ParseFloat(literal, 64)
		if parseErr != nil {
			return nil, parseErr
		}
		valueOp := createValueOperation(parsed, literal)
		return &token{TokenType: operationToken, Operation: valueOp}, nil
	}
}
// numberValue returns the action for integer literals: the matched text is
// parsed as a base-10, 64-bit integer and also kept as the string form.
func numberValue() yqAction {
	return func(rawToken lexer.Token) (*token, error) {
		literal := rawToken.Value
		parsed, parseErr := strconv.ParseInt(literal, 10, 64)
		if parseErr != nil {
			return nil, parseErr
		}
		valueOp := createValueOperation(parsed, literal)
		return &token{TokenType: operationToken, Operation: valueOp}, nil
	}
}
// encodeParseIndent returns the action for encoders written with an explicit
// indent, e.g. `to_yaml(3)`: the number in parentheses is parsed and used as
// the indent for outputFormat. A parse failure is returned as the action's
// error.
func encodeParseIndent(outputFormat PrinterOutputFormat) yqAction {
	return func(rawToken lexer.Token) (*token, error) {
		matched := rawToken.Value
		indent, parseErr := extractNumberParameter(matched)
		if parseErr != nil {
			return nil, parseErr
		}
		return &token{
			TokenType: operationToken,
			Operation: &Operation{
				OperationType: encodeOpType,
				Value:         encodeOpType.Type,
				StringValue:   matched,
				Preferences:   encoderPreferences{format: outputFormat, indent: indent},
			},
		}, nil
	}
}
// encodeWithIndent returns the action for an encode operation using
// outputFormat and a fixed indent.
func encodeWithIndent(outputFormat PrinterOutputFormat, indent int) yqAction {
	return opTokenWithPrefs(encodeOpType, nil, encoderPreferences{
		format: outputFormat,
		indent: indent,
	})
}
// decodeOp returns the action for a decode operation reading inputFormat.
func decodeOp(inputFormat InputFormat) yqAction {
	return opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: inputFormat})
}
// loadOp returns the action for a load operation whose preferences carry the
// given decoder and loadAsString flag.
func loadOp(decoder Decoder, loadAsString bool) yqAction {
	return opTokenWithPrefs(loadOpType, nil, loadPrefs{
		decoder:      decoder,
		loadAsString: loadAsString,
	})
}
// opToken returns the action for an operation that has neither an assign
// form nor preferences.
func opToken(op *operationType) yqAction {
	var noAssignForm *operationType
	return opTokenWithPrefs(op, noAssignForm, nil)
}
// literalToken returns the action for non-operation tokens (brackets and
// collection delimiters). The token records its kind, the matched text, and
// whether a traversal may follow it directly.
func literalToken(tt tokenType, checkForPost bool) yqAction {
	return func(rawToken lexer.Token) (*token, error) {
		literal := &token{
			TokenType:            tt,
			CheckForPostTraverse: checkForPost,
			Match:                rawToken.Value,
		}
		return literal, nil
	}
}
// getYqDefinition returns the rule whose participle token type matches
// rawToken, or an empty rule (with a nil CreateYqToken) when none does.
func (p *participleLexer) getYqDefinition(rawToken lexer.Token) *participleYqRule {
	for idx := 0; idx < len(participleYqRules); idx++ {
		if candidate := participleYqRules[idx]; candidate.ParticipleTokenType == rawToken.Type {
			return candidate
		}
	}
	return &participleYqRule{}
}
// Tokenise lexes expression with the participle lexer, converts each raw
// token to a yq token through its rule's action (rules without an action
// produce nothing), and returns the post-processed token list. The first
// lexing or conversion error is returned.
func (p *participleLexer) Tokenise(expression string) ([]*token, error) {
	source, err := p.lexerDefinition.LexString("", expression)
	if err != nil {
		return nil, err
	}

	collected := make([]*token, 0)
	for {
		raw, lexErr := source.Next()
		if lexErr != nil {
			return nil, lexErr
		}
		if raw.Type == lexer.EOF {
			break
		}

		build := p.getYqDefinition(raw).CreateYqToken
		if build == nil {
			continue
		}
		yqToken, buildErr := build(raw)
		if buildErr != nil {
			return nil, buildErr
		}
		collected = append(collected, yqToken)
	}
	return postProcessTokens(collected), nil
}

View File

@ -0,0 +1,538 @@
package yqlib
import (
"testing"
"github.com/alecthomas/repr"
"github.com/mikefarah/yq/v4/test"
"gopkg.in/yaml.v3"
)
// participleLexerScenario pairs an expression with the tokens listed for it
// in participleLexerScenarios.
type participleLexerScenario struct {
expression string
tokens []*token
}
var participleLexerScenarios = []participleLexerScenario{
{
expression: ".a",
tokens: []*token{
{
TokenType: operationToken,
Operation: &Operation{
OperationType: traversePathOpType,
Value: "a",
StringValue: "a",
Preferences: traversePreferences{},
},
CheckForPostTraverse: true,
},
},
},
{
expression: ".a.b",
tokens: []*token{
{
TokenType: operationToken,
Operation: &Operation{
OperationType: traversePathOpType,
Value: "a",
StringValue: "a",
Preferences: traversePreferences{},
},
CheckForPostTraverse: true,
},
{
TokenType: operationToken,
Operation: &Operation{
OperationType: shortPipeOpType,
Value: "PIPE",
StringValue: ".",
Preferences: nil,
},
},
{
TokenType: operationToken,
Operation: &Operation{
OperationType: traversePathOpType,
Value: "b",
StringValue: "b",
Preferences: traversePreferences{},
},
CheckForPostTraverse: true,
},
},
},
{
expression: ".a.b?",
tokens: []*token{
{
TokenType: operationToken,
Operation: &Operation{
OperationType: traversePathOpType,
Value: "a",
StringValue: "a",
Preferences: traversePreferences{},
},
CheckForPostTraverse: true,
},
{
TokenType: operationToken,
Operation: &Operation{
OperationType: shortPipeOpType,
Value: "PIPE",
StringValue: ".",
Preferences: nil,
},
},
{
TokenType: operationToken,
Operation: &Operation{
OperationType: traversePathOpType,
Value: "b",
StringValue: "b",
Preferences: traversePreferences{
OptionalTraverse: true,
},
},
CheckForPostTraverse: true,
},
},
},
{
expression: `.a."b?"`,
tokens: []*token{
{
TokenType: operationToken,
Operation: &Operation{
OperationType: traversePathOpType,
Value: "a",
StringValue: "a",
Preferences: traversePreferences{},
},
CheckForPostTraverse: true,
},
{
TokenType: operationToken,
Operation: &Operation{
OperationType: shortPipeOpType,
Value: "PIPE",
StringValue: ".",
Preferences: nil,
},
},
{
TokenType: operationToken,
Operation: &Operation{
OperationType: traversePathOpType,
Value: "b?",
StringValue: "b?",
Preferences: traversePreferences{},
},
CheckForPostTraverse: true,
},
},
},
{
expression: ` .a ."b?"`,
tokens: []*token{
{
TokenType: operationToken,
Operation: &Operation{
OperationType: traversePathOpType,
Value: "a",
StringValue: "a",
Preferences: traversePreferences{},
},
CheckForPostTraverse: true,
},
{
TokenType: operationToken,
Operation: &Operation{
OperationType: shortPipeOpType,
Value: "PIPE",
StringValue: ".",
Preferences: nil,
},
},
{
TokenType: operationToken,
Operation: &Operation{
OperationType: traversePathOpType,
Value: "b?",
StringValue: "b?",
Preferences: traversePreferences{},
},
CheckForPostTraverse: true,
},
},
},
{
expression: `.a | .b`,
tokens: []*token{
{
TokenType: operationToken,
Operation: &Operation{
OperationType: traversePathOpType,
Value: "a",
StringValue: "a",
Preferences: traversePreferences{},
},
CheckForPostTraverse: true,
},
{
TokenType: operationToken,
Operation: &Operation{
OperationType: pipeOpType,
Value: "PIPE",
StringValue: "|",
Preferences: nil,
},
},
{
TokenType: operationToken,
Operation: &Operation{
OperationType: traversePathOpType,
Value: "b",
StringValue: "b",
Preferences: traversePreferences{},
},
CheckForPostTraverse: true,
},
},
},
{
expression: "(.a)",
tokens: []*token{
{
TokenType: openBracket,
Match: "(",
},
{
TokenType: operationToken,
Operation: &Operation{
OperationType: traversePathOpType,
Value: "a",
StringValue: "a",
Preferences: traversePreferences{},
},
CheckForPostTraverse: true,
},
{
TokenType: closeBracket,
Match: ")",
CheckForPostTraverse: true,
},
},
},
{
expression: "..",
tokens: []*token{
{
TokenType: operationToken,
Operation: &Operation{
OperationType: recursiveDescentOpType,
Value: "RECURSIVE_DESCENT",
StringValue: "..",
Preferences: recursiveDescentPreferences{
RecurseArray: true,
TraversePreferences: traversePreferences{
DontFollowAlias: true,
IncludeMapKeys: false,
},
},
},
},
},
},
{
expression: "...",
tokens: []*token{
{
TokenType: operationToken,
Operation: &Operation{
OperationType: recursiveDescentOpType,
Value: "RECURSIVE_DESCENT",
StringValue: "...",
Preferences: recursiveDescentPreferences{
RecurseArray: true,
TraversePreferences: traversePreferences{
DontFollowAlias: true,
IncludeMapKeys: true,
},
},
},
},
},
},
{
expression: ".a,.b",
tokens: []*token{
{
TokenType: operationToken,
Operation: &Operation{
OperationType: traversePathOpType,
Value: "a",
StringValue: "a",
Preferences: traversePreferences{},
},
CheckForPostTraverse: true,
},
{
TokenType: operationToken,
Operation: &Operation{
OperationType: unionOpType,
Value: "UNION",
StringValue: ",",
Preferences: nil,
},
},
{
TokenType: operationToken,
Operation: &Operation{
OperationType: traversePathOpType,
Value: "b",
StringValue: "b",
Preferences: traversePreferences{},
},
CheckForPostTraverse: true,
},
},
},
{
expression: "map_values",
tokens: []*token{
{
TokenType: operationToken,
Operation: &Operation{
OperationType: mapValuesOpType,
Value: "MAP_VALUES",
StringValue: "map_values",
Preferences: nil,
},
},
},
},
{
expression: "mapvalues",
tokens: []*token{
{
TokenType: operationToken,
Operation: &Operation{
OperationType: mapValuesOpType,
Value: "MAP_VALUES",
StringValue: "mapvalues",
Preferences: nil,
},
},
},
},
{
expression: "flatten(3)",
tokens: []*token{
{
TokenType: operationToken,
Operation: &Operation{
OperationType: flattenOpType,
Value: "FLATTEN_BY",
StringValue: "flatten(3)",
Preferences: flattenPreferences{depth: 3},
},
},
},
},
{
expression: "flatten",
tokens: []*token{
{
TokenType: operationToken,
Operation: &Operation{
OperationType: flattenOpType,
Value: "FLATTEN_BY",
StringValue: "flatten",
Preferences: flattenPreferences{depth: -1},
},
},
},
},
{
expression: "length",
tokens: []*token{
{
TokenType: operationToken,
Operation: &Operation{
OperationType: lengthOpType,
Value: "LENGTH",
StringValue: "length",
Preferences: nil,
},
},
},
},
{
expression: "format_datetime",
tokens: []*token{
{
TokenType: operationToken,
Operation: &Operation{
OperationType: formatDateTimeOpType,
Value: "FORMAT_DATE_TIME",
StringValue: "format_datetime",
Preferences: nil,
},
},
},
},
{
expression: "to_yaml(3)",
tokens: []*token{
{
TokenType: operationToken,
Operation: &Operation{
OperationType: encodeOpType,
Value: "ENCODE",
StringValue: "to_yaml(3)",
Preferences: encoderPreferences{
format: YamlOutputFormat,
indent: 3,
},
},
},
},
},
{
expression: "tojson(2)",
tokens: []*token{
{
TokenType: operationToken,
Operation: &Operation{
OperationType: encodeOpType,
Value: "ENCODE",
StringValue: "tojson(2)",
Preferences: encoderPreferences{
format: JSONOutputFormat,
indent: 2,
},
},
},
},
},
{
expression: "@yaml",
tokens: []*token{
{
TokenType: operationToken,
Operation: &Operation{
OperationType: encodeOpType,
Value: "ENCODE",
StringValue: "@yaml",
Preferences: encoderPreferences{
format: YamlOutputFormat,
indent: 2,
},
},
},
},
},
{
expression: "to_props",
tokens: []*token{
{
TokenType: operationToken,
Operation: &Operation{
OperationType: encodeOpType,
Value: "ENCODE",
StringValue: "to_props",
Preferences: encoderPreferences{
format: PropsOutputFormat,
indent: 2,
},
},
},
},
},
{
expression: "@base64d",
tokens: []*token{
{
TokenType: operationToken,
Operation: &Operation{
OperationType: decodeOpType,
Value: "DECODE",
StringValue: "@base64d",
Preferences: decoderPreferences{
format: Base64InputFormat,
},
},
},
},
},
{
expression: "@base64",
tokens: []*token{
{
TokenType: operationToken,
Operation: &Operation{
OperationType: encodeOpType,
Value: "ENCODE",
StringValue: "@base64",
Preferences: encoderPreferences{
format: Base64OutputFormat,
},
},
},
},
},
{
expression: "@yamld",
tokens: []*token{
{
TokenType: operationToken,
Operation: &Operation{
OperationType: decodeOpType,
Value: "DECODE",
StringValue: "@yamld",
Preferences: decoderPreferences{
format: YamlInputFormat,
},
},
},
},
},
{
expression: `"string with a \""`,
tokens: []*token{
{
TokenType: operationToken,
Operation: &Operation{
OperationType: valueOpType,
Value: `string with a "`,
StringValue: `string with a "`,
Preferences: nil,
CandidateNode: &CandidateNode{
Node: &yaml.Node{
Kind: yaml.ScalarNode,
Tag: "!!str",
Value: `string with a "`,
},
},
},
},
},
},
}
// TestParticipleLexer tokenises every expression in participleLexerScenarios
// with the lexer returned by newParticipleLexer and checks the produced tokens
// against the scenario's expected tokens.
//
// Each scenario runs as its own subtest named after the expression, so a
// failure identifies the offending expression and a single scenario can be
// selected with `go test -run`.
func TestParticipleLexer(t *testing.T) {
	// NOTE(review): error-level trace marker; looks like leftover debugging
	// output — confirm whether it is still wanted.
	log.Errorf("TestParticiple")
	lexer := newParticipleLexer()

	for _, scenario := range participleLexerScenarios {
		// Subtests run synchronously here (no t.Parallel), so using the loop
		// variable inside the closure is safe on every Go version.
		t.Run(scenario.expression, func(t *testing.T) {
			actual, err := lexer.Tokenise(scenario.expression)
			if err != nil {
				// Report which expression failed instead of the bare error.
				t.Errorf("tokenising %q: %v", scenario.expression, err)
				return
			}
			// Compare the repr renderings of the expected and actual tokens,
			// passing the expression as context for the assertion message.
			test.AssertResultWithContext(t, repr.String(scenario.tokens, repr.Indent(" ")), repr.String(actual, repr.Indent(" ")), scenario.expression)
		})
	}
}

View File

@ -126,7 +126,7 @@ var getParentOpType = &operationType{Type: "GET_PARENT", NumArgs: 0, Precedence:
var getCommentOpType = &operationType{Type: "GET_COMMENT", NumArgs: 0, Precedence: 50, Handler: getCommentsOperator} var getCommentOpType = &operationType{Type: "GET_COMMENT", NumArgs: 0, Precedence: 50, Handler: getCommentsOperator}
var getAnchorOpType = &operationType{Type: "GET_ANCHOR", NumArgs: 0, Precedence: 50, Handler: getAnchorOperator} var getAnchorOpType = &operationType{Type: "GET_ANCHOR", NumArgs: 0, Precedence: 50, Handler: getAnchorOperator}
var getAliasOptype = &operationType{Type: "GET_ALIAS", NumArgs: 0, Precedence: 50, Handler: getAliasOperator} var getAliasOpType = &operationType{Type: "GET_ALIAS", NumArgs: 0, Precedence: 50, Handler: getAliasOperator}
var getDocumentIndexOpType = &operationType{Type: "GET_DOCUMENT_INDEX", NumArgs: 0, Precedence: 50, Handler: getDocumentIndexOperator} var getDocumentIndexOpType = &operationType{Type: "GET_DOCUMENT_INDEX", NumArgs: 0, Precedence: 50, Handler: getDocumentIndexOperator}
var getFilenameOpType = &operationType{Type: "GET_FILENAME", NumArgs: 0, Precedence: 50, Handler: getFilenameOperator} var getFilenameOpType = &operationType{Type: "GET_FILENAME", NumArgs: 0, Precedence: 50, Handler: getFilenameOperator}
var getFileIndexOpType = &operationType{Type: "GET_FILE_INDEX", NumArgs: 0, Precedence: 50, Handler: getFileIndexOperator} var getFileIndexOpType = &operationType{Type: "GET_FILE_INDEX", NumArgs: 0, Precedence: 50, Handler: getFileIndexOperator}