From cc7ea83506c697ceaf1761d5e11de54b9ea4526a Mon Sep 17 00:00:00 2001 From: Mike Farah Date: Wed, 7 Jul 2021 22:40:46 +1000 Subject: [PATCH] (wip) regex match op --- pkg/yqlib/doc/Anchor and Alias Operators.md | 30 ++++++ pkg/yqlib/doc/String Operators.md | 80 +++++--------- pkg/yqlib/expression_tokeniser.go | 1 + pkg/yqlib/lib.go | 12 ++- pkg/yqlib/operator_anchors_aliases_test.go | 30 ++++++ pkg/yqlib/operator_strings.go | 77 ++++++++++++++ pkg/yqlib/operator_strings_test.go | 110 ++++++++++++-------- 7 files changed, 238 insertions(+), 102 deletions(-) diff --git a/pkg/yqlib/doc/Anchor and Alias Operators.md b/pkg/yqlib/doc/Anchor and Alias Operators.md index ecfa272c..00dc39dd 100644 --- a/pkg/yqlib/doc/Anchor and Alias Operators.md +++ b/pkg/yqlib/doc/Anchor and Alias Operators.md @@ -299,3 +299,33 @@ foobar: thing: foobar_thing ``` +## Dereference and update a field +Use explode with multiply to dereference an object + +Given a sample.yml file of: +```yaml +item_value: &item_value + value: true +thingOne: + name: item_1 + !!merge <<: *item_value +thingTwo: + name: item_2 + !!merge <<: *item_value +``` +then +```bash +yq eval '.thingOne |= explode(.) 
* {"value": false}' sample.yml +``` +will output +```yaml +item_value: &item_value + value: true +thingOne: + name: item_1 + value: false +thingTwo: + name: item_2 + !!merge <<: *item_value +``` + diff --git a/pkg/yqlib/doc/String Operators.md b/pkg/yqlib/doc/String Operators.md index 1c3d0b93..f6d30637 100644 --- a/pkg/yqlib/doc/String Operators.md +++ b/pkg/yqlib/doc/String Operators.md @@ -1,88 +1,56 @@ # String Operators -## Join strings +## Match string Given a sample.yml file of: ```yaml -- cat -- meow -- 1 -- null -- true +cat ``` then ```bash -yq eval 'join("; ")' sample.yml +yq eval 'match("at")' sample.yml ``` will output ```yaml -cat; meow; 1; ; true +string: at +offset: 1 +length: 2 +captures: [] ``` -## Substitute / Replace string -This uses golang regex, described [here](https://github.com/google/re2/wiki/Syntax) -Note the use of `|=` to run in context of the current string value. - +## Match string, case insensitive Given a sample.yml file of: ```yaml -a: dogs are great +cAt ``` then ```bash -yq eval '.a |= sub("dogs", "cats")' sample.yml +yq eval 'match("(?i)at")' sample.yml ``` will output ```yaml -a: cats are great +string: At +offset: 1 +length: 2 +captures: [] ``` -## Substitute / Replace string with regex -This uses golang regex, described [here](https://github.com/google/re2/wiki/Syntax) -Note the use of `|=` to run in context of the current string value. 
- +## Match with capture groups Given a sample.yml file of: ```yaml -a: cat -b: heat +a cat ``` then ```bash -yq eval '.[] |= sub("(a)", "${1}r")' sample.yml +yq eval 'match("c(.t)")' sample.yml ``` will output ```yaml -a: cart -b: heart -``` - -## Split strings -Given a sample.yml file of: -```yaml -cat; meow; 1; ; true -``` -then -```bash -yq eval 'split("; ")' sample.yml -``` -will output -```yaml -- cat -- meow -- "1" -- "" -- "true" -``` - -## Split strings one match -Given a sample.yml file of: -```yaml -word -``` -then -```bash -yq eval 'split("; ")' sample.yml -``` -will output -```yaml -- word +string: cat +offset: 2 +length: 3 +captures: + - string: at + offset: 3 + length: 2 ``` diff --git a/pkg/yqlib/expression_tokeniser.go b/pkg/yqlib/expression_tokeniser.go index 85e64375..856c49c9 100644 --- a/pkg/yqlib/expression_tokeniser.go +++ b/pkg/yqlib/expression_tokeniser.go @@ -276,6 +276,7 @@ func initLexer() (*lex.Lexer, error) { lexer.Add([]byte(`join`), opToken(joinStringOpType)) lexer.Add([]byte(`sub`), opToken(subStringOpType)) + lexer.Add([]byte(`match`), opToken(matchOpType)) lexer.Add([]byte(`any`), opToken(anyOpType)) lexer.Add([]byte(`any_c`), opToken(anyConditionOpType)) diff --git a/pkg/yqlib/lib.go b/pkg/yqlib/lib.go index ca511148..4a35554b 100644 --- a/pkg/yqlib/lib.go +++ b/pkg/yqlib/lib.go @@ -83,6 +83,7 @@ var explodeOpType = &operationType{Type: "EXPLODE", NumArgs: 1, Precedence: 50, var sortKeysOpType = &operationType{Type: "SORT_KEYS", NumArgs: 1, Precedence: 50, Handler: sortKeysOperator} var joinStringOpType = &operationType{Type: "JOIN", NumArgs: 1, Precedence: 50, Handler: joinStringOperator} var subStringOpType = &operationType{Type: "SUBSTR", NumArgs: 1, Precedence: 50, Handler: substituteStringOperator} +var matchOpType = &operationType{Type: "MATCH", NumArgs: 1, Precedence: 50, Handler: matchOperator} var splitStringOpType = &operationType{Type: "SPLIT", NumArgs: 1, Precedence: 50, Handler: splitStringOperator} var keysOpType = 
&operationType{Type: "KEYS", NumArgs: 0, Precedence: 50, Handler: keysOperator} @@ -114,8 +115,8 @@ type Operation struct { UpdateAssign bool // used for assign ops, when true it means we evaluate the rhs given the lhs } -func createValueOperation(value interface{}, stringValue string) *Operation { - var node yaml.Node = yaml.Node{Kind: yaml.ScalarNode} +func createScalarNode(value interface{}, stringValue string) *yaml.Node { + var node = &yaml.Node{Kind: yaml.ScalarNode} node.Value = stringValue switch value.(type) { @@ -130,12 +131,17 @@ func createValueOperation(value interface{}, stringValue string) *Operation { case nil: node.Tag = "!!null" } + return node +} + +func createValueOperation(value interface{}, stringValue string) *Operation { + var node *yaml.Node = createScalarNode(value, stringValue) return &Operation{ OperationType: valueOpType, Value: value, StringValue: stringValue, - CandidateNode: &CandidateNode{Node: &node}, + CandidateNode: &CandidateNode{Node: node}, } } diff --git a/pkg/yqlib/operator_anchors_aliases_test.go b/pkg/yqlib/operator_anchors_aliases_test.go index a611c750..b9493f55 100644 --- a/pkg/yqlib/operator_anchors_aliases_test.go +++ b/pkg/yqlib/operator_anchors_aliases_test.go @@ -12,6 +12,29 @@ var specDocument = `- &CENTER { x: 1, y: 2 } var expectedSpecResult = "D0, P[4], (!!map)::x: 1\ny: 2\nr: 10\n" +var simpleArrayRef = ` +item_value: &item_value + value: true + +thingOne: + name: item_1 + <<: *item_value + +thingTwo: + name: item_2 + <<: *item_value +` + +var expectedUpdatedArrayRef = `D0, P[], (doc)::item_value: &item_value + value: true +thingOne: + name: item_1 + value: false +thingTwo: + name: item_2 + !!merge <<: *item_value +` + var anchorOperatorScenarios = []expressionScenario{ { description: "Merge one map", @@ -197,6 +220,13 @@ foobar: "D0, P[], (doc)::{f: {a: cat, b: {f: cat}, cat: {f: cat}}}\n", }, }, + { + description: "Dereference and update a field", + subdescription: "`Use explode with multiply to dereference 
an object", + document: simpleArrayRef, + expression: `.thingOne |= explode(.) * {"value": false}`, + expected: []string{expectedUpdatedArrayRef}, + }, } func TestAnchorAliasOperatorScenarios(t *testing.T) { diff --git a/pkg/yqlib/operator_strings.go b/pkg/yqlib/operator_strings.go index 272c1224..e5b24cd1 100644 --- a/pkg/yqlib/operator_strings.go +++ b/pkg/yqlib/operator_strings.go @@ -74,6 +74,83 @@ func substituteStringOperator(d *dataTreeNavigator, context Context, expressionN } +func addMatch(original []*yaml.Node, match string, offset int) []*yaml.Node { + return append(original, + createScalarNode("string", "string"), + createScalarNode(match, match), + createScalarNode("offset", "offset"), + createScalarNode(offset, fmt.Sprintf("%v", offset)), + createScalarNode("length", "length"), + createScalarNode(len(match), fmt.Sprintf("%v", len(match)))) +} + +func match(regEx *regexp.Regexp, candidate *CandidateNode, value string, results *list.List) { + // captures = FindAllStringSubmatch + // FindAllStringSubmatchIndex = offset? 
+ + //string array + // subNames := regEx.SubexpNames() + + // one entry per match: element 0 is the whole match, the rest are its capture groups + allMatches := regEx.FindAllStringSubmatch(value, -1) + allIndices := regEx.FindAllStringSubmatchIndex(value, -1) + // allIndices[i] holds start/end offset pairs: pair 0 is the whole match, pair j+1 is capture j + for i, matches := range allMatches { + capturesNode := &yaml.Node{Kind: yaml.SequenceNode} + match, submatches := matches[0], matches[1:] + for j, submatch := range submatches { + captureNode := &yaml.Node{Kind: yaml.MappingNode} + captureNode.Content = addMatch(captureNode.Content, submatch, allIndices[i][2+j*2]) + capturesNode.Content = append(capturesNode.Content, captureNode) + } + + node := &yaml.Node{Kind: yaml.MappingNode} + node.Content = addMatch(node.Content, match, allIndices[i][0]) + node.Content = append(node.Content, + createScalarNode("captures", "captures"), + capturesNode, + ) + results.PushBack(candidate.CreateChild(nil, node)) + + } + +} + +func matchOperator(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) { + // evaluate the rhs expression; the value of its first resulting node is the regex + // (an empty pattern is used when the rhs yields no nodes) + + regExNodes, err := d.GetMatchingNodes(context.ReadOnlyClone(), expressionNode.Rhs) + if err != nil { + return Context{}, err + } + log.Debug(NodesToString(regExNodes.MatchingNodes)) + regExStr := "" + if regExNodes.MatchingNodes.Front() != nil { + regExStr = regExNodes.MatchingNodes.Front().Value.(*CandidateNode).Node.Value + } + log.Debugf("regEx %v", regExStr) + + regEx, err := regexp.Compile(regExStr) + if err != nil { + return Context{}, err + } + + var results = list.New() + + for el := context.MatchingNodes.Front(); el != nil; el = el.Next() { + candidate := el.Value.(*CandidateNode) + node := unwrapDoc(candidate.Node) + if node.Tag != "!!str" { + return Context{}, fmt.Errorf("cannot match with %v, can only match strings. 
Hint: Most often you'll want to use '|=' over '=' for this operation.", node.Tag) + } + + match(regEx, candidate, node.Value, results) + } + + return context.ChildContext(results), nil +} + func joinStringOperator(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) { log.Debugf("-- joinStringOperator") joinStr := "" diff --git a/pkg/yqlib/operator_strings_test.go b/pkg/yqlib/operator_strings_test.go index 1b67eee5..3bd3b43b 100644 --- a/pkg/yqlib/operator_strings_test.go +++ b/pkg/yqlib/operator_strings_test.go @@ -5,61 +5,85 @@ import ( ) var stringsOperatorScenarios = []expressionScenario{ + // { + // description: "Join strings", + // document: `[cat, meow, 1, null, true]`, + // expression: `join("; ")`, + // expected: []string{ + // "D0, P[], (!!str)::cat; meow; 1; ; true\n", + // }, + // }, { - description: "Join strings", - document: `[cat, meow, 1, null, true]`, - expression: `join("; ")`, + description: "Match string", + document: `cat`, + expression: `match("at")`, expected: []string{ - "D0, P[], (!!str)::cat; meow; 1; ; true\n", + "D0, P[], ()::string: at\noffset: 1\nlength: 2\ncaptures: []\n", }, }, { - description: "Substitute / Replace string", - subdescription: "This uses golang regex, described [here](https://github.com/google/re2/wiki/Syntax)\nNote the use of `|=` to run in context of the current string value.", - document: `a: dogs are great`, - expression: `.a |= sub("dogs", "cats")`, + description: "Match string, case insensitive", + document: `cAt`, + expression: `match("(?i)at")`, expected: []string{ - "D0, P[], (doc)::a: cats are great\n", + "D0, P[], ()::string: At\noffset: 1\nlength: 2\ncaptures: []\n", }, }, { - description: "Substitute / Replace string with regex", - subdescription: "This uses golang regex, described [here](https://github.com/google/re2/wiki/Syntax)\nNote the use of `|=` to run in context of the current string value.", - document: "a: cat\nb: heat", - expression: `.[] |= sub("(a)", 
"${1}r")`, + description: "Match with capture groups", + document: `a cat`, + expression: `match("c(.t)")`, expected: []string{ - "D0, P[], (doc)::a: cart\nb: heart\n", + "D0, P[], ()::string: cat\noffset: 2\nlength: 3\ncaptures:\n - string: at\n offset: 3\n length: 2\n", }, }, - { - description: "Split strings", - document: `"cat; meow; 1; ; true"`, - expression: `split("; ")`, - expected: []string{ - "D0, P[], (!!seq)::- cat\n- meow\n- \"1\"\n- \"\"\n- \"true\"\n", - }, - }, - { - description: "Split strings one match", - document: `"word"`, - expression: `split("; ")`, - expected: []string{ - "D0, P[], (!!seq)::- word\n", - }, - }, - { - skipDoc: true, - document: `""`, - expression: `split("; ")`, - expected: []string{ - "D0, P[], (!!seq)::[]\n", // dont actually want this, just not to error - }, - }, - { - skipDoc: true, - expression: `split("; ")`, - expected: []string{}, - }, + // { + // description: "Substitute / Replace string", + // subdescription: "This uses golang regex, described [here](https://github.com/google/re2/wiki/Syntax)\nNote the use of `|=` to run in context of the current string value.", + // document: `a: dogs are great`, + // expression: `.a |= sub("dogs", "cats")`, + // expected: []string{ + // "D0, P[], (doc)::a: cats are great\n", + // }, + // }, + // { + // description: "Substitute / Replace string with regex", + // subdescription: "This uses golang regex, described [here](https://github.com/google/re2/wiki/Syntax)\nNote the use of `|=` to run in context of the current string value.", + // document: "a: cat\nb: heat", + // expression: `.[] |= sub("(a)", "${1}r")`, + // expected: []string{ + // "D0, P[], (doc)::a: cart\nb: heart\n", + // }, + // }, + // { + // description: "Split strings", + // document: `"cat; meow; 1; ; true"`, + // expression: `split("; ")`, + // expected: []string{ + // "D0, P[], (!!seq)::- cat\n- meow\n- \"1\"\n- \"\"\n- \"true\"\n", + // }, + // }, + // { + // description: "Split strings one match", + // document: 
`"word"`, + // expression: `split("; ")`, + // expected: []string{ + // "D0, P[], (!!seq)::- word\n", + // }, + // }, + // { + // skipDoc: true, + // document: `""`, + // expression: `split("; ")`, + // expected: []string{ + // "D0, P[], (!!seq)::[]\n", // dont actually want this, just not to error + // }, + // }, + // { + // skipDoc: true, + // expression: `split("; ")`, + // expected: []string{}, + // }, } func TestStringsOperatorScenarios(t *testing.T) {