Added match operator

This commit is contained in:
Mike Farah 2021-07-09 15:33:41 +10:00
parent 573618e4ce
commit 69c45ff64a
4 changed files with 337 additions and 91 deletions

View File

@ -1,6 +1,81 @@
# String Operators # String Operators
## Match with names capture groups ## RegEx
This uses Go's native regex functions under the hood. See https://github.com/google/re2/wiki/Syntax for the supported syntax.
## Join strings
Given a sample.yml file of:
```yaml
- cat
- meow
- 1
- null
- true
```
then
```bash
yq eval 'join("; ")' sample.yml
```
will output
```yaml
cat; meow; 1; ; true
```
## Match string
Given a sample.yml file of:
```yaml
cat
```
then
```bash
yq eval 'match("at")' sample.yml
```
will output
```yaml
string: at
offset: 1
length: 2
captures: []
```
## Match string, case insensitive
Given a sample.yml file of:
```yaml
cAt
```
then
```bash
yq eval 'match("(?i)at")' sample.yml
```
will output
```yaml
string: At
offset: 1
length: 2
captures: []
```
## Match with capture groups
Given a sample.yml file of:
```yaml
a cat
```
then
```bash
yq eval 'match("c(.t)")' sample.yml
```
will output
```yaml
string: cat
offset: 2
length: 3
captures:
- string: at
offset: 3
length: 2
```
## Match with named capture groups
Given a sample.yml file of: Given a sample.yml file of:
```yaml ```yaml
a cat a cat
@ -21,3 +96,109 @@ captures:
name: cool name: cool
``` ```
## Match without global flag
Given a sample.yml file of:
```yaml
cat cat
```
then
```bash
yq eval 'match("cat")' sample.yml
```
will output
```yaml
string: cat
offset: 0
length: 3
captures: []
```
## Match with global flag
Given a sample.yml file of:
```yaml
cat cat
```
then
```bash
yq eval 'match("cat"; "g")' sample.yml
```
will output
```yaml
string: cat
offset: 0
length: 3
captures: []
string: cat
offset: 4
length: 3
captures: []
```
## Substitute / Replace string
This uses Go's regex syntax, described in the [RE2 syntax reference](https://github.com/google/re2/wiki/Syntax).
Note the use of `|=` to run in the context of the current string value.
Given a sample.yml file of:
```yaml
a: dogs are great
```
then
```bash
yq eval '.a |= sub("dogs", "cats")' sample.yml
```
will output
```yaml
a: cats are great
```
## Substitute / Replace string with regex
This uses Go's regex syntax, described in the [RE2 syntax reference](https://github.com/google/re2/wiki/Syntax).
Note the use of `|=` to run in the context of the current string value.
Given a sample.yml file of:
```yaml
a: cat
b: heat
```
then
```bash
yq eval '.[] |= sub("(a)", "${1}r")' sample.yml
```
will output
```yaml
a: cart
b: heart
```
## Split strings
Given a sample.yml file of:
```yaml
cat; meow; 1; ; true
```
then
```bash
yq eval 'split("; ")' sample.yml
```
will output
```yaml
- cat
- meow
- "1"
- ""
- "true"
```
## Split strings one match
Given a sample.yml file of:
```yaml
word
```
then
```bash
yq eval 'split("; ")' sample.yml
```
will output
```yaml
- word
```

View File

@ -1 +1,4 @@
# String Operators # String Operators
## RegEx
This uses Go's native regex functions under the hood. See https://github.com/google/re2/wiki/Syntax for the supported syntax.

View File

@ -92,13 +92,25 @@ func addMatch(original []*yaml.Node, match string, offset int, name string) []*y
return newContent return newContent
} }
func match(regEx *regexp.Regexp, candidate *CandidateNode, value string, results *list.List) { type matchPreferences struct {
Global bool
}
func match(matchPrefs matchPreferences, regEx *regexp.Regexp, candidate *CandidateNode, value string, results *list.List) {
subNames := regEx.SubexpNames() subNames := regEx.SubexpNames()
log.Debugf("subNames %v", subNames) log.Debugf("subNames %v", subNames)
allMatches := regEx.FindAllStringSubmatch(value, -1) var allMatches [][]string
allIndices := regEx.FindAllStringSubmatchIndex(value, -1) var allIndices [][]int
if matchPrefs.Global {
allMatches = regEx.FindAllStringSubmatch(value, -1)
allIndices = regEx.FindAllStringSubmatchIndex(value, -1)
} else {
allMatches = [][]string{regEx.FindStringSubmatch(value)}
allIndices = [][]int{regEx.FindStringSubmatchIndex(value)}
}
for i, matches := range allMatches { for i, matches := range allMatches {
capturesNode := &yaml.Node{Kind: yaml.SequenceNode} capturesNode := &yaml.Node{Kind: yaml.SequenceNode}
@ -121,13 +133,38 @@ func match(regEx *regexp.Regexp, candidate *CandidateNode, value string, results
} }
func matchOperator(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) { func extractMatchArguments(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (string, matchPreferences, error) {
//rhs block operator regExExpNode := expressionNode.Rhs
//lhs of block = regex
regExNodes, err := d.GetMatchingNodes(context.ReadOnlyClone(), expressionNode.Rhs) matchPrefs := matchPreferences{}
// we got given parameters e.g. match(exp; params)
if expressionNode.Rhs.Operation.OperationType == blockOpType {
block := expressionNode.Rhs
regExExpNode = block.Lhs
replacementNodes, err := d.GetMatchingNodes(context, block.Rhs)
if err != nil {
return "", matchPrefs, err
}
paramText := ""
if replacementNodes.MatchingNodes.Front() != nil {
paramText = replacementNodes.MatchingNodes.Front().Value.(*CandidateNode).Node.Value
}
if strings.Contains(paramText, "g") {
paramText = strings.ReplaceAll(paramText, "g", "")
matchPrefs.Global = true
}
if strings.Contains(paramText, "i") {
return "", matchPrefs, fmt.Errorf(`'i' is not a valid option for match. To ignore case, use an expression like match("(?i)cat")`)
}
if len(paramText) > 0 {
return "", matchPrefs, fmt.Errorf(`Unrecognised match params '%v', please see docs at https://mikefarah.gitbook.io/yq/operators/string-operators`, paramText)
}
}
regExNodes, err := d.GetMatchingNodes(context.ReadOnlyClone(), regExExpNode)
if err != nil { if err != nil {
return Context{}, err return "", matchPrefs, err
} }
log.Debug(NodesToString(regExNodes.MatchingNodes)) log.Debug(NodesToString(regExNodes.MatchingNodes))
regExStr := "" regExStr := ""
@ -135,6 +172,14 @@ func matchOperator(d *dataTreeNavigator, context Context, expressionNode *Expres
regExStr = regExNodes.MatchingNodes.Front().Value.(*CandidateNode).Node.Value regExStr = regExNodes.MatchingNodes.Front().Value.(*CandidateNode).Node.Value
} }
log.Debug("regEx %v", regExStr) log.Debug("regEx %v", regExStr)
return regExStr, matchPrefs, nil
}
func matchOperator(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) {
regExStr, matchPrefs, err := extractMatchArguments(d, context, expressionNode)
if err != nil {
return Context{}, err
}
regEx, err := regexp.Compile(regExStr) regEx, err := regexp.Compile(regExStr)
if err != nil { if err != nil {
@ -147,10 +192,10 @@ func matchOperator(d *dataTreeNavigator, context Context, expressionNode *Expres
candidate := el.Value.(*CandidateNode) candidate := el.Value.(*CandidateNode)
node := unwrapDoc(candidate.Node) node := unwrapDoc(candidate.Node)
if node.Tag != "!!str" { if node.Tag != "!!str" {
return Context{}, fmt.Errorf("cannot substitute with %v, can only substitute strings. Hint: Most often you'll want to use '|=' over '=' for this operation.", node.Tag) return Context{}, fmt.Errorf("cannot match with %v, can only match strings. Hint: Most often you'll want to use '|=' over '=' for this operation", node.Tag)
} }
match(regEx, candidate, node.Value, results) match(matchPrefs, regEx, candidate, node.Value, results)
} }
return context.ChildContext(results), nil return context.ChildContext(results), nil

View File

@ -5,93 +5,110 @@ import (
) )
var stringsOperatorScenarios = []expressionScenario{ var stringsOperatorScenarios = []expressionScenario{
// {
// description: "Join strings",
// document: `[cat, meow, 1, null, true]`,
// expression: `join("; ")`,
// expected: []string{
// "D0, P[], (!!str)::cat; meow; 1; ; true\n",
// },
// },
// {
// description: "Match string",
// document: `cat`,
// expression: `match("at")`,
// expected: []string{
// "D0, P[], ()::string: at\noffset: 1\nlength: 2\ncaptures: []\n",
// },
// },
// {
// description: "Match string, case insensitive",
// document: `cAt`,
// expression: `match("(?i)at")`,
// expected: []string{
// "D0, P[], ()::string: At\noffset: 1\nlength: 2\ncaptures: []\n",
// },
// },
// {
// description: "Match with capture groups",
// document: `a cat`,
// expression: `match("c(.t)")`,
// expected: []string{
// "D0, P[], ()::string: cat\noffset: 2\nlength: 3\ncaptures:\n - string: at\n offset: 3\n length: 2\n",
// },
// },
{ {
description: "Match with names capture groups", description: "Join strings",
document: `[cat, meow, 1, null, true]`,
expression: `join("; ")`,
expected: []string{
"D0, P[], (!!str)::cat; meow; 1; ; true\n",
},
},
{
description: "Match string",
document: `cat`,
expression: `match("at")`,
expected: []string{
"D0, P[], ()::string: at\noffset: 1\nlength: 2\ncaptures: []\n",
},
},
{
description: "Match string, case insensitive",
document: `cAt`,
expression: `match("(?i)at")`,
expected: []string{
"D0, P[], ()::string: At\noffset: 1\nlength: 2\ncaptures: []\n",
},
},
{
description: "Match with capture groups",
document: `a cat`,
expression: `match("c(.t)")`,
expected: []string{
"D0, P[], ()::string: cat\noffset: 2\nlength: 3\ncaptures:\n - string: at\n offset: 3\n length: 2\n",
},
},
{
description: "Match with named capture groups",
document: `a cat`, document: `a cat`,
expression: `match("c(?P<cool>.t)")`, expression: `match("c(?P<cool>.t)")`,
expected: []string{ expected: []string{
"D0, P[], ()::string: cat\noffset: 2\nlength: 3\ncaptures:\n - string: at\n offset: 3\n length: 2\n name: cool\n", "D0, P[], ()::string: cat\noffset: 2\nlength: 3\ncaptures:\n - string: at\n offset: 3\n length: 2\n name: cool\n",
}, },
}, },
// { {
// description: "Substitute / Replace string", description: "Match without global flag",
// subdescription: "This uses golang regex, described [here](https://github.com/google/re2/wiki/Syntax)\nNote the use of `|=` to run in context of the current string value.", document: `cat cat`,
// document: `a: dogs are great`, expression: `match("cat")`,
// expression: `.a |= sub("dogs", "cats")`, expected: []string{
// expected: []string{ "D0, P[], ()::string: cat\noffset: 0\nlength: 3\ncaptures: []\n",
// "D0, P[], (doc)::a: cats are great\n", },
// }, },
// }, {
// { description: "Match with global flag",
// description: "Substitute / Replace string with regex", document: `cat cat`,
// subdescription: "This uses golang regex, described [here](https://github.com/google/re2/wiki/Syntax)\nNote the use of `|=` to run in context of the current string value.", expression: `match("cat"; "g")`,
// document: "a: cat\nb: heat", expected: []string{
// expression: `.[] |= sub("(a)", "${1}r")`, "D0, P[], ()::string: cat\noffset: 0\nlength: 3\ncaptures: []\n",
// expected: []string{ "D0, P[], ()::string: cat\noffset: 4\nlength: 3\ncaptures: []\n",
// "D0, P[], (doc)::a: cart\nb: heart\n", },
// }, },
// }, {
// { description: "Substitute / Replace string",
// description: "Split strings", subdescription: "This uses golang regex, described [here](https://github.com/google/re2/wiki/Syntax)\nNote the use of `|=` to run in context of the current string value.",
// document: `"cat; meow; 1; ; true"`, document: `a: dogs are great`,
// expression: `split("; ")`, expression: `.a |= sub("dogs", "cats")`,
// expected: []string{ expected: []string{
// "D0, P[], (!!seq)::- cat\n- meow\n- \"1\"\n- \"\"\n- \"true\"\n", "D0, P[], (doc)::a: cats are great\n",
// }, },
// }, },
// { {
// description: "Split strings one match", description: "Substitute / Replace string with regex",
// document: `"word"`, subdescription: "This uses golang regex, described [here](https://github.com/google/re2/wiki/Syntax)\nNote the use of `|=` to run in context of the current string value.",
// expression: `split("; ")`, document: "a: cat\nb: heat",
// expected: []string{ expression: `.[] |= sub("(a)", "${1}r")`,
// "D0, P[], (!!seq)::- word\n", expected: []string{
// }, "D0, P[], (doc)::a: cart\nb: heart\n",
// }, },
// { },
// skipDoc: true, {
// document: `""`, description: "Split strings",
// expression: `split("; ")`, document: `"cat; meow; 1; ; true"`,
// expected: []string{ expression: `split("; ")`,
// "D0, P[], (!!seq)::[]\n", // dont actually want this, just not to error expected: []string{
// }, "D0, P[], (!!seq)::- cat\n- meow\n- \"1\"\n- \"\"\n- \"true\"\n",
// }, },
// { },
// skipDoc: true, {
// expression: `split("; ")`, description: "Split strings one match",
// expected: []string{}, document: `"word"`,
// }, expression: `split("; ")`,
expected: []string{
"D0, P[], (!!seq)::- word\n",
},
},
{
skipDoc: true,
document: `""`,
expression: `split("; ")`,
expected: []string{
"D0, P[], (!!seq)::[]\n", // dont actually want this, just not to error
},
},
{
skipDoc: true,
expression: `split("; ")`,
expected: []string{},
},
} }
func TestStringsOperatorScenarios(t *testing.T) { func TestStringsOperatorScenarios(t *testing.T) {