Added match operator

This commit is contained in:
Mike Farah 2021-07-09 15:33:41 +10:00
parent 573618e4ce
commit 69c45ff64a
4 changed files with 337 additions and 91 deletions

View File

@ -1,6 +1,81 @@
# String Operators
## Match with names capture groups
## RegEx
This uses Go's native regex functions under the hood. See https://github.com/google/re2/wiki/Syntax for the supported syntax.
## Join strings
Given a sample.yml file of:
```yaml
- cat
- meow
- 1
- null
- true
```
then
```bash
yq eval 'join("; ")' sample.yml
```
will output
```yaml
cat; meow; 1; ; true
```
## Match string
Given a sample.yml file of:
```yaml
cat
```
then
```bash
yq eval 'match("at")' sample.yml
```
will output
```yaml
string: at
offset: 1
length: 2
captures: []
```
## Match string, case insensitive
Given a sample.yml file of:
```yaml
cAt
```
then
```bash
yq eval 'match("(?i)at")' sample.yml
```
will output
```yaml
string: At
offset: 1
length: 2
captures: []
```
## Match with capture groups
Given a sample.yml file of:
```yaml
a cat
```
then
```bash
yq eval 'match("c(.t)")' sample.yml
```
will output
```yaml
string: cat
offset: 2
length: 3
captures:
- string: at
offset: 3
length: 2
```
## Match with named capture groups
Given a sample.yml file of:
```yaml
a cat
@ -21,3 +96,109 @@ captures:
name: cool
```
## Match without global flag
Given a sample.yml file of:
```yaml
cat cat
```
then
```bash
yq eval 'match("cat")' sample.yml
```
will output
```yaml
string: cat
offset: 0
length: 3
captures: []
```
## Match with global flag
Given a sample.yml file of:
```yaml
cat cat
```
then
```bash
yq eval 'match("cat"; "g")' sample.yml
```
will output
```yaml
string: cat
offset: 0
length: 3
captures: []
string: cat
offset: 4
length: 3
captures: []
```
## Substitute / Replace string
This uses Go's regex syntax, described [here](https://github.com/google/re2/wiki/Syntax).
Note the use of `|=` to run in context of the current string value.
Given a sample.yml file of:
```yaml
a: dogs are great
```
then
```bash
yq eval '.a |= sub("dogs", "cats")' sample.yml
```
will output
```yaml
a: cats are great
```
## Substitute / Replace string with regex
This uses Go's regex syntax, described [here](https://github.com/google/re2/wiki/Syntax).
Note the use of `|=` to run in context of the current string value.
Given a sample.yml file of:
```yaml
a: cat
b: heat
```
then
```bash
yq eval '.[] |= sub("(a)", "${1}r")' sample.yml
```
will output
```yaml
a: cart
b: heart
```
## Split strings
Given a sample.yml file of:
```yaml
cat; meow; 1; ; true
```
then
```bash
yq eval 'split("; ")' sample.yml
```
will output
```yaml
- cat
- meow
- "1"
- ""
- "true"
```
## Split strings one match
Given a sample.yml file of:
```yaml
word
```
then
```bash
yq eval 'split("; ")' sample.yml
```
will output
```yaml
- word
```

View File

@ -1 +1,4 @@
# String Operators
## RegEx
This uses Go's native regex functions under the hood. See https://github.com/google/re2/wiki/Syntax for the supported syntax.

View File

@ -92,13 +92,25 @@ func addMatch(original []*yaml.Node, match string, offset int, name string) []*y
return newContent
}
func match(regEx *regexp.Regexp, candidate *CandidateNode, value string, results *list.List) {
// matchPreferences holds the optional flags parsed from the params argument
// of the match operator, e.g. the "g" in match("cat"; "g").
type matchPreferences struct {
// Global is set when the params contain "g"; match then collects every
// match in the value rather than only the first one.
Global bool
}
func match(matchPrefs matchPreferences, regEx *regexp.Regexp, candidate *CandidateNode, value string, results *list.List) {
subNames := regEx.SubexpNames()
log.Debugf("subNames %v", subNames)
allMatches := regEx.FindAllStringSubmatch(value, -1)
allIndices := regEx.FindAllStringSubmatchIndex(value, -1)
var allMatches [][]string
var allIndices [][]int
if matchPrefs.Global {
allMatches = regEx.FindAllStringSubmatch(value, -1)
allIndices = regEx.FindAllStringSubmatchIndex(value, -1)
} else {
allMatches = [][]string{regEx.FindStringSubmatch(value)}
allIndices = [][]int{regEx.FindStringSubmatchIndex(value)}
}
for i, matches := range allMatches {
capturesNode := &yaml.Node{Kind: yaml.SequenceNode}
@ -121,13 +133,38 @@ func match(regEx *regexp.Regexp, candidate *CandidateNode, value string, results
}
func matchOperator(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) {
//rhs block operator
//lhs of block = regex
func extractMatchArguments(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (string, matchPreferences, error) {
regExExpNode := expressionNode.Rhs
regExNodes, err := d.GetMatchingNodes(context.ReadOnlyClone(), expressionNode.Rhs)
matchPrefs := matchPreferences{}
// we got given parameters e.g. match(exp; params)
if expressionNode.Rhs.Operation.OperationType == blockOpType {
block := expressionNode.Rhs
regExExpNode = block.Lhs
replacementNodes, err := d.GetMatchingNodes(context, block.Rhs)
if err != nil {
return Context{}, err
return "", matchPrefs, err
}
paramText := ""
if replacementNodes.MatchingNodes.Front() != nil {
paramText = replacementNodes.MatchingNodes.Front().Value.(*CandidateNode).Node.Value
}
if strings.Contains(paramText, "g") {
paramText = strings.ReplaceAll(paramText, "g", "")
matchPrefs.Global = true
}
if strings.Contains(paramText, "i") {
return "", matchPrefs, fmt.Errorf(`'i' is not a valid option for match. To ignore case, use an expression like match("(?i)cat")`)
}
if len(paramText) > 0 {
return "", matchPrefs, fmt.Errorf(`Unrecognised match params '%v', please see docs at https://mikefarah.gitbook.io/yq/operators/string-operators`, paramText)
}
}
regExNodes, err := d.GetMatchingNodes(context.ReadOnlyClone(), regExExpNode)
if err != nil {
return "", matchPrefs, err
}
log.Debug(NodesToString(regExNodes.MatchingNodes))
regExStr := ""
@ -135,6 +172,14 @@ func matchOperator(d *dataTreeNavigator, context Context, expressionNode *Expres
regExStr = regExNodes.MatchingNodes.Front().Value.(*CandidateNode).Node.Value
}
log.Debug("regEx %v", regExStr)
return regExStr, matchPrefs, nil
}
func matchOperator(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) {
regExStr, matchPrefs, err := extractMatchArguments(d, context, expressionNode)
if err != nil {
return Context{}, err
}
regEx, err := regexp.Compile(regExStr)
if err != nil {
@ -147,10 +192,10 @@ func matchOperator(d *dataTreeNavigator, context Context, expressionNode *Expres
candidate := el.Value.(*CandidateNode)
node := unwrapDoc(candidate.Node)
if node.Tag != "!!str" {
return Context{}, fmt.Errorf("cannot substitute with %v, can only substitute strings. Hint: Most often you'll want to use '|=' over '=' for this operation.", node.Tag)
return Context{}, fmt.Errorf("cannot match with %v, can only match strings. Hint: Most often you'll want to use '|=' over '=' for this operation", node.Tag)
}
match(regEx, candidate, node.Value, results)
match(matchPrefs, regEx, candidate, node.Value, results)
}
return context.ChildContext(results), nil

View File

@ -5,93 +5,110 @@ import (
)
var stringsOperatorScenarios = []expressionScenario{
// {
// description: "Join strings",
// document: `[cat, meow, 1, null, true]`,
// expression: `join("; ")`,
// expected: []string{
// "D0, P[], (!!str)::cat; meow; 1; ; true\n",
// },
// },
// {
// description: "Match string",
// document: `cat`,
// expression: `match("at")`,
// expected: []string{
// "D0, P[], ()::string: at\noffset: 1\nlength: 2\ncaptures: []\n",
// },
// },
// {
// description: "Match string, case insensitive",
// document: `cAt`,
// expression: `match("(?i)at")`,
// expected: []string{
// "D0, P[], ()::string: At\noffset: 1\nlength: 2\ncaptures: []\n",
// },
// },
// {
// description: "Match with capture groups",
// document: `a cat`,
// expression: `match("c(.t)")`,
// expected: []string{
// "D0, P[], ()::string: cat\noffset: 2\nlength: 3\ncaptures:\n - string: at\n offset: 3\n length: 2\n",
// },
// },
{
description: "Match with names capture groups",
description: "Join strings",
document: `[cat, meow, 1, null, true]`,
expression: `join("; ")`,
expected: []string{
"D0, P[], (!!str)::cat; meow; 1; ; true\n",
},
},
{
description: "Match string",
document: `cat`,
expression: `match("at")`,
expected: []string{
"D0, P[], ()::string: at\noffset: 1\nlength: 2\ncaptures: []\n",
},
},
{
description: "Match string, case insensitive",
document: `cAt`,
expression: `match("(?i)at")`,
expected: []string{
"D0, P[], ()::string: At\noffset: 1\nlength: 2\ncaptures: []\n",
},
},
{
description: "Match with capture groups",
document: `a cat`,
expression: `match("c(.t)")`,
expected: []string{
"D0, P[], ()::string: cat\noffset: 2\nlength: 3\ncaptures:\n - string: at\n offset: 3\n length: 2\n",
},
},
{
description: "Match with named capture groups",
document: `a cat`,
expression: `match("c(?P<cool>.t)")`,
expected: []string{
"D0, P[], ()::string: cat\noffset: 2\nlength: 3\ncaptures:\n - string: at\n offset: 3\n length: 2\n name: cool\n",
},
},
// {
// description: "Substitute / Replace string",
// subdescription: "This uses golang regex, described [here](https://github.com/google/re2/wiki/Syntax)\nNote the use of `|=` to run in context of the current string value.",
// document: `a: dogs are great`,
// expression: `.a |= sub("dogs", "cats")`,
// expected: []string{
// "D0, P[], (doc)::a: cats are great\n",
// },
// },
// {
// description: "Substitute / Replace string with regex",
// subdescription: "This uses golang regex, described [here](https://github.com/google/re2/wiki/Syntax)\nNote the use of `|=` to run in context of the current string value.",
// document: "a: cat\nb: heat",
// expression: `.[] |= sub("(a)", "${1}r")`,
// expected: []string{
// "D0, P[], (doc)::a: cart\nb: heart\n",
// },
// },
// {
// description: "Split strings",
// document: `"cat; meow; 1; ; true"`,
// expression: `split("; ")`,
// expected: []string{
// "D0, P[], (!!seq)::- cat\n- meow\n- \"1\"\n- \"\"\n- \"true\"\n",
// },
// },
// {
// description: "Split strings one match",
// document: `"word"`,
// expression: `split("; ")`,
// expected: []string{
// "D0, P[], (!!seq)::- word\n",
// },
// },
// {
// skipDoc: true,
// document: `""`,
// expression: `split("; ")`,
// expected: []string{
// "D0, P[], (!!seq)::[]\n", // dont actually want this, just not to error
// },
// },
// {
// skipDoc: true,
// expression: `split("; ")`,
// expected: []string{},
// },
{
description: "Match without global flag",
document: `cat cat`,
expression: `match("cat")`,
expected: []string{
"D0, P[], ()::string: cat\noffset: 0\nlength: 3\ncaptures: []\n",
},
},
{
description: "Match with global flag",
document: `cat cat`,
expression: `match("cat"; "g")`,
expected: []string{
"D0, P[], ()::string: cat\noffset: 0\nlength: 3\ncaptures: []\n",
"D0, P[], ()::string: cat\noffset: 4\nlength: 3\ncaptures: []\n",
},
},
{
description: "Substitute / Replace string",
subdescription: "This uses golang regex, described [here](https://github.com/google/re2/wiki/Syntax)\nNote the use of `|=` to run in context of the current string value.",
document: `a: dogs are great`,
expression: `.a |= sub("dogs", "cats")`,
expected: []string{
"D0, P[], (doc)::a: cats are great\n",
},
},
{
description: "Substitute / Replace string with regex",
subdescription: "This uses golang regex, described [here](https://github.com/google/re2/wiki/Syntax)\nNote the use of `|=` to run in context of the current string value.",
document: "a: cat\nb: heat",
expression: `.[] |= sub("(a)", "${1}r")`,
expected: []string{
"D0, P[], (doc)::a: cart\nb: heart\n",
},
},
{
description: "Split strings",
document: `"cat; meow; 1; ; true"`,
expression: `split("; ")`,
expected: []string{
"D0, P[], (!!seq)::- cat\n- meow\n- \"1\"\n- \"\"\n- \"true\"\n",
},
},
{
description: "Split strings one match",
document: `"word"`,
expression: `split("; ")`,
expected: []string{
"D0, P[], (!!seq)::- word\n",
},
},
{
skipDoc: true,
document: `""`,
expression: `split("; ")`,
expected: []string{
"D0, P[], (!!seq)::[]\n", // dont actually want this, just not to error
},
},
{
skipDoc: true,
expression: `split("; ")`,
expected: []string{},
},
}
func TestStringsOperatorScenarios(t *testing.T) {