(wip) regex match op

This commit is contained in:
Mike Farah 2021-07-07 22:40:46 +10:00
parent cae3d95fdc
commit cc7ea83506
7 changed files with 238 additions and 102 deletions

View File

@ -299,3 +299,33 @@ foobar:
thing: foobar_thing thing: foobar_thing
``` ```
## Dereference and update a field
Use explode with multiply to dereference an object
Given a sample.yml file of:
```yaml
item_value: &item_value
value: true
thingOne:
name: item_1
!!merge <<: *item_value
thingTwo:
name: item_2
!!merge <<: *item_value
```
then
```bash
yq eval '.thingOne |= explode(.) * {"value": false}' sample.yml
```
will output
```yaml
item_value: &item_value
value: true
thingOne:
name: item_1
value: false
thingTwo:
name: item_2
!!merge <<: *item_value
```

View File

@ -1,88 +1,56 @@
# String Operators # String Operators
## Join strings ## Match string
Given a sample.yml file of: Given a sample.yml file of:
```yaml ```yaml
- cat cat
- meow
- 1
- null
- true
``` ```
then then
```bash ```bash
yq eval 'join("; ")' sample.yml yq eval 'match("at")' sample.yml
``` ```
will output will output
```yaml ```yaml
cat; meow; 1; ; true string: at
offset: 1
length: 2
captures: []
``` ```
## Substitute / Replace string ## Match string, case insensitive
This uses golang regex, described [here](https://github.com/google/re2/wiki/Syntax)
Note the use of `|=` to run in context of the current string value.
Given a sample.yml file of: Given a sample.yml file of:
```yaml ```yaml
a: dogs are great cAt
``` ```
then then
```bash ```bash
yq eval '.a |= sub("dogs", "cats")' sample.yml yq eval 'match("(?i)at")' sample.yml
``` ```
will output will output
```yaml ```yaml
a: cats are great string: At
offset: 1
length: 2
captures: []
``` ```
## Substitute / Replace string with regex ## Match with capture groups
This uses golang regex, described [here](https://github.com/google/re2/wiki/Syntax)
Note the use of `|=` to run in context of the current string value.
Given a sample.yml file of: Given a sample.yml file of:
```yaml ```yaml
a: cat a cat
b: heat
``` ```
then then
```bash ```bash
yq eval '.[] |= sub("(a)", "${1}r")' sample.yml yq eval 'match("c(.t)")' sample.yml
``` ```
will output will output
```yaml ```yaml
a: cart string: cat
b: heart offset: 2
``` length: 3
captures:
## Split strings - string: at
Given a sample.yml file of: offset: 3
```yaml length: 2
cat; meow; 1; ; true
```
then
```bash
yq eval 'split("; ")' sample.yml
```
will output
```yaml
- cat
- meow
- "1"
- ""
- "true"
```
## Split strings one match
Given a sample.yml file of:
```yaml
word
```
then
```bash
yq eval 'split("; ")' sample.yml
```
will output
```yaml
- word
``` ```

View File

@ -276,6 +276,7 @@ func initLexer() (*lex.Lexer, error) {
lexer.Add([]byte(`join`), opToken(joinStringOpType)) lexer.Add([]byte(`join`), opToken(joinStringOpType))
lexer.Add([]byte(`sub`), opToken(subStringOpType)) lexer.Add([]byte(`sub`), opToken(subStringOpType))
lexer.Add([]byte(`match`), opToken(matchOpType))
lexer.Add([]byte(`any`), opToken(anyOpType)) lexer.Add([]byte(`any`), opToken(anyOpType))
lexer.Add([]byte(`any_c`), opToken(anyConditionOpType)) lexer.Add([]byte(`any_c`), opToken(anyConditionOpType))

View File

@ -83,6 +83,7 @@ var explodeOpType = &operationType{Type: "EXPLODE", NumArgs: 1, Precedence: 50,
var sortKeysOpType = &operationType{Type: "SORT_KEYS", NumArgs: 1, Precedence: 50, Handler: sortKeysOperator} var sortKeysOpType = &operationType{Type: "SORT_KEYS", NumArgs: 1, Precedence: 50, Handler: sortKeysOperator}
var joinStringOpType = &operationType{Type: "JOIN", NumArgs: 1, Precedence: 50, Handler: joinStringOperator} var joinStringOpType = &operationType{Type: "JOIN", NumArgs: 1, Precedence: 50, Handler: joinStringOperator}
var subStringOpType = &operationType{Type: "SUBSTR", NumArgs: 1, Precedence: 50, Handler: substituteStringOperator} var subStringOpType = &operationType{Type: "SUBSTR", NumArgs: 1, Precedence: 50, Handler: substituteStringOperator}
var matchOpType = &operationType{Type: "MATCH", NumArgs: 1, Precedence: 50, Handler: matchOperator}
var splitStringOpType = &operationType{Type: "SPLIT", NumArgs: 1, Precedence: 50, Handler: splitStringOperator} var splitStringOpType = &operationType{Type: "SPLIT", NumArgs: 1, Precedence: 50, Handler: splitStringOperator}
var keysOpType = &operationType{Type: "KEYS", NumArgs: 0, Precedence: 50, Handler: keysOperator} var keysOpType = &operationType{Type: "KEYS", NumArgs: 0, Precedence: 50, Handler: keysOperator}
@ -114,8 +115,8 @@ type Operation struct {
UpdateAssign bool // used for assign ops, when true it means we evaluate the rhs given the lhs UpdateAssign bool // used for assign ops, when true it means we evaluate the rhs given the lhs
} }
func createValueOperation(value interface{}, stringValue string) *Operation { func createScalarNode(value interface{}, stringValue string) *yaml.Node {
var node yaml.Node = yaml.Node{Kind: yaml.ScalarNode} var node = &yaml.Node{Kind: yaml.ScalarNode}
node.Value = stringValue node.Value = stringValue
switch value.(type) { switch value.(type) {
@ -130,12 +131,17 @@ func createValueOperation(value interface{}, stringValue string) *Operation {
case nil: case nil:
node.Tag = "!!null" node.Tag = "!!null"
} }
return node
}
func createValueOperation(value interface{}, stringValue string) *Operation {
var node *yaml.Node = createScalarNode(value, stringValue)
return &Operation{ return &Operation{
OperationType: valueOpType, OperationType: valueOpType,
Value: value, Value: value,
StringValue: stringValue, StringValue: stringValue,
CandidateNode: &CandidateNode{Node: &node}, CandidateNode: &CandidateNode{Node: node},
} }
} }

View File

@ -12,6 +12,29 @@ var specDocument = `- &CENTER { x: 1, y: 2 }
var expectedSpecResult = "D0, P[4], (!!map)::x: 1\ny: 2\nr: 10\n" var expectedSpecResult = "D0, P[4], (!!map)::x: 1\ny: 2\nr: 10\n"
var simpleArrayRef = `
item_value: &item_value
value: true
thingOne:
name: item_1
<<: *item_value
thingTwo:
name: item_2
<<: *item_value
`
var expectedUpdatedArrayRef = `D0, P[], (doc)::item_value: &item_value
value: true
thingOne:
name: item_1
value: false
thingTwo:
name: item_2
!!merge <<: *item_value
`
var anchorOperatorScenarios = []expressionScenario{ var anchorOperatorScenarios = []expressionScenario{
{ {
description: "Merge one map", description: "Merge one map",
@ -197,6 +220,13 @@ foobar:
"D0, P[], (doc)::{f: {a: cat, b: {f: cat}, cat: {f: cat}}}\n", "D0, P[], (doc)::{f: {a: cat, b: {f: cat}, cat: {f: cat}}}\n",
}, },
}, },
{
description: "Dereference and update a field",
subdescription: "`Use explode with multiply to dereference an object",
document: simpleArrayRef,
expression: `.thingOne |= explode(.) * {"value": false}`,
expected: []string{expectedUpdatedArrayRef},
},
} }
func TestAnchorAliasOperatorScenarios(t *testing.T) { func TestAnchorAliasOperatorScenarios(t *testing.T) {

View File

@ -74,6 +74,83 @@ func substituteStringOperator(d *dataTreeNavigator, context Context, expressionN
} }
// addMatch appends three key/value scalar pairs describing a regex match to
// original and returns the extended slice: "string" -> match,
// "offset" -> offset and "length" -> len(match) (a byte count).
func addMatch(original []*yaml.Node, match string, offset int) []*yaml.Node {
	length := len(match)

	// Keys and values alternate, as expected for the Content of a mapping node.
	entries := []*yaml.Node{
		createScalarNode("string", "string"),
		createScalarNode(match, match),
		createScalarNode("offset", "offset"),
		createScalarNode(offset, fmt.Sprintf("%v", offset)),
		createScalarNode("length", "length"),
		createScalarNode(length, fmt.Sprintf("%v", length)),
	}
	return append(original, entries...)
}
// match runs regEx against value and pushes one result per match onto results.
//
// Each result is a YAML mapping node with the keys "string", "offset",
// "length" and "captures", wrapped with candidate.CreateChild. "captures" is a
// sequence holding one mapping (string, offset, length) per capture group of
// the expression. Offsets and lengths are byte positions as reported by the
// regexp package; a capture group that did not participate in the match is
// reported with an empty string and offset -1.
func match(regEx *regexp.Regexp, candidate *CandidateNode, value string, results *list.List) {
	// Both calls walk the matches in the same order, so allIndices[i] holds the
	// positions for allMatches[i].
	allMatches := regEx.FindAllStringSubmatch(value, -1)
	allIndices := regEx.FindAllStringSubmatchIndex(value, -1)

	for i, matches := range allMatches {
		indices := allIndices[i]
		fullMatch, submatches := matches[0], matches[1:]

		capturesNode := &yaml.Node{Kind: yaml.SequenceNode}
		for j, submatch := range submatches {
			captureNode := &yaml.Node{Kind: yaml.MappingNode}
			// Build on the capture's own content. Appending to
			// capturesNode.Content here would copy the previously collected
			// capture nodes into every capture after the first.
			// indices[2+j*2] is the start offset of capture group j+1.
			captureNode.Content = addMatch(captureNode.Content, submatch, indices[2+j*2])
			capturesNode.Content = append(capturesNode.Content, captureNode)
		}

		node := &yaml.Node{Kind: yaml.MappingNode}
		node.Content = addMatch(node.Content, fullMatch, indices[0])
		node.Content = append(node.Content,
			createScalarNode("captures", "captures"),
			capturesNode,
		)
		results.PushBack(candidate.CreateChild(nil, node))
	}
}
// matchOperator implements the `match` operator: it evaluates the operator's
// argument (expressionNode.Rhs) to obtain a regular expression, compiles it,
// and runs it against every node in the current context.
//
// The value of the first node the argument evaluates to is used as the
// expression; when the argument yields no nodes the empty expression is used.
// Every context node must be tagged !!str, otherwise an error is returned. An
// error is also returned when the argument cannot be evaluated or the
// expression does not compile. The returned context holds the results
// collected by match for all context nodes.
func matchOperator(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) {
	regExNodes, err := d.GetMatchingNodes(context.ReadOnlyClone(), expressionNode.Rhs)
	if err != nil {
		return Context{}, err
	}
	log.Debug(NodesToString(regExNodes.MatchingNodes))

	regExStr := ""
	if regExNodes.MatchingNodes.Front() != nil {
		regExStr = regExNodes.MatchingNodes.Front().Value.(*CandidateNode).Node.Value
	}
	// Debugf, not Debug: the message contains a format verb.
	log.Debugf("regEx %v", regExStr)

	regEx, err := regexp.Compile(regExStr)
	if err != nil {
		return Context{}, err
	}

	var results = list.New()

	for el := context.MatchingNodes.Front(); el != nil; el = el.Next() {
		candidate := el.Value.(*CandidateNode)
		node := unwrapDoc(candidate.Node)
		if node.Tag != "!!str" {
			return Context{}, fmt.Errorf("cannot match with %v, can only match strings", node.Tag)
		}

		match(regEx, candidate, node.Value, results)
	}

	return context.ChildContext(results), nil
}
func joinStringOperator(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) { func joinStringOperator(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) {
log.Debugf("-- joinStringOperator") log.Debugf("-- joinStringOperator")
joinStr := "" joinStr := ""

View File

@ -5,61 +5,85 @@ import (
) )
var stringsOperatorScenarios = []expressionScenario{ var stringsOperatorScenarios = []expressionScenario{
// {
// description: "Join strings",
// document: `[cat, meow, 1, null, true]`,
// expression: `join("; ")`,
// expected: []string{
// "D0, P[], (!!str)::cat; meow; 1; ; true\n",
// },
// },
{ {
description: "Join strings", description: "Match string",
document: `[cat, meow, 1, null, true]`, document: `cat`,
expression: `join("; ")`, expression: `match("at")`,
expected: []string{ expected: []string{
"D0, P[], (!!str)::cat; meow; 1; ; true\n", "D0, P[], ()::string: at\noffset: 1\nlength: 2\ncaptures: []\n",
}, },
}, },
{ {
description: "Substitute / Replace string", description: "Match string, case insensitive",
subdescription: "This uses golang regex, described [here](https://github.com/google/re2/wiki/Syntax)\nNote the use of `|=` to run in context of the current string value.", document: `cAt`,
document: `a: dogs are great`, expression: `match("(?i)at")`,
expression: `.a |= sub("dogs", "cats")`,
expected: []string{ expected: []string{
"D0, P[], (doc)::a: cats are great\n", "D0, P[], ()::string: At\noffset: 1\nlength: 2\ncaptures: []\n",
}, },
}, },
{ {
description: "Substitute / Replace string with regex", description: "Match with capture groups",
subdescription: "This uses golang regex, described [here](https://github.com/google/re2/wiki/Syntax)\nNote the use of `|=` to run in context of the current string value.", document: `a cat`,
document: "a: cat\nb: heat", expression: `match("c(.t)")`,
expression: `.[] |= sub("(a)", "${1}r")`,
expected: []string{ expected: []string{
"D0, P[], (doc)::a: cart\nb: heart\n", "D0, P[], ()::string: cat\noffset: 2\nlength: 3\ncaptures:\n - string: at\n offset: 3\n length: 2\n",
}, },
}, },
{ // {
description: "Split strings", // description: "Substitute / Replace string",
document: `"cat; meow; 1; ; true"`, // subdescription: "This uses golang regex, described [here](https://github.com/google/re2/wiki/Syntax)\nNote the use of `|=` to run in context of the current string value.",
expression: `split("; ")`, // document: `a: dogs are great`,
expected: []string{ // expression: `.a |= sub("dogs", "cats")`,
"D0, P[], (!!seq)::- cat\n- meow\n- \"1\"\n- \"\"\n- \"true\"\n", // expected: []string{
}, // "D0, P[], (doc)::a: cats are great\n",
}, // },
{ // },
description: "Split strings one match", // {
document: `"word"`, // description: "Substitute / Replace string with regex",
expression: `split("; ")`, // subdescription: "This uses golang regex, described [here](https://github.com/google/re2/wiki/Syntax)\nNote the use of `|=` to run in context of the current string value.",
expected: []string{ // document: "a: cat\nb: heat",
"D0, P[], (!!seq)::- word\n", // expression: `.[] |= sub("(a)", "${1}r")`,
}, // expected: []string{
}, // "D0, P[], (doc)::a: cart\nb: heart\n",
{ // },
skipDoc: true, // },
document: `""`, // {
expression: `split("; ")`, // description: "Split strings",
expected: []string{ // document: `"cat; meow; 1; ; true"`,
"D0, P[], (!!seq)::[]\n", // dont actually want this, just not to error // expression: `split("; ")`,
}, // expected: []string{
}, // "D0, P[], (!!seq)::- cat\n- meow\n- \"1\"\n- \"\"\n- \"true\"\n",
{ // },
skipDoc: true, // },
expression: `split("; ")`, // {
expected: []string{}, // description: "Split strings one match",
}, // document: `"word"`,
// expression: `split("; ")`,
// expected: []string{
// "D0, P[], (!!seq)::- word\n",
// },
// },
// {
// skipDoc: true,
// document: `""`,
// expression: `split("; ")`,
// expected: []string{
// "D0, P[], (!!seq)::[]\n", // dont actually want this, just not to error
// },
// },
// {
// skipDoc: true,
// expression: `split("; ")`,
// expected: []string{},
// },
} }
func TestStringsOperatorScenarios(t *testing.T) { func TestStringsOperatorScenarios(t *testing.T) {