(wip) regex match op

This commit is contained in:
Mike Farah 2021-07-07 22:40:46 +10:00
parent cae3d95fdc
commit cc7ea83506
7 changed files with 238 additions and 102 deletions

View File

@ -299,3 +299,33 @@ foobar:
thing: foobar_thing
```
## Dereference and update a field
Use explode with multiply to dereference an object
Given a sample.yml file of:
```yaml
item_value: &item_value
value: true
thingOne:
name: item_1
!!merge <<: *item_value
thingTwo:
name: item_2
!!merge <<: *item_value
```
then
```bash
yq eval '.thingOne |= explode(.) * {"value": false}' sample.yml
```
will output
```yaml
item_value: &item_value
value: true
thingOne:
name: item_1
value: false
thingTwo:
name: item_2
!!merge <<: *item_value
```

View File

@ -1,88 +1,56 @@
# String Operators
## Join strings
## Match string
Given a sample.yml file of:
```yaml
- cat
- meow
- 1
- null
- true
cat
```
then
```bash
yq eval 'join("; ")' sample.yml
yq eval 'match("at")' sample.yml
```
will output
```yaml
cat; meow; 1; ; true
string: at
offset: 1
length: 2
captures: []
```
## Substitute / Replace string
This uses golang regex, described [here](https://github.com/google/re2/wiki/Syntax)
Note the use of `|=` to run in context of the current string value.
## Match string, case insensitive
Given a sample.yml file of:
```yaml
a: dogs are great
cAt
```
then
```bash
yq eval '.a |= sub("dogs", "cats")' sample.yml
yq eval 'match("(?i)at")' sample.yml
```
will output
```yaml
a: cats are great
string: At
offset: 1
length: 2
captures: []
```
## Substitute / Replace string with regex
This uses golang regex, described [here](https://github.com/google/re2/wiki/Syntax)
Note the use of `|=` to run in context of the current string value.
## Match with capture groups
Given a sample.yml file of:
```yaml
a: cat
b: heat
a cat
```
then
```bash
yq eval '.[] |= sub("(a)", "${1}r")' sample.yml
yq eval 'match("c(.t)")' sample.yml
```
will output
```yaml
a: cart
b: heart
```
## Split strings
Given a sample.yml file of:
```yaml
cat; meow; 1; ; true
```
then
```bash
yq eval 'split("; ")' sample.yml
```
will output
```yaml
- cat
- meow
- "1"
- ""
- "true"
```
## Split strings one match
Given a sample.yml file of:
```yaml
word
```
then
```bash
yq eval 'split("; ")' sample.yml
```
will output
```yaml
- word
string: cat
offset: 2
length: 3
captures:
- string: at
offset: 3
length: 2
```

View File

@ -276,6 +276,7 @@ func initLexer() (*lex.Lexer, error) {
lexer.Add([]byte(`join`), opToken(joinStringOpType))
lexer.Add([]byte(`sub`), opToken(subStringOpType))
lexer.Add([]byte(`match`), opToken(matchOpType))
lexer.Add([]byte(`any`), opToken(anyOpType))
lexer.Add([]byte(`any_c`), opToken(anyConditionOpType))

View File

@ -83,6 +83,7 @@ var explodeOpType = &operationType{Type: "EXPLODE", NumArgs: 1, Precedence: 50,
// Operation types for the key and string operators. Each entry pairs a Type
// label with the Handler function that implements it; all of them are
// declared with Precedence 50.
var sortKeysOpType = &operationType{Type: "SORT_KEYS", NumArgs: 1, Precedence: 50, Handler: sortKeysOperator}
var joinStringOpType = &operationType{Type: "JOIN", NumArgs: 1, Precedence: 50, Handler: joinStringOperator}
var subStringOpType = &operationType{Type: "SUBSTR", NumArgs: 1, Precedence: 50, Handler: substituteStringOperator}
// matchOpType is the MATCH operation; it is dispatched to matchOperator.
var matchOpType = &operationType{Type: "MATCH", NumArgs: 1, Precedence: 50, Handler: matchOperator}
var splitStringOpType = &operationType{Type: "SPLIT", NumArgs: 1, Precedence: 50, Handler: splitStringOperator}
// keysOpType is the only entry in this group declared with NumArgs: 0.
var keysOpType = &operationType{Type: "KEYS", NumArgs: 0, Precedence: 50, Handler: keysOperator}
@ -114,8 +115,8 @@ type Operation struct {
UpdateAssign bool // used for assign ops, when true it means we evaluate the rhs given the lhs
}
func createValueOperation(value interface{}, stringValue string) *Operation {
var node yaml.Node = yaml.Node{Kind: yaml.ScalarNode}
func createScalarNode(value interface{}, stringValue string) *yaml.Node {
var node = &yaml.Node{Kind: yaml.ScalarNode}
node.Value = stringValue
switch value.(type) {
@ -130,12 +131,17 @@ func createValueOperation(value interface{}, stringValue string) *Operation {
case nil:
node.Tag = "!!null"
}
return node
}
func createValueOperation(value interface{}, stringValue string) *Operation {
var node *yaml.Node = createScalarNode(value, stringValue)
return &Operation{
OperationType: valueOpType,
Value: value,
StringValue: stringValue,
CandidateNode: &CandidateNode{Node: &node},
CandidateNode: &CandidateNode{Node: node},
}
}

View File

@ -12,6 +12,29 @@ var specDocument = `- &CENTER { x: 1, y: 2 }
var expectedSpecResult = "D0, P[4], (!!map)::x: 1\ny: 2\nr: 10\n"
// simpleArrayRef is the input document for the "Dereference and update a
// field" scenario: it defines the anchor item_value and merges it (via <<)
// into both thingOne and thingTwo.
var simpleArrayRef = `
item_value: &item_value
value: true
thingOne:
name: item_1
<<: *item_value
thingTwo:
name: item_2
<<: *item_value
`
// expectedUpdatedArrayRef is the expected result of the "Dereference and
// update a field" scenario run against simpleArrayRef: thingOne carries its
// own "value: false" instead of the merge key, while thingTwo still merges
// the item_value anchor.
// NOTE(review): the "D0, P[], (doc)::" prefix is presumably the document
// index, path and tag emitted by the test formatter - confirm against the
// test helpers.
var expectedUpdatedArrayRef = `D0, P[], (doc)::item_value: &item_value
value: true
thingOne:
name: item_1
value: false
thingTwo:
name: item_2
!!merge <<: *item_value
`
var anchorOperatorScenarios = []expressionScenario{
{
description: "Merge one map",
@ -197,6 +220,13 @@ foobar:
"D0, P[], (doc)::{f: {a: cat, b: {f: cat}, cat: {f: cat}}}\n",
},
},
{
description: "Dereference and update a field",
subdescription: "`Use explode with multiply to dereference an object",
document: simpleArrayRef,
expression: `.thingOne |= explode(.) * {"value": false}`,
expected: []string{expectedUpdatedArrayRef},
},
}
func TestAnchorAliasOperatorScenarios(t *testing.T) {

View File

@ -74,6 +74,83 @@ func substituteStringOperator(d *dataTreeNavigator, context Context, expressionN
}
// addMatch appends the key/value scalar pairs that describe a single regex
// match to original and returns the extended slice. Three pairs are added, in
// this order:
//
//	string: the matched text
//	offset: the offset supplied by the caller
//	length: len(match), i.e. the size of the matched text in bytes
func addMatch(original []*yaml.Node, match string, offset int) []*yaml.Node {
	matchLength := len(match)

	// Keys and values alternate, as expected for the Content of a mapping node.
	fields := []*yaml.Node{
		createScalarNode("string", "string"),
		createScalarNode(match, match),
		createScalarNode("offset", "offset"),
		createScalarNode(offset, fmt.Sprintf("%v", offset)),
		createScalarNode("length", "length"),
		createScalarNode(matchLength, fmt.Sprintf("%v", matchLength)),
	}
	return append(original, fields...)
}
// match runs regEx against value and pushes one result node per match onto
// results, each created as a child of candidate.
//
// Every result is a mapping with the keys "string", "offset", "length" and
// "captures". "captures" is a sequence containing one "string"/"offset"/
// "length" mapping per capture group in the regex. Offsets are the start
// indices reported by FindAllStringSubmatchIndex (byte positions in value);
// a capture group that did not participate in the match has an empty string
// and the negative offset reported by the regexp package.
func match(regEx *regexp.Regexp, candidate *CandidateNode, value string, results *list.List) {
	// allMatches[i] is the text of match i followed by the text of each of its
	// capture groups; allIndices[i] holds the matching [start, end] pairs in
	// the same order, flattened.
	allMatches := regEx.FindAllStringSubmatch(value, -1)
	allIndices := regEx.FindAllStringSubmatchIndex(value, -1)

	for i, matches := range allMatches {
		capturesNode := &yaml.Node{Kind: yaml.SequenceNode}
		matchedText, submatches := matches[0], matches[1:]

		for j, submatch := range submatches {
			captureNode := &yaml.Node{Kind: yaml.MappingNode}
			// Build the capture's fields on its own (empty) content. Seeding
			// it from capturesNode.Content would copy the previously added
			// capture mappings into this mapping from the second group on.
			// Group j+1 starts at index 2+j*2 of the flattened index pairs.
			captureNode.Content = addMatch(captureNode.Content, submatch, allIndices[i][2+j*2])
			capturesNode.Content = append(capturesNode.Content, captureNode)
		}

		node := &yaml.Node{Kind: yaml.MappingNode}
		node.Content = addMatch(node.Content, matchedText, allIndices[i][0])
		node.Content = append(node.Content,
			createScalarNode("captures", "captures"),
			capturesNode,
		)
		results.PushBack(candidate.CreateChild(nil, node))
	}
}
// matchOperator implements the `match` operator. It evaluates the operator's
// right-hand expression to obtain the regular expression, compiles it, and
// runs it against every node in the current context. Each regex match is
// emitted as a new result node (see match).
//
// If the right-hand expression yields no nodes the empty pattern is used.
// An error is returned when the expression cannot be evaluated, when the
// pattern does not compile, or when a context node is not tagged !!str.
func matchOperator(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) {
	// The regex is evaluated against a read-only clone of the context.
	regExNodes, err := d.GetMatchingNodes(context.ReadOnlyClone(), expressionNode.Rhs)
	if err != nil {
		return Context{}, err
	}
	log.Debug(NodesToString(regExNodes.MatchingNodes))

	// Only the first node produced by the expression is used as the pattern.
	regExStr := ""
	if first := regExNodes.MatchingNodes.Front(); first != nil {
		regExStr = first.Value.(*CandidateNode).Node.Value
	}
	// Debugf, not Debug: the message contains a format verb.
	log.Debugf("regEx %v", regExStr)

	regEx, err := regexp.Compile(regExStr)
	if err != nil {
		return Context{}, err
	}

	results := list.New()
	for el := context.MatchingNodes.Front(); el != nil; el = el.Next() {
		candidate := el.Value.(*CandidateNode)
		node := unwrapDoc(candidate.Node)
		if node.Tag != "!!str" {
			return Context{}, fmt.Errorf("cannot match with %v, can only match strings. Hint: Most often you'll want to use '|=' over '=' for this operation", node.Tag)
		}

		match(regEx, candidate, node.Value, results)
	}

	return context.ChildContext(results), nil
}
func joinStringOperator(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) {
log.Debugf("-- joinStringOperator")
joinStr := ""

View File

@ -5,61 +5,85 @@ import (
)
var stringsOperatorScenarios = []expressionScenario{
// {
// description: "Join strings",
// document: `[cat, meow, 1, null, true]`,
// expression: `join("; ")`,
// expected: []string{
// "D0, P[], (!!str)::cat; meow; 1; ; true\n",
// },
// },
{
description: "Join strings",
document: `[cat, meow, 1, null, true]`,
expression: `join("; ")`,
description: "Match string",
document: `cat`,
expression: `match("at")`,
expected: []string{
"D0, P[], (!!str)::cat; meow; 1; ; true\n",
"D0, P[], ()::string: at\noffset: 1\nlength: 2\ncaptures: []\n",
},
},
{
description: "Substitute / Replace string",
subdescription: "This uses golang regex, described [here](https://github.com/google/re2/wiki/Syntax)\nNote the use of `|=` to run in context of the current string value.",
document: `a: dogs are great`,
expression: `.a |= sub("dogs", "cats")`,
description: "Match string, case insensitive",
document: `cAt`,
expression: `match("(?i)at")`,
expected: []string{
"D0, P[], (doc)::a: cats are great\n",
"D0, P[], ()::string: At\noffset: 1\nlength: 2\ncaptures: []\n",
},
},
{
description: "Substitute / Replace string with regex",
subdescription: "This uses golang regex, described [here](https://github.com/google/re2/wiki/Syntax)\nNote the use of `|=` to run in context of the current string value.",
document: "a: cat\nb: heat",
expression: `.[] |= sub("(a)", "${1}r")`,
description: "Match with capture groups",
document: `a cat`,
expression: `match("c(.t)")`,
expected: []string{
"D0, P[], (doc)::a: cart\nb: heart\n",
"D0, P[], ()::string: cat\noffset: 2\nlength: 3\ncaptures:\n - string: at\n offset: 3\n length: 2\n",
},
},
{
description: "Split strings",
document: `"cat; meow; 1; ; true"`,
expression: `split("; ")`,
expected: []string{
"D0, P[], (!!seq)::- cat\n- meow\n- \"1\"\n- \"\"\n- \"true\"\n",
},
},
{
description: "Split strings one match",
document: `"word"`,
expression: `split("; ")`,
expected: []string{
"D0, P[], (!!seq)::- word\n",
},
},
{
skipDoc: true,
document: `""`,
expression: `split("; ")`,
expected: []string{
"D0, P[], (!!seq)::[]\n", // dont actually want this, just not to error
},
},
{
skipDoc: true,
expression: `split("; ")`,
expected: []string{},
},
// {
// description: "Substitute / Replace string",
// subdescription: "This uses golang regex, described [here](https://github.com/google/re2/wiki/Syntax)\nNote the use of `|=` to run in context of the current string value.",
// document: `a: dogs are great`,
// expression: `.a |= sub("dogs", "cats")`,
// expected: []string{
// "D0, P[], (doc)::a: cats are great\n",
// },
// },
// {
// description: "Substitute / Replace string with regex",
// subdescription: "This uses golang regex, described [here](https://github.com/google/re2/wiki/Syntax)\nNote the use of `|=` to run in context of the current string value.",
// document: "a: cat\nb: heat",
// expression: `.[] |= sub("(a)", "${1}r")`,
// expected: []string{
// "D0, P[], (doc)::a: cart\nb: heart\n",
// },
// },
// {
// description: "Split strings",
// document: `"cat; meow; 1; ; true"`,
// expression: `split("; ")`,
// expected: []string{
// "D0, P[], (!!seq)::- cat\n- meow\n- \"1\"\n- \"\"\n- \"true\"\n",
// },
// },
// {
// description: "Split strings one match",
// document: `"word"`,
// expression: `split("; ")`,
// expected: []string{
// "D0, P[], (!!seq)::- word\n",
// },
// },
// {
// skipDoc: true,
// document: `""`,
// expression: `split("; ")`,
// expected: []string{
// "D0, P[], (!!seq)::[]\n", // dont actually want this, just not to error
// },
// },
// {
// skipDoc: true,
// expression: `split("; ")`,
// expected: []string{},
// },
}
func TestStringsOperatorScenarios(t *testing.T) {