Added context variable for reduce

This commit is contained in:
Mike Farah 2021-02-15 17:31:12 +11:00
parent 99b08fd612
commit 9072e8d3b3
8 changed files with 137 additions and 8 deletions

View File

@ -35,6 +35,26 @@ will output
0
```
## Get file indices of multiple documents
Given a sample.yml file of:
```yaml
a: cat
```
And another sample another.yml file of:
```yaml
a: cat
```
then
```bash
yq eval-all 'fileIndex' sample.yml another.yml
```
will output
```yaml
0
---
1
```
## Get file index alias
Given a sample.yml file of:
```yaml

View File

@ -1,11 +1,27 @@
Reduce is a powerful way to process a collection of data into a new form.
```
<exp> as $<name> ireduce (<init>; <block>)
```
e.g.
```
.[] as $item ireduce (0; . + $item)
```
On the LHS we are configuring the collection of items that will be reduced `<exp>` as well as what each element will be called `$<name>`. Note that the array has been splatted into its individual elements.
On the RHS there is `<init>`, the starting value of the accumulator and `<block>`, the expression that will update the accumulator for each element in the collection.
Note that within the block expression, `.` will evaluate to the current value of the accumulator. This effectively means that within the `reduce` block you can no longer access data other than elements of the array set as `$<name>`. For simple things, this is probably fine, but often you will need to refer to other data elements.
This can be done by setting a variable using `as` and piping that into the `reduce` operation, or you can simply refer to `$context`, which holds exactly that data and is set automatically for your convenience. See the examples below.
## yq vs jq syntax
Reduce syntax in `yq` is a little different from `jq` - as `yq` (currently) isn't as sophisticated as `jq` and its only supports infix notation (e.g. a + b, the operator is in the middle of the two parameters) - where as `jq` uses a mix of infix notation with _prefix_ notation (e.g. `reduce a b` is like writing `+ a b`).
Reduce syntax in `yq` is a little different from `jq` - as `yq` (currently) isn't as sophisticated as `jq` and it only supports infix notation (e.g. a + b, where the operator is in the middle of the two parameters) - whereas `jq` uses a mix of infix notation with _prefix_ notation (e.g. `reduce a b` is like writing `+ a b`).
To that end, the reduce operator is called `ireduce` for backwards compatibility if a prefix version of `reduce` is ever added.
## Sum numbers
Given a sample.yml file of:
```yaml
@ -41,3 +57,47 @@ Cathy: apples
Bob: bananas
```
## Merge all documents together - using context
The _$context_ variable set by reduce lets you access the data outside the reduce block.
Given a sample.yml file of:
```yaml
a: cat
```
And another sample another.yml file of:
```yaml
b: dog
```
then
```bash
yq eval-all 'fileIndex as $item ireduce ({}; . * ($context | select(fileIndex==$item)) )' sample.yml another.yml
```
will output
```yaml
a: cat
b: dog
```
## Merge all documents together - without using context
`$context` is just a convenient variable that `reduce` sets; you can use your own for more control.
Given a sample.yml file of:
```yaml
c:
a: cat
```
And another sample another.yml file of:
```yaml
c:
b: dog
```
then
```bash
yq eval-all '.c as $root | fileIndex as $item ireduce ({}; . * ($root | select(fileIndex==$item)) )' sample.yml another.yml
```
will output
```yaml
a: cat
b: dog
```

View File

@ -1,7 +1,24 @@
Reduce is a powerful way to process a collection of data into a new form.
```
<exp> as $<name> ireduce (<init>; <block>)
```
e.g.
```
.[] as $item ireduce (0; . + $item)
```
On the LHS we are configuring the collection of items that will be reduced `<exp>` as well as what each element will be called `$<name>`. Note that the array has been splatted into its individual elements.
On the RHS there is `<init>`, the starting value of the accumulator and `<block>`, the expression that will update the accumulator for each element in the collection.
Note that within the block expression, `.` will evaluate to the current value of the accumulator. This effectively means that within the `reduce` block you can no longer access data other than elements of the array set as `$<name>`. For simple things, this is probably fine, but often you will need to refer to other data elements.
This can be done by setting a variable using `as` and piping that into the `reduce` operation, or you can simply refer to `$context`, which holds exactly that data and is set automatically for your convenience. See the examples below.
## yq vs jq syntax
Reduce syntax in `yq` is a little different from `jq` - as `yq` (currently) isn't as sophisticated as `jq` and its only supports infix notation (e.g. a + b, the operator is in the middle of the two parameters) - where as `jq` uses a mix of infix notation with _prefix_ notation (e.g. `reduce a b` is like writing `+ a b`).
Reduce syntax in `yq` is a little different from `jq` - as `yq` (currently) isn't as sophisticated as `jq` and it only supports infix notation (e.g. a + b, where the operator is in the middle of the two parameters) - whereas `jq` uses a mix of infix notation with _prefix_ notation (e.g. `reduce a b` is like writing `+ a b`).
To that end, the reduce operator is called `ireduce` for backwards compatibility if a prefix version of `reduce` is ever added.

View File

@ -186,7 +186,7 @@ func getVariableOpToken() lex.Action {
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
value := string(m.Bytes)
value = value[1 : len(value)-1]
value = value[1:]
getVarOperation := createValueOperation(value, value)
getVarOperation.OperationType = getVariableOpType

View File

@ -25,7 +25,7 @@ type operationType struct {
var orOpType = &operationType{Type: "OR", NumArgs: 2, Precedence: 20, Handler: orOperator}
var andOpType = &operationType{Type: "AND", NumArgs: 2, Precedence: 20, Handler: andOperator}
var reduceOpType = &operationType{Type: "REDUCE", NumArgs: 2, Precedence: 5, Handler: reduceOperator}
var reduceOpType = &operationType{Type: "REDUCE", NumArgs: 2, Precedence: 35, Handler: reduceOperator}
var blockOpType = &operationType{Type: "BLOCK", Precedence: 10, NumArgs: 2, Handler: emptyOperator}

View File

@ -21,6 +21,16 @@ var fileOperatorScenarios = []expressionScenario{
"D0, P[], (!!int)::0\n",
},
},
{
description: "Get file indices of multiple documents",
document: `{a: cat}`,
document2: `{a: cat}`,
expression: `fileIndex`,
expected: []string{
"D0, P[], (!!int)::0\n",
"D0, P[], (!!int)::1\n",
},
},
{
description: "Get file index alias",
document: `{a: cat}`,

View File

@ -39,6 +39,8 @@ func reduceOperator(d *dataTreeNavigator, context Context, expressionNode *Expre
return Context{}, err
}
accum.SetVariable("context", context.MatchingNodes)
log.Debugf("with variable %v", variableName)
blockExp := expressionNode.Rhs.Rhs

View File

@ -21,6 +21,26 @@ var reduceOperatorScenarios = []expressionScenario{
"D0, P[], (!!map)::Cathy: apples\nBob: bananas\n",
},
},
{
description: "Merge all documents together - using context",
subdescription: "The _$context_ variable set by reduce lets you access the data outside the reduce block.",
document: `a: cat`,
document2: `b: dog`,
expression: `fileIndex as $item ireduce ({}; . * ($context | select(fileIndex==$item)) )`,
expected: []string{
"D0, P[], (!!map)::a: cat\nb: dog\n",
},
},
{
description: "Merge all documents together - without using context",
subdescription: "`$context` is just a convenient variable that `reduce` sets, you can use your own for more control",
document: `c: {a: cat}`,
document2: `c: {b: dog}`,
expression: `.c as $root | fileIndex as $item ireduce ({}; . * ($root | select(fileIndex==$item)) )`,
expected: []string{
"D0, P[], (!!map)::{a: cat, b: dog}\n",
},
},
}
func TestReduceOperatorScenarios(t *testing.T) {