Added group_by operator

This commit is contained in:
Mike Farah 2021-10-26 15:07:50 +11:00
parent d390fdc641
commit 2491051fd9
8 changed files with 171 additions and 0 deletions

5
go.mod
View File

@ -13,8 +13,13 @@ require (
)
require (
github.com/inconshreveable/mousetrap v1.0.0 // indirect
github.com/mattn/go-colorable v0.1.8 // indirect
github.com/mattn/go-isatty v0.0.12 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/timtadh/data-structures v0.5.3 // indirect
golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e // indirect
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
)
go 1.17

View File

@ -1,5 +1,7 @@
Encode operators take the piped-in object structure and encode it as a string in the desired format. The decode operators do the opposite: they take a formatted string and decode it into the relevant object structure.
Note that you can optionally pass an indent value to the encode functions (see below).
These operators are useful for processing yaml documents that have stringified embedded yaml/json/props in them.
## Encode value as yaml string
Indent defaults to 2

54
pkg/yqlib/doc/Group By.md Normal file
View File

@ -0,0 +1,54 @@
This is used to group items in an array by an expression.
## Group by field
Given a sample.yml file of:
```yaml
- foo: 1
  bar: 10
- foo: 3
  bar: 100
- foo: 1
  bar: 1
```
then
```bash
yq eval 'group_by(.foo)' sample.yml
```
will output
```yaml
- - foo: 1
    bar: 10
  - foo: 1
    bar: 1
- - foo: 3
    bar: 100
```
## Group by field, with nulls
Given a sample.yml file of:
```yaml
- cat: dog
- foo: 1
  bar: 10
- foo: 3
  bar: 100
- no: foo for you
- foo: 1
  bar: 1
```
then
```bash
yq eval 'group_by(.foo)' sample.yml
```
will output
```yaml
- - cat: dog
  - no: foo for you
- - foo: 1
    bar: 10
  - foo: 1
    bar: 1
- - foo: 3
    bar: 100
```

View File

@ -0,0 +1 @@
This is used to group items in an array by an expression.

View File

@ -317,6 +317,7 @@ func initLexer() (*lex.Lexer, error) {
lexer.Add([]byte(`has`), opToken(hasOpType))
lexer.Add([]byte(`unique`), opToken(uniqueOpType))
lexer.Add([]byte(`unique_by`), opToken(uniqueByOpType))
lexer.Add([]byte(`group_by`), opToken(groupByOpType))
lexer.Add([]byte(`explode`), opToken(explodeOpType))
lexer.Add([]byte(`or`), opToken(orOpType))
lexer.Add([]byte(`and`), opToken(andOpType))

View File

@ -113,6 +113,7 @@ var selectOpType = &operationType{Type: "SELECT", NumArgs: 1, Precedence: 50, Ha
// Operation type descriptors for the HAS, UNIQUE, UNIQUE_BY, GROUP_BY and
// DELETE operators. Each one pairs a Type label with the number of arguments
// the operator takes (NumArgs), its Precedence value and the Handler function
// assigned to it.
var hasOpType = &operationType{Type: "HAS", NumArgs: 1, Precedence: 50, Handler: hasOperator}
var uniqueOpType = &operationType{Type: "UNIQUE", NumArgs: 0, Precedence: 50, Handler: unique}
var uniqueByOpType = &operationType{Type: "UNIQUE_BY", NumArgs: 1, Precedence: 50, Handler: uniqueBy}
var groupByOpType = &operationType{Type: "GROUP_BY", NumArgs: 1, Precedence: 50, Handler: groupBy}
var deleteChildOpType = &operationType{Type: "DELETE", NumArgs: 1, Precedence: 40, Handler: deleteChildOperator}
type Operation struct {

View File

@ -0,0 +1,76 @@
package yqlib
import (
"container/list"
"fmt"
"github.com/elliotchance/orderedmap"
yaml "gopkg.in/yaml.v3"
)
// processIntoGroups buckets the entries of node.Content by the result of
// evaluating rhsExp against each entry.
//
// Every entry is wrapped in a CandidateNode and the expression is evaluated
// against it in a single-child read-only context. The Value of the first
// matching node becomes the bucket key; entries for which the expression
// yields no matches are bucketed under the key "null". The returned ordered
// map associates each key with a *list.List holding the *yaml.Node entries
// that produced it, appended in the order they were visited.
//
// An error from evaluating the expression is returned unchanged, together
// with a nil map.
//
// NOTE(review): only Node.Value is used as the key, so the key node's tag and
// kind play no part in grouping - confirm that is intended for non-scalar keys.
func processIntoGroups(d *dataTreeNavigator, context Context, rhsExp *ExpressionNode, node *yaml.Node) (*orderedmap.OrderedMap, error) {
	groups := orderedmap.NewOrderedMap()

	for _, item := range node.Content {
		itemContext := context.SingleReadonlyChildContext(&CandidateNode{Node: item})
		evaluated, err := d.GetMatchingNodes(itemContext, rhsExp)
		if err != nil {
			return nil, err
		}

		// Fall back to "null" when the expression produced nothing.
		key := "null"
		if front := evaluated.MatchingNodes.Front(); front != nil {
			key = front.Value.(*CandidateNode).Node.Value
		}

		var bucket *list.List
		if existing, found := groups.Get(key); found {
			bucket = existing.(*list.List)
		} else {
			bucket = list.New()
			groups.Set(key, bucket)
		}
		bucket.PushBack(item)
	}

	return groups, nil
}
// groupBy is the handler for the group_by operator.
//
// For each node in context.MatchingNodes it unwraps the document node,
// requires the result to be a sequence, and splits its entries into groups
// with processIntoGroups using expressionNode.Rhs as the grouping expression.
// The groups are emitted as a "!!seq" node whose children are themselves
// "!!seq" nodes, one per group, and that node is attached as a child of the
// original candidate.
//
// It returns a child context holding one result per matched node. An error is
// returned if a matched node is not a sequence or if evaluating the grouping
// expression fails.
func groupBy(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) {
	log.Debugf("-- groupBy Operator")

	grouped := list.New()

	for match := context.MatchingNodes.Front(); match != nil; match = match.Next() {
		original := match.Value.(*CandidateNode)
		sequence := unwrapDoc(original.Node)

		if sequence.Kind != yaml.SequenceNode {
			return Context{}, fmt.Errorf("Only arrays are supported for group by")
		}

		buckets, err := processIntoGroups(d, context, expressionNode.Rhs, sequence)
		if err != nil {
			return Context{}, err
		}

		// Build the sequence-of-sequences result, walking the buckets in the
		// order the ordered map yields them.
		outer := &yaml.Node{Kind: yaml.SequenceNode, Tag: "!!seq"}
		for bucket := buckets.Front(); bucket != nil; bucket = bucket.Next() {
			inner := &yaml.Node{Kind: yaml.SequenceNode, Tag: "!!seq"}
			members := bucket.Value.(*list.List)
			for member := members.Front(); member != nil; member = member.Next() {
				inner.Content = append(inner.Content, member.Value.(*yaml.Node))
			}
			outer.Content = append(outer.Content, inner)
		}

		grouped.PushBack(original.CreateChild(nil, outer))
	}

	return context.ChildContext(grouped), nil
}

View File

@ -0,0 +1,31 @@
package yqlib
import (
"testing"
)
// groupByOperatorScenarios is the scenario table for the group_by operator.
// Each entry supplies an input document, the expression to evaluate and the
// expected output strings; the description labels the scenario.
//
// In the expected strings, the second and later items of a group are indented
// by two spaces so they line up under the first item of the nested sequence
// ("- - a\n  - b").
var groupByOperatorScenarios = []expressionScenario{
	{
		description: "Group by field",
		document:    `[{foo: 1, bar: 10}, {foo: 3, bar: 100}, {foo: 1, bar: 1}]`,
		expression:  `group_by(.foo)`,
		expected: []string{
			"D0, P[], (!!seq)::- - {foo: 1, bar: 10}\n  - {foo: 1, bar: 1}\n- - {foo: 3, bar: 100}\n",
		},
	},
	{
		// Entries without a .foo field end up together in a single group.
		description: "Group by field, with nulls",
		document:    `[{cat: dog}, {foo: 1, bar: 10}, {foo: 3, bar: 100}, {no: foo for you}, {foo: 1, bar: 1}]`,
		expression:  `group_by(.foo)`,
		expected: []string{
			"D0, P[], (!!seq)::- - {cat: dog}\n  - {no: foo for you}\n- - {foo: 1, bar: 10}\n  - {foo: 1, bar: 1}\n- - {foo: 3, bar: 100}\n",
		},
	},
}
// TestGroupByOperatorScenarios runs every group_by scenario through
// testScenario and then passes the whole table to documentScenarios under the
// title "Group By".
func TestGroupByOperatorScenarios(t *testing.T) {
	for _, tt := range groupByOperatorScenarios {
		// Copy the range variable before taking its address: with the loop
		// semantics in effect before Go 1.22 the variable is reused across
		// iterations, so &tt would otherwise be the same pointer every time.
		tt := tt
		testScenario(t, &tt)
	}
	documentScenarios(t, "Group By", groupByOperatorScenarios)
}