From 2491051fd9d926720d3b93b65e425c6f60e4c299 Mon Sep 17 00:00:00 2001 From: Mike Farah Date: Tue, 26 Oct 2021 15:07:50 +1100 Subject: [PATCH] Added group_by operator --- go.mod | 5 ++ pkg/yqlib/doc/Encoder and Decoder.md | 2 + pkg/yqlib/doc/Group By.md | 54 ++++++++++++++++++++ pkg/yqlib/doc/headers/Group By.md | 1 + pkg/yqlib/expression_tokeniser.go | 1 + pkg/yqlib/lib.go | 1 + pkg/yqlib/operator_group_by.go | 76 ++++++++++++++++++++++++++++ pkg/yqlib/operator_group_by_test.go | 31 ++++++++++++ 8 files changed, 171 insertions(+) create mode 100644 pkg/yqlib/doc/Group By.md create mode 100644 pkg/yqlib/doc/headers/Group By.md create mode 100644 pkg/yqlib/operator_group_by.go create mode 100644 pkg/yqlib/operator_group_by_test.go diff --git a/go.mod b/go.mod index bc4fa628..6f7e79f4 100644 --- a/go.mod +++ b/go.mod @@ -13,8 +13,13 @@ require ( ) require ( + github.com/inconshreveable/mousetrap v1.0.0 // indirect + github.com/mattn/go-colorable v0.1.8 // indirect + github.com/mattn/go-isatty v0.0.12 // indirect + github.com/spf13/pflag v1.0.5 // indirect github.com/timtadh/data-structures v0.5.3 // indirect golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e // indirect + golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect ) go 1.17 diff --git a/pkg/yqlib/doc/Encoder and Decoder.md b/pkg/yqlib/doc/Encoder and Decoder.md index e5586234..1c0741c8 100644 --- a/pkg/yqlib/doc/Encoder and Decoder.md +++ b/pkg/yqlib/doc/Encoder and Decoder.md @@ -1,5 +1,7 @@ Encode operators will take the piped in object structure and encode it as a string in the desired format. The decode operators do the opposite, they take a formatted string and decode it into the relevant object structure. +Note that you can optionally pass an indent value to the encode functions (see below). + These operators are useful to process yaml documents that have stringified embeded yaml/json/props in them. 
## Encode value as yaml string Indent defaults to 2 diff --git a/pkg/yqlib/doc/Group By.md b/pkg/yqlib/doc/Group By.md new file mode 100644 index 00000000..f135b1e9 --- /dev/null +++ b/pkg/yqlib/doc/Group By.md @@ -0,0 +1,54 @@ +This is used to group items in an array by an expression. + +## Group by field +Given a sample.yml file of: +```yaml +- foo: 1 + bar: 10 +- foo: 3 + bar: 100 +- foo: 1 + bar: 1 +``` +then +```bash +yq eval 'group_by(.foo)' sample.yml +``` +will output +```yaml +- - foo: 1 + bar: 10 + - foo: 1 + bar: 1 +- - foo: 3 + bar: 100 +``` + +## Group by field, with nulls +Given a sample.yml file of: +```yaml +- cat: dog +- foo: 1 + bar: 10 +- foo: 3 + bar: 100 +- no: foo for you +- foo: 1 + bar: 1 +``` +then +```bash +yq eval 'group_by(.foo)' sample.yml +``` +will output +```yaml +- - cat: dog + - no: foo for you +- - foo: 1 + bar: 10 + - foo: 1 + bar: 1 +- - foo: 3 + bar: 100 +``` + diff --git a/pkg/yqlib/doc/headers/Group By.md b/pkg/yqlib/doc/headers/Group By.md new file mode 100644 index 00000000..365cc698 --- /dev/null +++ b/pkg/yqlib/doc/headers/Group By.md @@ -0,0 +1 @@ +This is used to group items in an array by an expression.
diff --git a/pkg/yqlib/expression_tokeniser.go b/pkg/yqlib/expression_tokeniser.go
index 8ff25028..8f7db58e 100644
--- a/pkg/yqlib/expression_tokeniser.go
+++ b/pkg/yqlib/expression_tokeniser.go
@@ -317,6 +317,7 @@ func initLexer() (*lex.Lexer, error) {
 	lexer.Add([]byte(`has`), opToken(hasOpType))
 	lexer.Add([]byte(`unique`), opToken(uniqueOpType))
 	lexer.Add([]byte(`unique_by`), opToken(uniqueByOpType))
+	lexer.Add([]byte(`group_by`), opToken(groupByOpType))
 	lexer.Add([]byte(`explode`), opToken(explodeOpType))
 	lexer.Add([]byte(`or`), opToken(orOpType))
 	lexer.Add([]byte(`and`), opToken(andOpType))
diff --git a/pkg/yqlib/lib.go b/pkg/yqlib/lib.go
index 7c2d82c3..c87796b8 100644
--- a/pkg/yqlib/lib.go
+++ b/pkg/yqlib/lib.go
@@ -113,6 +113,7 @@ var selectOpType = &operationType{Type: "SELECT", NumArgs: 1, Precedence: 50, Ha
 var hasOpType = &operationType{Type: "HAS", NumArgs: 1, Precedence: 50, Handler: hasOperator}
 var uniqueOpType = &operationType{Type: "UNIQUE", NumArgs: 0, Precedence: 50, Handler: unique}
 var uniqueByOpType = &operationType{Type: "UNIQUE_BY", NumArgs: 1, Precedence: 50, Handler: uniqueBy}
+var groupByOpType = &operationType{Type: "GROUP_BY", NumArgs: 1, Precedence: 50, Handler: groupBy}
 var deleteChildOpType = &operationType{Type: "DELETE", NumArgs: 1, Precedence: 40, Handler: deleteChildOperator}
 
 type Operation struct {
diff --git a/pkg/yqlib/operator_group_by.go b/pkg/yqlib/operator_group_by.go
new file mode 100644
index 00000000..d15496d1
--- /dev/null
+++ b/pkg/yqlib/operator_group_by.go
@@ -0,0 +1,87 @@
+package yqlib
+
+import (
+	"container/list"
+	"fmt"
+
+	"github.com/elliotchance/orderedmap"
+	yaml "gopkg.in/yaml.v3"
+)
+
+// processIntoGroups buckets the children of the sequence node by the value
+// that rhsExp produces for each of them.
+//
+// Each child is wrapped in a CandidateNode and rhsExp is evaluated against it
+// in its own read-only child context. The group key is the Value of the first
+// matching node; children for which the expression matches nothing are filed
+// under the key "null".
+//
+// It returns an ordered map from key to a *list.List of the member
+// *yaml.Node values, or the first error raised while evaluating rhsExp.
+//
+// NOTE(review): only the scalar Value is used as the key, so results that
+// differ solely by tag or that are not scalars presumably land in the same
+// group - confirm this is intended.
+func processIntoGroups(d *dataTreeNavigator, context Context, rhsExp *ExpressionNode, node *yaml.Node) (*orderedmap.OrderedMap, error) {
+	groups := orderedmap.NewOrderedMap()
+	for _, item := range node.Content {
+		child := &CandidateNode{Node: item}
+		rhs, err := d.GetMatchingNodes(context.SingleReadonlyChildContext(child), rhsExp)
+		if err != nil {
+			return nil, err
+		}
+
+		key := "null"
+		if rhs.MatchingNodes.Len() > 0 {
+			key = rhs.MatchingNodes.Front().Value.(*CandidateNode).Node.Value
+		}
+
+		var members *list.List
+		if existing, found := groups.Get(key); found {
+			members = existing.(*list.List)
+		} else {
+			members = list.New()
+			groups.Set(key, members)
+		}
+		members.PushBack(item)
+	}
+	return groups, nil
+}
+
+// groupBy implements the group_by operator.
+//
+// Every node in context.MatchingNodes must be a sequence (after unwrapDoc);
+// otherwise an error is returned. Each sequence is replaced by a new sequence
+// of sequences, one inner sequence per group built by processIntoGroups from
+// expressionNode.Rhs. The original item nodes are reused, not copied.
+func groupBy(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) {
+	log.Debugf("-- groupBy Operator")
+	results := list.New()
+
+	for el := context.MatchingNodes.Front(); el != nil; el = el.Next() {
+		candidate := el.Value.(*CandidateNode)
+		candidateNode := unwrapDoc(candidate.Node)
+		if candidateNode.Kind != yaml.SequenceNode {
+			return Context{}, fmt.Errorf("Only arrays are supported for group by")
+		}
+
+		groups, err := processIntoGroups(d, context, expressionNode.Rhs, candidateNode)
+		if err != nil {
+			return Context{}, err
+		}
+
+		resultNode := &yaml.Node{Kind: yaml.SequenceNode, Tag: "!!seq"}
+		for groupEl := groups.Front(); groupEl != nil; groupEl = groupEl.Next() {
+			groupNode := &yaml.Node{Kind: yaml.SequenceNode, Tag: "!!seq"}
+			members := groupEl.Value.(*list.List)
+			for member := members.Front(); member != nil; member = member.Next() {
+				groupNode.Content = append(groupNode.Content, member.Value.(*yaml.Node))
+			}
+			resultNode.Content = append(resultNode.Content, groupNode)
+		}
+
+		results.PushBack(candidate.CreateChild(nil, resultNode))
+	}
+
+	return context.ChildContext(results), nil
+}
diff --git a/pkg/yqlib/operator_group_by_test.go b/pkg/yqlib/operator_group_by_test.go
new file mode 100644
index 00000000..c10e3bb4
--- /dev/null
+++ b/pkg/yqlib/operator_group_by_test.go
@@ -0,0 +1,36 @@
+package yqlib
+
+import (
+	"testing"
+)
+
+// groupByOperatorScenarios lists the group_by cases that are run as tests and
+// passed to documentScenarios under the "Group By" title.
+var groupByOperatorScenarios = []expressionScenario{
+	{
+		description: "Group by field",
+		document:    `[{foo: 1, bar: 10}, {foo: 3, bar: 100}, {foo: 1, bar: 1}]`,
+		expression:  `group_by(.foo)`,
+		expected: []string{
+			"D0, P[], (!!seq)::- - {foo: 1, bar: 10}\n  - {foo: 1, bar: 1}\n- - {foo: 3, bar: 100}\n",
+		},
+	},
+	{
+		description: "Group by field, with nulls",
+		document:    `[{cat: dog}, {foo: 1, bar: 10}, {foo: 3, bar: 100}, {no: foo for you}, {foo: 1, bar: 1}]`,
+		expression:  `group_by(.foo)`,
+		expected: []string{
+			"D0, P[], (!!seq)::- - {cat: dog}\n  - {no: foo for you}\n- - {foo: 1, bar: 10}\n  - {foo: 1, bar: 1}\n- - {foo: 3, bar: 100}\n",
+		},
+	},
+}
+
+// TestGroupByOperatorScenarios runs every group_by scenario and then hands the
+// scenario list to documentScenarios.
+func TestGroupByOperatorScenarios(t *testing.T) {
+	for _, tt := range groupByOperatorScenarios {
+		tt := tt // per-iteration copy so &tt never aliases the shared range variable
+		testScenario(t, &tt)
+	}
+	documentScenarios(t, "Group By", groupByOperatorScenarios)
+}