From 3283c65dc4316b0a9dbb248c1d8d4b1730e30f3f Mon Sep 17 00:00:00 2001 From: Matt Benson Date: Fri, 29 Mar 2024 21:29:35 -0500 Subject: [PATCH] pivot operator (#1993) --- pkg/yqlib/doc/operators/headers/pivot.md | 3 + pkg/yqlib/doc/operators/pivot.md | 117 ++++++++++++++++++++++ pkg/yqlib/lexer_participle.go | 2 + pkg/yqlib/operation.go | 2 + pkg/yqlib/operator_pivot.go | 121 +++++++++++++++++++++++ pkg/yqlib/operator_pivot_test.go | 47 +++++++++ 6 files changed, 292 insertions(+) create mode 100644 pkg/yqlib/doc/operators/headers/pivot.md create mode 100644 pkg/yqlib/doc/operators/pivot.md create mode 100644 pkg/yqlib/operator_pivot.go create mode 100644 pkg/yqlib/operator_pivot_test.go diff --git a/pkg/yqlib/doc/operators/headers/pivot.md b/pkg/yqlib/doc/operators/headers/pivot.md new file mode 100644 index 00000000..de9ad197 --- /dev/null +++ b/pkg/yqlib/doc/operators/headers/pivot.md @@ -0,0 +1,3 @@ +# Pivot + +Emulates the `PIVOT` function supported by several popular RDBMS systems. diff --git a/pkg/yqlib/doc/operators/pivot.md b/pkg/yqlib/doc/operators/pivot.md new file mode 100644 index 00000000..62b1ba3a --- /dev/null +++ b/pkg/yqlib/doc/operators/pivot.md @@ -0,0 +1,117 @@ +# Pivot + +Emulates the `PIVOT` function supported by several popular RDBMS systems. + +## Pivot a sequence of sequences +Given a sample.yml file of: +```yaml +- - foo + - bar + - baz +- - sis + - boom + - bah +``` +then +```bash +yq 'pivot' sample.yml +``` +will output +```yaml +- - foo + - sis +- - bar + - boom +- - baz + - bah +``` + +## Pivot sequence of heterogeneous sequences +Missing values are "padded" to null. 
+ +Given a sample.yml file of: +```yaml +- - foo + - bar + - baz +- - sis + - boom + - bah + - blah +``` +then +```bash +yq 'pivot' sample.yml +``` +will output +```yaml +- - foo + - sis +- - bar + - boom +- - baz + - bah +- - + - blah +``` + +## Pivot sequence of maps +Given a sample.yml file of: +```yaml +- foo: a + bar: b + baz: c +- foo: x + bar: y + baz: z +``` +then +```bash +yq 'pivot' sample.yml +``` +will output +```yaml +foo: + - a + - x +bar: + - b + - y +baz: + - c + - z +``` + +## Pivot sequence of heterogeneous maps +Missing values are "padded" to null. + +Given a sample.yml file of: +```yaml +- foo: a + bar: b + baz: c +- foo: x + bar: y + baz: z + what: ever +``` +then +```bash +yq 'pivot' sample.yml +``` +will output +```yaml +foo: + - a + - x +bar: + - b + - y +baz: + - c + - z +what: + - + - ever +``` + diff --git a/pkg/yqlib/lexer_participle.go b/pkg/yqlib/lexer_participle.go index 2f65bddc..4b58d7ce 100644 --- a/pkg/yqlib/lexer_participle.go +++ b/pkg/yqlib/lexer_participle.go @@ -224,6 +224,8 @@ var participleYqRules = []*participleYqRule{ {"SubtractAssign", `\-=`, opToken(subtractAssignOpType), 0}, {"Subtract", `\-`, opToken(subtractOpType), 0}, {"Comment", `#.*`, nil, 0}, + + simpleOp("pivot", pivotOpType), } type yqAction func(lexer.Token) (*token, error) diff --git a/pkg/yqlib/operation.go b/pkg/yqlib/operation.go index df89247c..be1f6230 100644 --- a/pkg/yqlib/operation.go +++ b/pkg/yqlib/operation.go @@ -190,6 +190,8 @@ var groupByOpType = &operationType{Type: "GROUP_BY", NumArgs: 1, Precedence: 50, var flattenOpType = &operationType{Type: "FLATTEN_BY", NumArgs: 0, Precedence: 50, Handler: flattenOp} var deleteChildOpType = &operationType{Type: "DELETE", NumArgs: 1, Precedence: 40, Handler: deleteChildOperator} +var pivotOpType = &operationType{Type: "PIVOT", NumArgs: 0, Precedence: 50, Handler: pivotOperator} + // debugging purposes only func (p *Operation) toString() string { if p == nil { diff --git a/pkg/yqlib/operator_pivot.go 
b/pkg/yqlib/operator_pivot.go
new file mode 100644
index 00000000..095f2a7f
--- /dev/null
+++ b/pkg/yqlib/operator_pivot.go
@@ -0,0 +1,140 @@
+package yqlib
+
+import (
+	"container/list"
+	"fmt"
+)
+
+// getUniqueElementTag returns the tag shared by every element of seq, or ""
+// when seq has no elements. It fails as soon as two elements carry different
+// tags.
+func getUniqueElementTag(seq *CandidateNode) (string, error) {
+	if len(seq.Content) == 0 {
+		return "", nil
+	}
+	result := seq.Content[0].Tag
+	for _, el := range seq.Content[1:] {
+		if el.Tag != result {
+			return "", fmt.Errorf("sequence contains elements of %v and %v types", result, el.Tag)
+		}
+	}
+	return result, nil
+}
+
+// nullNodeFactory builds the placeholder node for cells that have no value
+// in the source data.
+var nullNodeFactory = func() *CandidateNode { return createScalarNode(nil, "") }
+
+// pad returns array grown to length elements, filling every new slot with a
+// fresh value from factory. An array that already holds at least length
+// elements is returned unchanged.
+func pad[E any](array []E, length int, factory func() E) []E {
+	for len(array) < length {
+		array = append(array, factory())
+	}
+	return array
+}
+
+// pivotSequences transposes a sequence of sequences: element j of row i
+// becomes element i of row j. Rows shorter than the longest row are padded
+// with null nodes. An empty seq is returned as is.
+func pivotSequences(seq *CandidateNode) *CandidateNode {
+	sz := len(seq.Content)
+	if sz == 0 {
+		return seq
+	}
+	// columns[j] collects element j of every row.
+	var columns [][]*CandidateNode
+
+	for i, row := range seq.Content {
+		for j, cell := range row.Content {
+			if j == len(columns) {
+				columns = append(columns, make([]*CandidateNode, 0, sz))
+			}
+			// Earlier rows that had no element j leave a gap; fill it with nulls.
+			columns[j] = append(pad(columns[j], i, nullNodeFactory), cell)
+		}
+	}
+
+	// The new nodes carry explicit tags so that tag-based operators, pivot
+	// included, can process the result.
+	result := CandidateNode{Kind: SequenceNode, Tag: "!!seq"}
+	for _, column := range columns {
+		e := CandidateNode{Kind: SequenceNode, Tag: "!!seq"}
+		e.AddChildren(pad(column, sz, nullNodeFactory))
+		result.AddChild(&e)
+	}
+	return &result
+}
+
+// pivotMaps turns a sequence of maps into a map of sequences: every distinct
+// key maps to the values found under it, one entry per source map. Keys keep
+// the order in which they are first seen, and a map lacking a key contributes
+// a null node.
+func pivotMaps(seq *CandidateNode) *CandidateNode {
+	sz := len(seq.Content)
+	if sz == 0 {
+		return &CandidateNode{Kind: MappingNode, Tag: "!!map"}
+	}
+	m := make(map[string][]*CandidateNode)
+	var keys []string // first-seen order, since map iteration order is random
+
+	for i, row := range seq.Content {
+		// The loop reads row.Content as alternating key and value nodes.
+		for j := 0; j+1 < len(row.Content); j += 2 {
+			k := row.Content[j].Value
+			e, seen := m[k]
+			if !seen {
+				keys = append(keys, k)
+				e = make([]*CandidateNode, 0, sz)
+			}
+			m[k] = append(pad(e, i, nullNodeFactory), row.Content[j+1])
+		}
+	}
+
+	result := CandidateNode{Kind: MappingNode, Tag: "!!map"}
+	for _, k := range keys {
+		pivotRow := CandidateNode{Kind: SequenceNode, Tag: "!!seq"}
+		pivotRow.AddChildren(pad(m[k], sz, nullNodeFactory))
+		// NOTE(review): the key node is rebuilt from its string value, so any
+		// tag or style on the original key is presumably lost; confirm.
+		result.AddKeyValueChild(createScalarNode(k, k), &pivotRow)
+	}
+	return &result
+}
+
+// pivotOperator implements the `pivot` operator. Each matching node must be a
+// sequence whose elements are all sequences or all maps, and is replaced in
+// the results by its pivoted form. An empty sequence has nothing to pivot and
+// is passed through unchanged.
+func pivotOperator(_ *dataTreeNavigator, context Context, _ *ExpressionNode) (Context, error) {
+	log.Debug("Pivot")
+	results := list.New()
+
+	for el := context.MatchingNodes.Front(); el != nil; el = el.Next() {
+		candidate := el.Value.(*CandidateNode)
+		if candidate.Tag != "!!seq" {
+			return Context{}, fmt.Errorf("cannot pivot node of type %v", candidate.Tag)
+		}
+		if len(candidate.Content) == 0 {
+			// Nothing to pivot: keep the empty sequence as it is.
+			results.PushBack(candidate)
+			continue
+		}
+		tag, err := getUniqueElementTag(candidate)
+		if err != nil {
+			return Context{}, err
+		}
+		var pivot *CandidateNode
+		switch tag {
+		case "!!seq":
+			pivot = pivotSequences(candidate)
+		case "!!map":
+			pivot = pivotMaps(candidate)
+		default:
+			return Context{}, fmt.Errorf("can only pivot elements of !!seq or !!map types, received %v", tag)
+		}
+		results.PushBack(pivot)
+	}
+	return context.ChildContext(results), nil
+}
diff --git a/pkg/yqlib/operator_pivot_test.go b/pkg/yqlib/operator_pivot_test.go
new file mode 100644
index 00000000..d4ca51d4
--- /dev/null
+++ b/pkg/yqlib/operator_pivot_test.go
@@ -0,0 +1,47 @@
+package yqlib
+
+import "testing"
+
+var pivotOperatorScenarios = []expressionScenario{
+	{
+		description: "Pivot a sequence of sequences",
+		document:    "[[foo, bar, baz], [sis, boom, bah]]\n",
+		expression:  `pivot`,
+		expected: []string{
+			"D0, P[], (!!seq)::- - foo\n - sis\n- - bar\n - boom\n- - baz\n - bah\n",
+		},
+	},
+	{
+		description:    "Pivot sequence of heterogeneous sequences",
+		subdescription: `Missing values are "padded" to null.`,
+		document:       "[[foo, bar, baz], [sis, boom, bah, blah]]\n",
+		expression:     `pivot`,
+		expected: []string{
+			"D0, P[], (!!seq)::- - foo\n - sis\n- - bar\n - boom\n- - baz\n - bah\n- -\n - blah\n",
+		},
+	},
+	{
+		description: "Pivot sequence of maps",
+		document:    "[{foo: a, bar: b, baz: c}, {foo: x, bar: y, baz: z}]\n",
+		expression:  `pivot`,
+		expected: []string{
+			"D0, P[], (!!map)::foo:\n - a\n - x\nbar:\n - b\n - y\nbaz:\n - c\n - z\n",
+		},
+	},
+	{
+		description:    "Pivot sequence of heterogeneous maps",
+		subdescription: `Missing values are "padded" to null.`,
+		document:       "[{foo: a, bar: b, baz: c}, {foo: x, bar: y, baz: z, what: ever}]\n",
+		expression:     `pivot`,
+		expected: []string{
+			"D0, P[], (!!map)::foo:\n - a\n - x\nbar:\n - b\n - y\nbaz:\n - c\n - z\nwhat:\n -\n - ever\n",
+		},
+	},
+}
+
+func TestPivotOperatorScenarios(t *testing.T) {
+	for _, tt := range pivotOperatorScenarios {
+		testScenario(t, &tt)
+	}
+	documentOperatorScenarios(t, "pivot", pivotOperatorScenarios)
+}