pivot operator (#1993)

This commit is contained in:
Matt Benson 2024-03-29 21:29:35 -05:00 committed by GitHub
parent 2ee7508b76
commit 3283c65dc4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 292 additions and 0 deletions

View File

@ -0,0 +1,3 @@
# Pivot
Emulates the `PIVOT` function supported by several popular relational database management systems (RDBMS).

View File

@ -0,0 +1,117 @@
# Pivot
Emulates the `PIVOT` function supported by several popular relational database management systems (RDBMS).
## Pivot a sequence of sequences
Given a sample.yml file of:
```yaml
- - foo
  - bar
  - baz
- - sis
  - boom
  - bah
```
then
```bash
yq 'pivot' sample.yml
```
will output
```yaml
- - foo
  - sis
- - bar
  - boom
- - baz
  - bah
```
## Pivot sequence of heterogeneous sequences
Missing values are "padded" to null.
Given a sample.yml file of:
```yaml
- - foo
  - bar
  - baz
- - sis
  - boom
  - bah
  - blah
```
then
```bash
yq 'pivot' sample.yml
```
will output
```yaml
- - foo
  - sis
- - bar
  - boom
- - baz
  - bah
- -
  - blah
```
## Pivot sequence of maps
Given a sample.yml file of:
```yaml
- foo: a
  bar: b
  baz: c
- foo: x
  bar: y
  baz: z
```
then
```bash
yq 'pivot' sample.yml
```
will output
```yaml
foo:
  - a
  - x
bar:
  - b
  - y
baz:
  - c
  - z
```
## Pivot sequence of heterogeneous maps
Missing values are "padded" to null.
Given a sample.yml file of:
```yaml
- foo: a
  bar: b
  baz: c
- foo: x
  bar: y
  baz: z
  what: ever
```
then
```bash
yq 'pivot' sample.yml
```
will output
```yaml
foo:
  - a
  - x
bar:
  - b
  - y
baz:
  - c
  - z
what:
  -
  - ever
```

View File

@ -224,6 +224,8 @@ var participleYqRules = []*participleYqRule{
{"SubtractAssign", `\-=`, opToken(subtractAssignOpType), 0}, {"SubtractAssign", `\-=`, opToken(subtractAssignOpType), 0},
{"Subtract", `\-`, opToken(subtractOpType), 0}, {"Subtract", `\-`, opToken(subtractOpType), 0},
{"Comment", `#.*`, nil, 0}, {"Comment", `#.*`, nil, 0},
simpleOp("pivot", pivotOpType),
} }
type yqAction func(lexer.Token) (*token, error) type yqAction func(lexer.Token) (*token, error)

View File

@ -190,6 +190,8 @@ var groupByOpType = &operationType{Type: "GROUP_BY", NumArgs: 1, Precedence: 50,
var flattenOpType = &operationType{Type: "FLATTEN_BY", NumArgs: 0, Precedence: 50, Handler: flattenOp} var flattenOpType = &operationType{Type: "FLATTEN_BY", NumArgs: 0, Precedence: 50, Handler: flattenOp}
var deleteChildOpType = &operationType{Type: "DELETE", NumArgs: 1, Precedence: 40, Handler: deleteChildOperator} var deleteChildOpType = &operationType{Type: "DELETE", NumArgs: 1, Precedence: 40, Handler: deleteChildOperator}
// pivotOpType describes the PIVOT operation: it takes no arguments and is handled by pivotOperator.
var pivotOpType = &operationType{Type: "PIVOT", NumArgs: 0, Precedence: 50, Handler: pivotOperator}
// debugging purposes only // debugging purposes only
func (p *Operation) toString() string { func (p *Operation) toString() string {
if p == nil { if p == nil {

121
pkg/yqlib/operator_pivot.go Normal file
View File

@ -0,0 +1,121 @@
package yqlib
import (
"container/list"
"fmt"
)
// getUniqueElementTag reports the tag shared by every child of seq.
//
// It returns an empty tag and a nil error when seq has no children. As soon
// as a child's tag differs from the first child's tag, it returns an error
// naming both tags.
func getUniqueElementTag(seq *CandidateNode) (string, error) {
	if len(seq.Content) == 0 {
		return "", nil
	}

	expected := seq.Content[0].Tag
	for _, child := range seq.Content[1:] {
		if child.Tag != expected {
			return "", fmt.Errorf("sequence contains elements of %v and %v types", expected, child.Tag)
		}
	}
	return expected, nil
}
// nullNodeFactory returns a new scalar node built from a nil value and an
// empty string representation on every call. It is the filler passed to pad
// when pivoted rows or columns are missing entries.
var nullNodeFactory = func() *CandidateNode {
	return createScalarNode(nil, "")
}
// pad extends array to at least length elements.
//
// If array already holds length or more elements it is returned unchanged.
// Otherwise factory is called once per missing slot, in order, and the
// produced values are appended to array in a single append.
func pad[E any](array []E, length int, factory func() E) []E {
	missing := length - len(array)
	if missing <= 0 {
		return array
	}

	filler := make([]E, 0, missing)
	for remaining := missing; remaining > 0; remaining-- {
		filler = append(filler, factory())
	}
	return append(array, filler...)
}
// pivotSequences transposes a sequence of sequences: the j-th child of the
// i-th row becomes the i-th child of the j-th output row.
//
// Rows may have different lengths; positions a row does not provide are
// filled with nodes produced by nullNodeFactory. A seq with no children is
// returned as is.
func pivotSequences(seq *CandidateNode) *CandidateNode {
	rowCount := len(seq.Content)
	if rowCount == 0 {
		return seq
	}

	// columns[j] collects, in row order, the j-th entry of every row.
	columns := make(map[int][]*CandidateNode)
	for rowIdx, row := range seq.Content {
		for colIdx, cell := range row.Content {
			column := columns[colIdx]
			if column == nil {
				column = make([]*CandidateNode, 0, rowCount)
			}
			// Earlier rows that were too short to reach this column leave a
			// gap; fill it before adding this row's value.
			column = pad(column, rowIdx, nullNodeFactory)
			columns[colIdx] = append(column, cell)
		}
	}

	transposed := &CandidateNode{Kind: SequenceNode}
	for colIdx := 0; colIdx < len(columns); colIdx++ {
		pivotRow := &CandidateNode{Kind: SequenceNode}
		// Trailing rows that were too short are padded here.
		pivotRow.AddChildren(pad(columns[colIdx], rowCount, nullNodeFactory))
		transposed.AddChild(pivotRow)
	}
	return transposed
}
// pivotMaps turns a sequence of mappings into a single mapping whose values
// are sequences: each key maps to the list of values found under that key,
// one entry per input row.
//
// Keys appear in the output in the order they are first encountered. Rows
// that lack a key contribute a node from nullNodeFactory at their position.
// A seq with no children yields a new, empty mapping node.
func pivotMaps(seq *CandidateNode) *CandidateNode {
	rowCount := len(seq.Content)
	if rowCount == 0 {
		return &CandidateNode{Kind: MappingNode}
	}

	valuesByKey := make(map[string][]*CandidateNode)
	var keyOrder []string

	for rowIdx, row := range seq.Content {
		// A mapping row's Content is read as alternating key and value nodes.
		for pos := 0; pos < len(row.Content); pos += 2 {
			key := row.Content[pos].Value
			value := row.Content[pos+1]

			collected := valuesByKey[key]
			if collected == nil {
				// First sighting of this key: remember where it goes in the output.
				keyOrder = append(keyOrder, key)
				collected = make([]*CandidateNode, 0, rowCount)
			}
			// Earlier rows without this key leave a gap; fill it first.
			collected = pad(collected, rowIdx, nullNodeFactory)
			valuesByKey[key] = append(collected, value)
		}
	}

	pivoted := &CandidateNode{Kind: MappingNode}
	for _, key := range keyOrder {
		pivotRow := &CandidateNode{Kind: SequenceNode}
		// Later rows without this key are padded here.
		pivotRow.AddChildren(pad(valuesByKey[key], rowCount, nullNodeFactory))
		pivoted.AddKeyValueChild(createScalarNode(key, key), pivotRow)
	}
	return pivoted
}
// pivotOperator implements the pivot operation for every node matched by the
// incoming context and returns a child context holding one result per match.
//
// Each matched node must be tagged !!seq, otherwise an error is returned.
// An empty sequence has nothing to transpose and is passed through unchanged.
// For a non-empty sequence all children must share one tag:
//   - !!seq children are transposed by pivotSequences;
//   - !!map children are merged into a mapping of sequences by pivotMaps;
//   - any other tag, or a mix of tags, results in an error.
func pivotOperator(_ *dataTreeNavigator, context Context, _ *ExpressionNode) (Context, error) {
	log.Debug("Pivot")
	results := list.New()

	for el := context.MatchingNodes.Front(); el != nil; el = el.Next() {
		candidate := el.Value.(*CandidateNode)
		if candidate.Tag != "!!seq" {
			return Context{}, fmt.Errorf("cannot pivot node of type %v", candidate.Tag)
		}

		// An empty sequence has no element tag to dispatch on. Without this
		// guard it would reach the default branch below and fail with a blank
		// type in the message, so hand it back as is.
		if len(candidate.Content) == 0 {
			results.PushBack(candidate)
			continue
		}

		tag, err := getUniqueElementTag(candidate)
		if err != nil {
			return Context{}, err
		}

		var pivot *CandidateNode
		switch tag {
		case "!!seq":
			pivot = pivotSequences(candidate)
		case "!!map":
			pivot = pivotMaps(candidate)
		default:
			return Context{}, fmt.Errorf("can only pivot elements of !!seq or !!map types, received %v", tag)
		}
		results.PushBack(pivot)
	}

	return context.ChildContext(results), nil
}

View File

@ -0,0 +1,47 @@
package yqlib
import "testing"
// pivotOperatorScenarios lists the pivot expression scenarios: each entry
// pairs an input document with the exact output expected from `pivot`.
// The same table is passed to documentOperatorScenarios by the test below.
var pivotOperatorScenarios = []expressionScenario{
// Rows of equal length.
{
description: "Pivot a sequence of sequences",
document: "[[foo, bar, baz], [sis, boom, bah]]\n",
expression: `pivot`,
expected: []string{
"D0, P[], ()::- - foo\n - sis\n- - bar\n - boom\n- - baz\n - bah\n",
},
},
// Second row is longer than the first; the first row's missing entry is null.
{
description: "Pivot sequence of heterogeneous sequences",
subdescription: `Missing values are "padded" to null.`,
document: "[[foo, bar, baz], [sis, boom, bah, blah]]\n",
expression: `pivot`,
expected: []string{
"D0, P[], ()::- - foo\n - sis\n- - bar\n - boom\n- - baz\n - bah\n- -\n - blah\n",
},
},
// Maps sharing the same keys.
{
description: "Pivot sequence of maps",
document: "[{foo: a, bar: b, baz: c}, {foo: x, bar: y, baz: z}]\n",
expression: `pivot`,
expected: []string{
"D0, P[], ()::foo:\n - a\n - x\nbar:\n - b\n - y\nbaz:\n - c\n - z\n",
},
},
// Second map has an extra key; the first map's missing value is null.
{
description: "Pivot sequence of heterogeneous maps",
subdescription: `Missing values are "padded" to null.`,
document: "[{foo: a, bar: b, baz: c}, {foo: x, bar: y, baz: z, what: ever}]\n",
expression: `pivot`,
expected: []string{
"D0, P[], ()::foo:\n - a\n - x\nbar:\n - b\n - y\nbaz:\n - c\n - z\nwhat:\n -\n - ever\n",
},
},
}
// TestPivotOperatorScenarios runs every entry of pivotOperatorScenarios
// through testScenario and then passes the whole table to
// documentOperatorScenarios under the name "pivot".
func TestPivotOperatorScenarios(t *testing.T) {
	for i := range pivotOperatorScenarios {
		// Hand testScenario a copy so the shared table entry itself is never
		// what the callee receives a pointer to.
		scenario := pivotOperatorScenarios[i]
		testScenario(t, &scenario)
	}
	documentOperatorScenarios(t, "pivot", pivotOperatorScenarios)
}