Added flatten operator

This commit is contained in:
Mike Farah 2021-10-26 15:42:25 +11:00
parent 2491051fd9
commit 80084e89cc
7 changed files with 210 additions and 8 deletions

70
pkg/yqlib/doc/Flatten.md Normal file
View File

@ -0,0 +1,70 @@
This recursively flattens arrays.
## Flatten
Recursively flattens all arrays
Given a sample.yml file of:
```yaml
- 1
- - 2
- - - 3
```
then
```bash
yq eval 'flatten' sample.yml
```
will output
```yaml
- 1
- 2
- 3
```
## Flatten with depth of one
Given a sample.yml file of:
```yaml
- 1
- - 2
- - - 3
```
then
```bash
yq eval 'flatten(1)' sample.yml
```
will output
```yaml
- 1
- 2
- - 3
```
## Flatten empty array
Given a sample.yml file of:
```yaml
- []
```
then
```bash
yq eval 'flatten' sample.yml
```
will output
```yaml
[]
```
## Flatten array of objects
Given a sample.yml file of:
```yaml
- foo: bar
- - foo: baz
```
then
```bash
yq eval 'flatten' sample.yml
```
will output
```yaml
- foo: bar
- foo: baz
```

View File

@ -0,0 +1 @@
This recursively flattens arrays.

View File

@ -131,20 +131,40 @@ func opTokenWithPrefs(op *operationType, assignOpType *operationType, preference
}
}
func encodeWithIndent(outputFormat PrinterOutputFormat) lex.Action {
// numberParameterPattern captures the digits of a trailing "(<digits>)"
// argument, e.g. the "2" in "to_yaml(2)". Compiled once at package scope so the
// lexer actions do not recompile it for every matched token.
var numberParameterPattern = regexp.MustCompile(`.*\(([0-9]+)\)`)

// extractNumberParamter returns the integer written between parentheses in
// value, e.g. 3 for "flatten(3)".
//
// An error is returned when value contains no "(<digits>)" group, or when the
// digits do not fit in a 32-bit signed integer. On error the returned int is 0.
func extractNumberParamter(value string) (int, error) {
	matches := numberParameterPattern.FindStringSubmatch(value)
	// FindStringSubmatch returns nil when nothing matched; indexing it blindly
	// would panic, so report a syntax error instead.
	if len(matches) < 2 {
		return 0, &strconv.NumError{Func: "extractNumberParamter", Num: value, Err: strconv.ErrSyntax}
	}

	// bitSize 32 keeps the int conversion below lossless on every platform.
	parsed, err := strconv.ParseInt(matches[1], 10, 32)
	if err != nil {
		return 0, err
	}
	return int(parsed), nil
}
// flattenWithDepth returns the lexer action for a flatten token that carries an
// explicit depth, e.g. `flatten(1)`. The action reads the number out of the
// matched text with extractNumberParamter and emits an operation token for
// flattenOpType whose Preferences is a flattenPreferences holding that depth.
//
// NOTE(review): this span appears to interleave removed-side diff lines from
// the previous encodeWithIndent body (the inline regexp/ParseInt lines and the
// encoderPreferences/encodeOpType lines, which reference outputFormat and
// redeclare prefs/op) with the new flattenWithDepth body — confirm against the
// real file before relying on it.
func flattenWithDepth() lex.Action {
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
parameterParser := regexp.MustCompile(`.*\(([0-9]+)\)`)
value := string(m.Bytes)
matches := parameterParser.FindStringSubmatch(value)
var indent, errParsingInt = strconv.ParseInt(matches[1], 10, 32) // nolint
var depth, errParsingInt = extractNumberParamter(value)
if errParsingInt != nil {
return nil, errParsingInt
}
prefs := encoderPreferences{format: outputFormat, indent: int(indent)}
op := &Operation{OperationType: encodeOpType, Value: encodeOpType.Type, StringValue: value + "i " + matches[1], Preferences: prefs}
prefs := flattenPreferences{depth: depth}
op := &Operation{OperationType: flattenOpType, Value: flattenOpType.Type, StringValue: value, Preferences: prefs}
return &token{TokenType: operationToken, Operation: op}, nil
}
}
// encodeWithIndent returns the lexer action for an encode token that carries an
// explicit indent, e.g. `to_yaml(2)`. The action parses the number from the
// matched text and emits an encodeOpType operation token whose preferences
// record the requested output format and indent. A number that cannot be parsed
// is returned as the action's error.
func encodeWithIndent(outputFormat PrinterOutputFormat) lex.Action {
	return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
		matched := string(m.Bytes)

		indent, err := extractNumberParamter(matched)
		if err != nil {
			return nil, err
		}

		encodeOp := &Operation{
			OperationType: encodeOpType,
			Value:         encodeOpType.Type,
			StringValue:   matched,
			Preferences:   encoderPreferences{format: outputFormat, indent: indent},
		}
		return &token{TokenType: operationToken, Operation: encodeOp}, nil
	}
}
@ -292,6 +312,9 @@ func initLexer() (*lex.Lexer, error) {
lexer.Add([]byte(`:\s*`), opToken(createMapOpType))
lexer.Add([]byte(`length`), opToken(lengthOpType))
lexer.Add([]byte(`flatten\([0-9]+\)`), flattenWithDepth())
lexer.Add([]byte(`flatten`), opTokenWithPrefs(flattenOpType, nil, flattenPreferences{depth: -1}))
lexer.Add([]byte(`toyaml\([0-9]+\)`), encodeWithIndent(YamlOutputFormat))
lexer.Add([]byte(`to_yaml\([0-9]+\)`), encodeWithIndent(YamlOutputFormat))

View File

@ -114,6 +114,7 @@ var hasOpType = &operationType{Type: "HAS", NumArgs: 1, Precedence: 50, Handler:
var uniqueOpType = &operationType{Type: "UNIQUE", NumArgs: 0, Precedence: 50, Handler: unique}
var uniqueByOpType = &operationType{Type: "UNIQUE_BY", NumArgs: 1, Precedence: 50, Handler: uniqueBy}
var groupByOpType = &operationType{Type: "GROUP_BY", NumArgs: 1, Precedence: 50, Handler: groupBy}
// flattenOpType is the operation type dispatched to flattenOp. NumArgs is 0:
// the flatten depth travels in the operation's Preferences (flattenPreferences)
// rather than as an expression argument.
// NOTE(review): the Type string "FLATTEN_BY" looks copied from GROUP_BY —
// confirm whether "FLATTEN" was intended before changing it.
var flattenOpType = &operationType{Type: "FLATTEN_BY", NumArgs: 0, Precedence: 50, Handler: flattenOp}
var deleteChildOpType = &operationType{Type: "DELETE", NumArgs: 1, Precedence: 40, Handler: deleteChildOperator}
type Operation struct {

View File

@ -0,0 +1,54 @@
package yqlib
import (
"fmt"
yaml "gopkg.in/yaml.v3"
)
// flattenPreferences carries the options of a flatten operation from the lexer
// to flattenOp.
type flattenPreferences struct {
// depth is the number of nesting levels to flatten. flatten only stops when
// its remaining depth reaches exactly 0, so the -1 used by the lexer for a bare
// `flatten` means "no limit".
depth int
}
// flatten replaces, in place, every sequence that is a direct child of node
// with that sequence's own elements, repeating the process up to depth levels
// deep. Non-sequence children are kept as they are and element order is
// preserved.
//
// Nothing happens when depth is 0 or node is not a sequence. A negative depth
// never reaches 0, so it flattens every level.
func flatten(node *yaml.Node, depth int) {
	if depth == 0 || node.Kind != yaml.SequenceNode {
		return
	}

	flattened := make([]*yaml.Node, 0)
	for _, child := range node.Content {
		if child.Kind != yaml.SequenceNode {
			flattened = append(flattened, child)
			continue
		}
		// Flatten the nested sequence with one level less of budget first, then
		// splice whatever it now contains into the parent.
		flatten(child, depth-1)
		flattened = append(flattened, child.Content...)
	}
	node.Content = flattened
}
// flattenOp is the handler for the flatten operator. For every node matched by
// the incoming context it flattens the (unwrapped) sequence in place, to the
// depth stored in the operation's flattenPreferences.
//
// It returns the incoming context itself, since the matched nodes are modified
// rather than replaced. An error is returned if the operation does not carry
// flattenPreferences or if any matched node is not a sequence; nodes visited
// before the offending one have already been flattened at that point.
func flattenOp(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) {
	log.Debugf("-- flatten Operator")

	// Checked assertion: a mis-built operation becomes an error, not a panic.
	prefs, ok := expressionNode.Operation.Preferences.(flattenPreferences)
	if !ok {
		return Context{}, fmt.Errorf("flatten operator is missing its preferences")
	}

	for el := context.MatchingNodes.Front(); el != nil; el = el.Next() {
		candidate := el.Value.(*CandidateNode)
		candidateNode := unwrapDoc(candidate.Node)
		if candidateNode.Kind != yaml.SequenceNode {
			// The message previously named "group by", a leftover from the
			// operator this handler was modelled on.
			return Context{}, fmt.Errorf("only arrays are supported for flatten")
		}
		flatten(candidateNode, prefs.depth)
	}
	return context, nil
}

View File

@ -0,0 +1,48 @@
package yqlib
import (
"testing"
)
// flattenOperatorScenarios lists the flatten operator cases: full flattening,
// flattening limited to a depth of one, an array holding only an empty array,
// and an array mixing objects with nested arrays of objects. The same table is
// passed to both testScenario and documentScenarios by
// TestFlattenOperatorScenarios.
var flattenOperatorScenarios = []expressionScenario{
{
description: "Flatten",
subdescription: "Recursively flattens all arrays",
document: `[1, [2], [[3]]]`,
expression: `flatten`,
expected: []string{
"D0, P[], (doc)::[1, 2, 3]\n",
},
},
{
description: "Flatten with depth of one",
document: `[1, [2], [[3]]]`,
expression: `flatten(1)`,
expected: []string{
"D0, P[], (doc)::[1, 2, [3]]\n",
},
},
{
description: "Flatten empty array",
document: `[[]]`,
expression: `flatten`,
expected: []string{
"D0, P[], (doc)::[]\n",
},
},
{
description: "Flatten array of objects",
document: `[{foo: bar}, [{foo: baz}]]`,
expression: `flatten`,
expected: []string{
"D0, P[], (doc)::[{foo: bar}, {foo: baz}]\n",
},
},
}
// TestFlattenOperatorScenarios runs each entry of flattenOperatorScenarios
// through testScenario, then hands the whole table to documentScenarios under
// the "Flatten" title.
func TestFlattenOperatorScenarios(t *testing.T) {
	for i := 0; i < len(flattenOperatorScenarios); i++ {
		// testScenario receives a pointer to a copy, never to the table entry.
		scenario := flattenOperatorScenarios[i]
		testScenario(t, &scenario)
	}
	documentScenarios(t, "Flatten", flattenOperatorScenarios)
}

View File

@ -1,3 +1,8 @@
4.14.1:
- Added group_by operator
- Added encode/decode operators (toyaml, fromjson etc)
- Added flatten operator
4.13.5:
- Performance improvement for deepMatch (thanks @pmatseykanets)
- Added manpage, included in tar.gz downloads as well as a separate tar.gz (#961)