From 80084e89cc3c3d8f6d0243cbb66ec109b6a2b581 Mon Sep 17 00:00:00 2001 From: Mike Farah Date: Tue, 26 Oct 2021 15:42:25 +1100 Subject: [PATCH] Added flatten operator --- pkg/yqlib/doc/Flatten.md | 70 ++++++++++++++++++++++++++++++ pkg/yqlib/doc/headers/Flatten.md | 1 + pkg/yqlib/expression_tokeniser.go | 39 +++++++++++++---- pkg/yqlib/lib.go | 1 + pkg/yqlib/operator_flatten.go | 54 +++++++++++++++++++++++ pkg/yqlib/operator_flatten_test.go | 48 ++++++++++++++++++++ release_notes.txt | 5 +++ 7 files changed, 210 insertions(+), 8 deletions(-) create mode 100644 pkg/yqlib/doc/Flatten.md create mode 100644 pkg/yqlib/doc/headers/Flatten.md create mode 100644 pkg/yqlib/operator_flatten.go create mode 100644 pkg/yqlib/operator_flatten_test.go diff --git a/pkg/yqlib/doc/Flatten.md b/pkg/yqlib/doc/Flatten.md new file mode 100644 index 00000000..7a741136 --- /dev/null +++ b/pkg/yqlib/doc/Flatten.md @@ -0,0 +1,70 @@ +This recursively flattens arrays. + +## Flatten +Recursively flattens all arrays + +Given a sample.yml file of: +```yaml +- 1 +- - 2 +- - - 3 +``` +then +```bash +yq eval 'flatten' sample.yml +``` +will output +```yaml +- 1 +- 2 +- 3 +``` + +## Flatten with depth of one +Given a sample.yml file of: +```yaml +- 1 +- - 2 +- - - 3 +``` +then +```bash +yq eval 'flatten(1)' sample.yml +``` +will output +```yaml +- 1 +- 2 +- - 3 +``` + +## Flatten empty array +Given a sample.yml file of: +```yaml +- [] +``` +then +```bash +yq eval 'flatten' sample.yml +``` +will output +```yaml +[] +``` + +## Flatten array of objects +Given a sample.yml file of: +```yaml +- foo: bar +- - foo: baz +``` +then +```bash +yq eval 'flatten' sample.yml +``` +will output +```yaml +- foo: bar +- foo: baz +``` + diff --git a/pkg/yqlib/doc/headers/Flatten.md b/pkg/yqlib/doc/headers/Flatten.md new file mode 100644 index 00000000..e195ca4e --- /dev/null +++ b/pkg/yqlib/doc/headers/Flatten.md @@ -0,0 +1 @@ +This recursively flattens arrays. 
diff --git a/pkg/yqlib/expression_tokeniser.go b/pkg/yqlib/expression_tokeniser.go index 8f7db58e..0b1edd28 100644 --- a/pkg/yqlib/expression_tokeniser.go +++ b/pkg/yqlib/expression_tokeniser.go @@ -131,20 +131,40 @@ func opTokenWithPrefs(op *operationType, assignOpType *operationType, preference } } -func encodeWithIndent(outputFormat PrinterOutputFormat) lex.Action { +func extractNumberParamter(value string) (int, error) { + parameterParser := regexp.MustCompile(`.*\(([0-9]+)\)`) + matches := parameterParser.FindStringSubmatch(value) + var indent, errParsingInt = strconv.ParseInt(matches[1], 10, 32) // nolint + if errParsingInt != nil { + return 0, errParsingInt + } + return int(indent), nil +} + +func flattenWithDepth() lex.Action { return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { - - parameterParser := regexp.MustCompile(`.*\(([0-9]+)\)`) value := string(m.Bytes) - matches := parameterParser.FindStringSubmatch(value) - - var indent, errParsingInt = strconv.ParseInt(matches[1], 10, 32) // nolint + var depth, errParsingInt = extractNumberParamter(value) if errParsingInt != nil { return nil, errParsingInt } - prefs := encoderPreferences{format: outputFormat, indent: int(indent)} - op := &Operation{OperationType: encodeOpType, Value: encodeOpType.Type, StringValue: value + "i " + matches[1], Preferences: prefs} + prefs := flattenPreferences{depth: depth} + op := &Operation{OperationType: flattenOpType, Value: flattenOpType.Type, StringValue: value, Preferences: prefs} + return &token{TokenType: operationToken, Operation: op}, nil + } +} + +func encodeWithIndent(outputFormat PrinterOutputFormat) lex.Action { + return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { + value := string(m.Bytes) + var indent, errParsingInt = extractNumberParamter(value) + if errParsingInt != nil { + return nil, errParsingInt + } + + prefs := encoderPreferences{format: outputFormat, indent: indent} + op := &Operation{OperationType: encodeOpType, Value: 
encodeOpType.Type, StringValue: value, Preferences: prefs} return &token{TokenType: operationToken, Operation: op}, nil } } @@ -292,6 +312,9 @@ func initLexer() (*lex.Lexer, error) { lexer.Add([]byte(`:\s*`), opToken(createMapOpType)) lexer.Add([]byte(`length`), opToken(lengthOpType)) + lexer.Add([]byte(`flatten\([0-9]+\)`), flattenWithDepth()) + lexer.Add([]byte(`flatten`), opTokenWithPrefs(flattenOpType, nil, flattenPreferences{depth: -1})) + lexer.Add([]byte(`toyaml\([0-9]+\)`), encodeWithIndent(YamlOutputFormat)) lexer.Add([]byte(`to_yaml\([0-9]+\)`), encodeWithIndent(YamlOutputFormat)) diff --git a/pkg/yqlib/lib.go b/pkg/yqlib/lib.go index c87796b8..e0d4b36b 100644 --- a/pkg/yqlib/lib.go +++ b/pkg/yqlib/lib.go @@ -114,6 +114,7 @@ var hasOpType = &operationType{Type: "HAS", NumArgs: 1, Precedence: 50, Handler: var uniqueOpType = &operationType{Type: "UNIQUE", NumArgs: 0, Precedence: 50, Handler: unique} var uniqueByOpType = &operationType{Type: "UNIQUE_BY", NumArgs: 1, Precedence: 50, Handler: uniqueBy} var groupByOpType = &operationType{Type: "GROUP_BY", NumArgs: 1, Precedence: 50, Handler: groupBy} +var flattenOpType = &operationType{Type: "FLATTEN_BY", NumArgs: 0, Precedence: 50, Handler: flattenOp} var deleteChildOpType = &operationType{Type: "DELETE", NumArgs: 1, Precedence: 40, Handler: deleteChildOperator} type Operation struct { diff --git a/pkg/yqlib/operator_flatten.go b/pkg/yqlib/operator_flatten.go new file mode 100644 index 00000000..35c91ae8 --- /dev/null +++ b/pkg/yqlib/operator_flatten.go @@ -0,0 +1,54 @@ +package yqlib + +import ( + "fmt" + + yaml "gopkg.in/yaml.v3" +) + +type flattenPreferences struct { + depth int +} + +func flatten(node *yaml.Node, depth int) { + if depth == 0 { + return + } + if node.Kind != yaml.SequenceNode { + return + } + content := node.Content + newSeq := make([]*yaml.Node, 0) + + for i := 0; i < len(content); i++ { + if content[i].Kind == yaml.SequenceNode { + flatten(content[i], depth-1) + for j := 0; j < 
len(content[i].Content); j++ { + newSeq = append(newSeq, content[i].Content[j]) + } + } else { + newSeq = append(newSeq, content[i]) + } + } + node.Content = newSeq +} + +func flattenOp(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) { + + log.Debugf("-- flatten Operator") + depth := expressionNode.Operation.Preferences.(flattenPreferences).depth + + for el := context.MatchingNodes.Front(); el != nil; el = el.Next() { + candidate := el.Value.(*CandidateNode) + candidateNode := unwrapDoc(candidate.Node) + if candidateNode.Kind != yaml.SequenceNode { + return Context{}, fmt.Errorf("Only arrays are supported for flatten") + } + + flatten(candidateNode, depth) + + } + + return context, nil + +} diff --git a/pkg/yqlib/operator_flatten_test.go b/pkg/yqlib/operator_flatten_test.go new file mode 100644 index 00000000..94efea7b --- /dev/null +++ b/pkg/yqlib/operator_flatten_test.go @@ -0,0 +1,48 @@ +package yqlib + +import ( + "testing" +) + +var flattenOperatorScenarios = []expressionScenario{ + { + description: "Flatten", + subdescription: "Recursively flattens all arrays", + document: `[1, [2], [[3]]]`, + expression: `flatten`, + expected: []string{ + "D0, P[], (doc)::[1, 2, 3]\n", + }, + }, + { + description: "Flatten with depth of one", + document: `[1, [2], [[3]]]`, + expression: `flatten(1)`, + expected: []string{ + "D0, P[], (doc)::[1, 2, [3]]\n", + }, + }, + { + description: "Flatten empty array", + document: `[[]]`, + expression: `flatten`, + expected: []string{ + "D0, P[], (doc)::[]\n", + }, + }, + { + description: "Flatten array of objects", + document: `[{foo: bar}, [{foo: baz}]]`, + expression: `flatten`, + expected: []string{ + "D0, P[], (doc)::[{foo: bar}, {foo: baz}]\n", + }, + }, +} + +func TestFlattenOperatorScenarios(t *testing.T) { + for _, tt := range flattenOperatorScenarios { + testScenario(t, &tt) + } + documentScenarios(t, "Flatten", flattenOperatorScenarios) +} diff --git a/release_notes.txt 
b/release_notes.txt index d0b552cd..0e940c1d 100644 --- a/release_notes.txt +++ b/release_notes.txt @@ -1,3 +1,8 @@ +4.14.1: + - Added group_by operator + - Added encode/decode operators (toyaml, fromjson etc) + - Added flatten operator + 4.13.5: - Performance improvement for deepMatch (thanks @pmatseykanets) - Added manpage, included in tar.gz downloads as well as a separate tar.gz (#961)