Added unique operator

This commit is contained in:
Mike Farah 2021-05-14 09:43:52 +10:00
parent aa95ecd012
commit 8627441705
6 changed files with 196 additions and 0 deletions

82
pkg/yqlib/doc/Unique.md Normal file
View File

@ -0,0 +1,82 @@
This is used to filter out duplicated items in an array.
## Unique array of scalars (string/numbers)
Given a sample.yml file of:
```yaml
- 1
- 2
- 3
- 2
```
then
```bash
yq eval 'unique' sample.yml
```
will output
```yaml
- 1
- 2
- 3
```
## Unique nulls
Unique works on the node value, so it considers different representations of nulls to be different
Given a sample.yml file of:
```yaml
- ~
- null
- ~
- null
```
then
```bash
yq eval 'unique' sample.yml
```
will output
```yaml
- ~
- null
```
## Unique all nulls
Run against the node tag to unique all the nulls
Given a sample.yml file of:
```yaml
- ~
- null
- ~
- null
```
then
```bash
yq eval 'unique_by(tag)' sample.yml
```
will output
```yaml
- ~
```
## Unique array object fields
Given a sample.yml file of:
```yaml
- name: harry
pet: cat
- name: billy
pet: dog
- name: harry
pet: dog
```
then
```bash
yq eval 'unique_by(.name)' sample.yml
```
will output
```yaml
- name: harry
pet: cat
- name: billy
pet: dog
```

View File

@ -0,0 +1 @@
This is used to filter out duplicated items in an array.

View File

@ -259,6 +259,8 @@ func initLexer() (*lex.Lexer, error) {
lexer.Add([]byte(`sortKeys`), opToken(sortKeysOpType)) lexer.Add([]byte(`sortKeys`), opToken(sortKeysOpType))
lexer.Add([]byte(`select`), opToken(selectOpType)) lexer.Add([]byte(`select`), opToken(selectOpType))
lexer.Add([]byte(`has`), opToken(hasOpType)) lexer.Add([]byte(`has`), opToken(hasOpType))
lexer.Add([]byte(`unique`), opToken(uniqueOpType))
lexer.Add([]byte(`unique_by`), opToken(uniqueByOpType))
lexer.Add([]byte(`explode`), opToken(explodeOpType)) lexer.Add([]byte(`explode`), opToken(explodeOpType))
lexer.Add([]byte(`or`), opToken(orOpType)) lexer.Add([]byte(`or`), opToken(orOpType))
lexer.Add([]byte(`and`), opToken(andOpType)) lexer.Add([]byte(`and`), opToken(andOpType))

View File

@ -99,6 +99,8 @@ var recursiveDescentOpType = &operationType{Type: "RECURSIVE_DESCENT", NumArgs:
var selectOpType = &operationType{Type: "SELECT", NumArgs: 1, Precedence: 50, Handler: selectOperator} var selectOpType = &operationType{Type: "SELECT", NumArgs: 1, Precedence: 50, Handler: selectOperator}
var hasOpType = &operationType{Type: "HAS", NumArgs: 1, Precedence: 50, Handler: hasOperator} var hasOpType = &operationType{Type: "HAS", NumArgs: 1, Precedence: 50, Handler: hasOperator}
var uniqueOpType = &operationType{Type: "UNIQUE", NumArgs: 0, Precedence: 50, Handler: unique}
var uniqueByOpType = &operationType{Type: "UNIQUE_BY", NumArgs: 1, Precedence: 50, Handler: uniqueBy}
var deleteChildOpType = &operationType{Type: "DELETE", NumArgs: 1, Precedence: 40, Handler: deleteChildOperator} var deleteChildOpType = &operationType{Type: "DELETE", NumArgs: 1, Precedence: 40, Handler: deleteChildOperator}
type Operation struct { type Operation struct {

View File

@ -0,0 +1,59 @@
package yqlib
import (
"github.com/elliotchance/orderedmap"
"container/list"
yaml "gopkg.in/yaml.v3"
"fmt"
)
func unique(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) {
selfExpression := &ExpressionNode{Operation: &Operation{OperationType: selfReferenceOpType}}
uniqueByExpression := &ExpressionNode{Operation: &Operation{OperationType: uniqueByOpType}, Rhs: selfExpression}
return uniqueBy(d, context, uniqueByExpression)
}
func uniqueBy(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) {
log.Debugf("-- uniqueBy Operator")
var results = list.New()
for el := context.MatchingNodes.Front(); el != nil; el = el.Next() {
candidate := el.Value.(*CandidateNode)
candidateNode := unwrapDoc(candidate.Node)
if candidateNode.Kind != yaml.SequenceNode {
return Context{}, fmt.Errorf("Only arrays are supported for unique")
}
var newMatches = orderedmap.NewOrderedMap()
for _, node := range candidateNode.Content {
child := &CandidateNode{Node: node}
rhs, err := d.GetMatchingNodes(context.SingleChildContext(child), expressionNode.Rhs)
if err != nil {
return Context{}, err
}
first := rhs.MatchingNodes.Front()
keyCandidate := first.Value.(*CandidateNode)
keyValue := keyCandidate.Node.Value
_, exists := newMatches.Get(keyValue)
if !exists {
newMatches.Set(keyValue, child.Node)
}
}
resultNode := &yaml.Node{Kind: yaml.SequenceNode, Tag: "!!seq"}
for el := newMatches.Front(); el != nil; el = el.Next() {
resultNode.Content = append(resultNode.Content, el.Value.(*yaml.Node))
}
results.PushBack(candidate.CreateChild(nil, resultNode))
}
return context.ChildContext(results), nil
}

View File

@ -0,0 +1,50 @@
package yqlib
import (
"testing"
)
var uniqueOperatorScenarios = []expressionScenario{
{
description: "Unique array of scalars (string/numbers)",
document: `[1,2,3,2]`,
expression: `unique`,
expected: []string{
"D0, P[], (!!seq)::- 1\n- 2\n- 3\n",
},
},
{
description: "Unique nulls",
subdescription: "Unique works on the node value, so it considers different representations of nulls to be different",
document: `[~,null, ~, null]`,
expression: `unique`,
expected: []string{
"D0, P[], (!!seq)::- ~\n- null\n",
},
},
{
description: "Unique all nulls",
subdescription: "Run against the node tag to unique all the nulls",
document: `[~,null, ~, null]`,
expression: `unique_by(tag)`,
expected: []string{
"D0, P[], (!!seq)::- ~\n",
},
},
{
description: "Unique array object fields",
document: `[{name: harry, pet: cat}, {name: billy, pet: dog}, {name: harry, pet: dog}]`,
expression: `unique_by(.name)`,
expected: []string{
"D0, P[], (!!seq)::- {name: harry, pet: cat}\n- {name: billy, pet: dog}\n",
},
},
}
func TestUniqueOperatorScenarios(t *testing.T) {
for _, tt := range uniqueOperatorScenarios {
testScenario(t, &tt)
}
documentScenarios(t, "Unique", uniqueOperatorScenarios)
}