Added sort_by operator

This commit is contained in:
Mike Farah 2021-12-04 13:54:12 +11:00
parent 2f05f7390f
commit e22df34158
7 changed files with 183 additions and 31 deletions

View File

@ -1,19 +0,0 @@
## Sort by string field
Given a sample.yml file of:
```yaml
- a: banana
- a: cat
- a: apple
```
then
```bash
yq eval 'sort_by(.a)' sample.yml
```
will output
```yaml
- a: apple
- a: banana
- a: cat
```

View File

@ -0,0 +1,5 @@
# Sort
Sorts an array. Use `sort` to sort an array as is, or `sort_by` to sort by a particular subfield.
Note that at this stage, `yq` only sorts scalar fields.

98
pkg/yqlib/doc/sort.md Normal file
View File

@ -0,0 +1,98 @@
# Sort
Sorts an array. Use `sort` to sort an array as is, or `sort_by` to sort by a particular subfield.
Note that at this stage, `yq` only sorts scalar fields.
## Sort by string field
Given a sample.yml file of:
```yaml
- a: banana
- a: cat
- a: apple
```
then
```bash
yq eval 'sort_by(.a)' sample.yml
```
will output
```yaml
- a: apple
- a: banana
- a: cat
```
## Sort is stable
Note the order of the elements is unchanged when equal in sorting.
Given a sample.yml file of:
```yaml
- a: banana
  b: 1
- a: banana
  b: 2
- a: banana
  b: 3
- a: banana
  b: 4
```
then
```bash
yq eval 'sort_by(.a)' sample.yml
```
will output
```yaml
- a: banana
  b: 1
- a: banana
  b: 2
- a: banana
  b: 3
- a: banana
  b: 4
```
## Sort by numeric field
Given a sample.yml file of:
```yaml
- a: 10
- a: 100
- a: 1
```
then
```bash
yq eval 'sort_by(.a)' sample.yml
```
will output
```yaml
- a: 1
- a: 10
- a: 100
```
## Sort, nulls come first
Given a sample.yml file of:
```yaml
- 8
- 3
- null
- 6
- true
- false
- cat
```
then
```bash
yq eval 'sort' sample.yml
```
will output
```yaml
- null
- false
- true
- 3
- 6
- 8
- cat
```

View File

@ -352,6 +352,8 @@ func initLexer() (*lex.Lexer, error) {
lexer.Add([]byte(`from_json`), opToken(decodeOpType))
lexer.Add([]byte(`sortKeys`), opToken(sortKeysOpType))
lexer.Add([]byte(`sort_keys`), opToken(sortKeysOpType))
lexer.Add([]byte(`load`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false}))
lexer.Add([]byte(`strload`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: true}))
lexer.Add([]byte(`select`), opToken(selectOpType))
@ -377,6 +379,7 @@ func initLexer() (*lex.Lexer, error) {
lexer.Add([]byte(`capture`), opToken(captureOpType))
lexer.Add([]byte(`test`), opToken(testOpType))
lexer.Add([]byte(`sort`), opToken(sortOpType))
lexer.Add([]byte(`sort_by`), opToken(sortByOpType))
lexer.Add([]byte(`any`), opToken(anyOpType))

View File

@ -99,6 +99,7 @@ var getPathOpType = &operationType{Type: "GET_PATH", NumArgs: 0, Precedence: 50,
var explodeOpType = &operationType{Type: "EXPLODE", NumArgs: 1, Precedence: 50, Handler: explodeOperator}
// Operation types for the sorting operators. Each entry names the handler
// function that implements it. NumArgs is presumably the number of argument
// expressions the operator takes (SORT_BY and SORT_KEYS declare one, SORT
// declares none) — confirm against the parser.
var sortByOpType = &operationType{Type: "SORT_BY", NumArgs: 1, Precedence: 50, Handler: sortByOperator}
var sortOpType = &operationType{Type: "SORT", NumArgs: 0, Precedence: 50, Handler: sortOperator}
var sortKeysOpType = &operationType{Type: "SORT_KEYS", NumArgs: 1, Precedence: 50, Handler: sortKeysOperator}
var joinStringOpType = &operationType{Type: "JOIN", NumArgs: 1, Precedence: 50, Handler: joinStringOperator}
var subStringOpType = &operationType{Type: "SUBSTR", NumArgs: 1, Precedence: 50, Handler: substituteStringOperator}

View File

@ -10,6 +10,12 @@ import (
yaml "gopkg.in/yaml.v3"
)
// sortOperator handles the argument-less `sort` operator by delegating to
// sortByOperator. It installs a self-reference expression
// (selfReferenceOpType) as expressionNode.Rhs first, so that, presumably,
// each array element serves as its own sort key.
//
// Note that expressionNode is modified in place: its Rhs is overwritten
// before the call to sortByOperator.
func sortOperator(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) {
	expressionNode.Rhs = &ExpressionNode{
		Operation: &Operation{OperationType: selfReferenceOpType},
	}
	return sortByOperator(d, context, expressionNode)
}
// context represents the current matching nodes in the expression pipeline
// expressionNode is the current expression (sort_by)
func sortByOperator(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) {
@ -44,9 +50,13 @@ func sortByOperator(d *dataTreeNavigator, context Context, expressionNode *Expre
sortableArray[i] = sortableNode{Node: originalNode, NodeToCompare: nodeToCompare}
if nodeToCompare.Kind != yaml.ScalarNode {
return Context{}, fmt.Errorf("sort only works for scalars, got %v", nodeToCompare.Tag)
}
sort.Sort(sortableArray)
}
sort.Stable(sortableArray)
sortedList := &yaml.Node{Kind: yaml.SequenceNode, Tag: "!!seq", Style: candidateNode.Style}
sortedList.Content = make([]*yaml.Node, len(candidateNode.Content))
@ -73,7 +83,27 @@ func (a sortableNodeArray) Less(i, j int) bool {
lhs := a[i].NodeToCompare
rhs := a[j].NodeToCompare
if lhs.Tag != rhs.Tag || lhs.Tag == "!!str" {
if lhs.Tag == "!!null" && rhs.Tag != "!!null" {
return true
} else if lhs.Tag != "!!null" && rhs.Tag == "!!null" {
return false
} else if lhs.Tag == "!!bool" && rhs.Tag != "!!bool" {
return true
} else if lhs.Tag != "!!bool" && rhs.Tag == "!!bool" {
return false
} else if lhs.Tag == "!!bool" && rhs.Tag == "!!bool" {
lhsTruthy, err := isTruthyNode(lhs)
if err != nil {
panic(fmt.Errorf("could not parse %v as boolean: %w", lhs.Value, err))
}
rhsTruthy, err := isTruthyNode(rhs)
if err != nil {
panic(fmt.Errorf("could not parse %v as boolean: %w", rhs.Value, err))
}
return !lhsTruthy && rhsTruthy
} else if lhs.Tag != rhs.Tag || lhs.Tag == "!!str" {
return strings.Compare(lhs.Value, rhs.Value) < 0
} else if lhs.Tag == "!!int" && rhs.Tag == "!!int" {
_, lhsNum, err := parseInt(lhs.Value)

View File

@ -8,22 +8,56 @@ var sortByOperatorScenarios = []expressionScenario{
document: "[{a: banana},{a: cat},{a: apple}]",
expression: `sort_by(.a)`,
expected: []string{
"D0, P[], (!!bool)::true\n",
"D0, P[], (!!seq)::[{a: apple}, {a: banana}, {a: cat}]\n",
},
},
{
description: "Sort is stable",
subdescription: "Note the order of the elements in unchanged when equal in sorting.",
document: "[{a: banana, b: 1}, {a: banana, b: 2}, {a: banana, b: 3}, {a: banana, b: 4}]",
expression: `sort_by(.a)`,
expected: []string{
"D0, P[], (!!seq)::[{a: banana, b: 1}, {a: banana, b: 2}, {a: banana, b: 3}, {a: banana, b: 4}]\n",
},
},
{
description: "Sort by numeric field",
document: "[{a: 10},{a: 100},{a: 1}]",
expression: `sort_by(.a)`,
expected: []string{
"D0, P[], (!!seq)::[{a: 1}, {a: 10}, {a: 100}]\n",
},
},
{
skipDoc: true,
document: "[{a: 1.1},{a: 1.001},{a: 1.01}]",
expression: `sort_by(.a)`,
expected: []string{
"D0, P[], (!!seq)::[{a: 1.001}, {a: 1.01}, {a: 1.1}]\n",
},
},
{
description: "Sort, nulls come first",
document: "[8,3,null,6, true, false, cat]",
expression: `sort`,
expected: []string{
"D0, P[], (!!seq)::[null, false, true, 3, 6, 8, cat]\n",
},
},
{
skipDoc: true,
description: "false before true",
document: "[{a: false, b: 1}, {a: true, b: 2}, {a: false, b: 3}]",
expression: `sort_by(.a)`,
expected: []string{
"D0, P[], (!!seq)::[{a: false, b: 1}, {a: false, b: 3}, {a: true, b: 2}]\n",
},
},
// {
// description: "Sort, nulls come first",
// document: "[8,3,null,6]",
// expression: `sort`,
// expected: []string{
// "D0, P[], (!!bool)::[null,3,6,8]\n",
// },
// },
}
// TestSortByOperatorScenarios runs every entry of sortByOperatorScenarios
// through testScenario and then hands the full list to documentScenarios.
func TestSortByOperatorScenarios(t *testing.T) {
for _, tt := range sortByOperatorScenarios {
testScenario(t, &tt)
}
// NOTE(review): documentScenarios is called with both "Sort" and "sort";
// this looks like the before/after lines of a diff shown together — confirm
// that only one call is intended.
documentScenarios(t, "Sort", sortByOperatorScenarios)
documentScenarios(t, "sort", sortByOperatorScenarios)
}