From a1698b740a7730c338db500cf5a3a287912e91d6 Mon Sep 17 00:00:00 2001 From: Mike Farah Date: Sat, 11 Feb 2023 04:44:15 +1100 Subject: [PATCH] Added ability to sort by multiple fields #1541 --- pkg/yqlib/doc/operators/sort.md | 22 +++++++++ pkg/yqlib/operator_sort.go | 80 +++++++++++++++++++++------------ pkg/yqlib/operator_sort_test.go | 26 +++++++++++ 3 files changed, 100 insertions(+), 28 deletions(-) diff --git a/pkg/yqlib/doc/operators/sort.md b/pkg/yqlib/doc/operators/sort.md index 51c79e4a..72a17305 100644 --- a/pkg/yqlib/doc/operators/sort.md +++ b/pkg/yqlib/doc/operators/sort.md @@ -25,6 +25,28 @@ will output - a: cat ``` +## Sort by multiple fields +Given a sample.yml file of: +```yaml +- a: dog +- a: cat + b: banana +- a: cat + b: apple +``` +then +```bash +yq 'sort_by(.a, .b)' sample.yml +``` +will output +```yaml +- a: cat + b: apple +- a: cat + b: banana +- a: dog +``` + ## Sort descending by string field Use sort with reverse to sort in descending order. diff --git a/pkg/yqlib/operator_sort.go b/pkg/yqlib/operator_sort.go index 62c85e53..01070f2d 100644 --- a/pkg/yqlib/operator_sort.go +++ b/pkg/yqlib/operator_sort.go @@ -42,18 +42,7 @@ func sortByOperator(d *dataTreeNavigator, context Context, expressionNode *Expre return Context{}, err } - nodeToCompare := &yaml.Node{Kind: yaml.ScalarNode, Tag: "!!null"} - if compareContext.MatchingNodes.Len() > 0 { - nodeToCompare = compareContext.MatchingNodes.Front().Value.(*CandidateNode).Node - } - - log.Debug("going to compare %v by %v", NodeToString(candidate.CreateReplacement(originalNode)), NodeToString(candidate.CreateReplacement(nodeToCompare))) - - sortableArray[i] = sortableNode{Node: originalNode, NodeToCompare: nodeToCompare, dateTimeLayout: context.GetDateTimeLayout()} - - if nodeToCompare.Kind != yaml.ScalarNode { - return Context{}, fmt.Errorf("sort only works for scalars, got %v", nodeToCompare.Tag) - } + sortableArray[i] = sortableNode{Node: originalNode, CompareContext: compareContext, dateTimeLayout: context.GetDateTimeLayout()} } @@ -72,7 +61,7 @@ func sortByOperator(d *dataTreeNavigator, context Context, expressionNode *Expre type sortableNode struct { Node *yaml.Node - NodeToCompare *yaml.Node + CompareContext Context dateTimeLayout string } @@ -82,9 +71,28 @@ func (a sortableNodeArray) Len() int { return len(a) } func (a sortableNodeArray) Swap(i, j int) { a[i], a[j] = a[j], a[i] } func (a sortableNodeArray) Less(i, j int) bool { - lhs := a[i].NodeToCompare - rhs := a[j].NodeToCompare + lhsContext := a[i].CompareContext + rhsContext := a[j].CompareContext + rhsEl := rhsContext.MatchingNodes.Front() + for lhsEl := lhsContext.MatchingNodes.Front(); lhsEl != nil && rhsEl != nil; lhsEl = lhsEl.Next() { + lhs := lhsEl.Value.(*CandidateNode) + rhs := rhsEl.Value.(*CandidateNode) + + result := a.compare(lhs.Node, rhs.Node, a[i].dateTimeLayout) + + if result < 0 { + return true + } else if result > 0 { + return false + } + + rhsEl = rhsEl.Next() + } + return false +} + +func (a sortableNodeArray) compare(lhs *yaml.Node, rhs *yaml.Node, dateTimeLayout string) int { lhsTag := lhs.Tag rhsTag := rhs.Tag @@ -99,7 +107,7 @@ func (a sortableNodeArray) Less(i, j int) bool { } isDateTime := lhsTag == "!!timestamp" && rhsTag == "!!timestamp" - layout := a[i].dateTimeLayout + layout := dateTimeLayout // if the lhs is a string, it might be a timestamp in a custom format. if lhsTag == "!!str" && layout != time.RFC3339 { _, errLhs := parseDateTime(layout, lhs.Value) @@ -108,13 +116,13 @@ func (a sortableNodeArray) Less(i, j int) bool { } if lhsTag == "!!null" && rhsTag != "!!null" { - return true + return -1 } else if lhsTag != "!!null" && rhsTag == "!!null" { - return false + return 1 } else if lhsTag == "!!bool" && rhsTag != "!!bool" { - return true + return -1 } else if lhsTag != "!!bool" && rhsTag == "!!bool" { - return false + return 1 } else if lhsTag == "!!bool" && rhsTag == "!!bool" { lhsTruthy, err := isTruthyNode(lhs) if err != nil { @@ -125,20 +133,30 @@ func (a sortableNodeArray) Less(i, j int) bool { if err != nil { panic(fmt.Errorf("could not parse %v as boolean: %w", rhs.Value, err)) } - - return !lhsTruthy && rhsTruthy + if lhsTruthy == rhsTruthy { + return 0 + } else if lhsTruthy { + return 1 + } + return -1 } else if isDateTime { lhsTime, err := parseDateTime(layout, lhs.Value) if err != nil { log.Warningf("Could not parse time %v with layout %v for sort, sorting by string instead: %w", lhs.Value, layout, err) - return strings.Compare(lhs.Value, rhs.Value) < 0 + return strings.Compare(lhs.Value, rhs.Value) } rhsTime, err := parseDateTime(layout, rhs.Value) if err != nil { log.Warningf("Could not parse time %v with layout %v for sort, sorting by string instead: %w", rhs.Value, layout, err) - return strings.Compare(lhs.Value, rhs.Value) < 0 + return strings.Compare(lhs.Value, rhs.Value) } - return lhsTime.Before(rhsTime) + if lhsTime.Equal(rhsTime) { + return 0 + } else if lhsTime.Before(rhsTime) { + return -1 + } + + return 1 } else if lhsTag == "!!int" && rhsTag == "!!int" { _, lhsNum, err := parseInt64(lhs.Value) if err != nil { @@ -148,7 +166,7 @@ func (a sortableNodeArray) Less(i, j int) bool { if err != nil { panic(err) } - return lhsNum < rhsNum + return int(lhsNum - rhsNum) } else if (lhsTag == "!!int" || lhsTag == "!!float") && (rhsTag == "!!int" || rhsTag == "!!float") { lhsNum, err := strconv.ParseFloat(lhs.Value, 64) if err != nil { @@ -158,8 +176,14 @@ func (a sortableNodeArray) Less(i, j int) bool { if err != nil { panic(err) } - return lhsNum < rhsNum + if lhsNum == rhsNum { + return 0 + } else if lhsNum < rhsNum { + return -1 + } + + return 1 } - return strings.Compare(lhs.Value, rhs.Value) < 0 + return strings.Compare(lhs.Value, rhs.Value) } diff --git a/pkg/yqlib/operator_sort_test.go b/pkg/yqlib/operator_sort_test.go index 21e5c990..9a8bc84a 100644 --- a/pkg/yqlib/operator_sort_test.go +++ b/pkg/yqlib/operator_sort_test.go @@ -11,6 +11,32 @@ var sortByOperatorScenarios = []expressionScenario{ "D0, P[], (!!seq)::[{a: apple}, {a: banana}, {a: cat}]\n", }, }, + { + description: "Sort by multiple fields", + document: "[{a: dog},{a: cat, b: banana},{a: cat, b: apple}]", + expression: `sort_by(.a, .b)`, + expected: []string{ + "D0, P[], (!!seq)::[{a: cat, b: apple}, {a: cat, b: banana}, {a: dog}]\n", + }, + }, + { + description: "Sort by multiple fields", + skipDoc: true, + document: "[{a: dog, b: good},{a: cat, c: things},{a: cat, b: apple}]", + expression: `sort_by(.a, .b)`, + expected: []string{ + "D0, P[], (!!seq)::[{a: cat, c: things}, {a: cat, b: apple}, {a: dog, b: good}]\n", + }, + }, + { + description: "Sort by multiple fields", + skipDoc: true, + document: "[{a: dog, b: 0.1},{a: cat, b: 0.01},{a: cat, b: 0.001}]", + expression: `sort_by(.a, .b)`, + expected: []string{ + "D0, P[], (!!seq)::[{a: cat, b: 0.001}, {a: cat, b: 0.01}, {a: dog, b: 0.1}]\n", + }, + }, { description: "Sort descending by string field", subdescription: "Use sort with reverse to sort in descending order.",