yq/pkg/yqlib/operator_traverse_path.go
StressTestor 0cc5c19843 fix: apply per-candidate index when traversing arrays/maps by a streamed index
`$o[.]` over a streamed context (e.g. `keys[] | $o[.]`) only returned the
first match. The index expression yields one index set per incoming
candidate, but traverseArrayOperator used only the first set
(rhs.MatchingNodes.Front()), dropping the rest.

Pair each index set with its candidate: when the LHS has one node per
candidate (e.g. `.[] | .[idx]`) each node is traversed with its own index
set; when the LHS collapses to a single node (a variable) it is traversed
against every index set. Covers both arrays and maps.

Fixes #2593.
2026-06-15 16:00:44 -06:00

421 lines
14 KiB
Go

package yqlib
import (
"container/list"
"fmt"
"slices"
"github.com/elliotchance/orderedmap"
)
// traversePreferences tunes how map and array traversal behaves. The zero
// value is the default behaviour.
type traversePreferences struct {
	// DontFollowAlias stops map traversal from expanding merge keys
	// (entries tagged !!merge).
	DontFollowAlias bool
	// IncludeMapKeys adds the key node of every matching map entry to the
	// results.
	IncludeMapKeys bool
	// DontAutoCreate disables creating a missing map entry on the fly
	// (by default, entries are created automatically).
	DontAutoCreate bool
	// DontIncludeMapValues leaves the value node of matching map entries out
	// of the results.
	DontIncludeMapValues bool
	// OptionalTraverse (e.g. .adf?) makes array traversal skip an index that
	// is not an integer instead of returning an error.
	OptionalTraverse bool
	// ExactKeyMatch compares keys for equality instead of using wild/glob
	// pattern matching (the default). Used for merge.
	ExactKeyMatch bool
}
// splat expands every node matched in context into all of its children. It
// does so by traversing with no indices at all, which the sequence and map
// traversals in this file treat as "select everything".
func splat(context Context, prefs traversePreferences) (Context, error) {
	// A nil slice has length zero, exactly like the empty slice used before.
	var everything []*CandidateNode
	return traverseNodesWithArrayIndices(context, everything, prefs)
}
// traversePathOperator applies the traverse operation carried by
// expressionNode to each node currently matched in context, in order, and
// returns a child context holding all of the resulting nodes. The first
// traversal error aborts the whole operation.
func traversePathOperator(_ *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) {
	log.Debugf("traversePathOperator")

	collected := list.New()
	for cursor := context.MatchingNodes.Front(); cursor != nil; cursor = cursor.Next() {
		candidate := cursor.Value.(*CandidateNode)
		found, err := traverse(context, candidate, expressionNode.Operation)
		if err != nil {
			return Context{}, err
		}
		collected.PushBackList(found)
	}

	return context.ChildContext(collected), nil
}
// resolveAliasChain follows an alias chain iteratively and returns the first
// node that is not an alias. A node that is not an alias is returned as is.
//
// It returns an error if the chain loops back onto a node it has already
// visited, or if an alias in the chain has no target node (following it would
// otherwise dereference a nil pointer and panic).
func resolveAliasChain(node *CandidateNode) (*CandidateNode, error) {
	if node.Kind != AliasNode {
		// Common case: skip allocating the visited set.
		return node, nil
	}
	visited := map[*CandidateNode]bool{}
	for node.Kind == AliasNode {
		if visited[node] {
			return nil, fmt.Errorf("alias cycle detected")
		}
		visited[node] = true
		log.Debug("its an alias!")
		if node.Alias == nil {
			// A dangling alias cannot be resolved; report it rather than
			// crash on the next node.Kind read.
			return nil, fmt.Errorf("alias '%v' does not point to a node", node.Value)
		}
		node = node.Alias
	}
	return node, nil
}
// traverse applies a single path operation to matchingNode and returns the
// nodes it selects.
//
// Aliases are resolved first. A null node is then given a kind based on the
// operation (sequence for an integer value, map otherwise) unless the
// operation value is "[]" or the context forbids auto-creation. Maps are
// traversed by the operation's string value, sequences by index; any other
// kind yields an empty list.
func traverse(context Context, matchingNode *CandidateNode, operation *Operation) (*list.List, error) {
	log.Debugf("Traversing %v", NodeToString(matchingNode))

	resolved, err := resolveAliasChain(matchingNode)
	if err != nil {
		return nil, err
	}
	matchingNode = resolved

	needsKindGuess := matchingNode.Tag == "!!null" && operation.Value != "[]" && !context.DontAutoCreate
	if needsKindGuess {
		log.Debugf("Guessing kind")
		// we must have added this automatically, lets guess what it should be now
		_, isInt := operation.Value.(int)
		_, isInt64 := operation.Value.(int64)
		if isInt || isInt64 {
			log.Debugf("probably an array")
			matchingNode.Kind = SequenceNode
		} else {
			log.Debugf("probably a map")
			matchingNode.Kind = MappingNode
		}
		matchingNode.Tag = ""
	}

	if matchingNode.Kind == MappingNode {
		log.Debugf("its a map with %v entries", len(matchingNode.Content)/2)
		keyNode := createStringScalarNode(operation.StringValue)
		prefs := operation.Preferences.(traversePreferences)
		return traverseMap(context, matchingNode, keyNode, prefs, false)
	}
	if matchingNode.Kind == SequenceNode {
		log.Debugf("its a sequence of %v things!", len(matchingNode.Content))
		prefs := operation.Preferences.(traversePreferences)
		return traverseArray(matchingNode, operation, prefs)
	}
	return list.New(), nil
}
// traverseArrayOperator evaluates a bracketed traversal: the LHS yields the
// nodes to index into and the RHS yields the indices (or keys) to fetch.
//
// If the RHS wraps a create-map operation the work is delegated to
// sliceArrayOperator, using the evaluated LHS as its context.
//
// Otherwise the RHS is evaluated against a read-only clone of the incoming
// context and produces one index set per matching node:
//   - when the LHS produced the same number of nodes, each LHS node is
//     traversed with the index set at the same position, so a
//     context-dependent index such as `$o[.]` over a `keys[]` stream keeps
//     every match (#2593);
//   - otherwise (e.g. the LHS is a variable that collapsed to one node) the
//     whole LHS is traversed once per index set.
//
// The result is returned as a child of the incoming context, not of the LHS.
func traverseArrayOperator(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) {
	log.Debugf("--traverseArrayOperator")

	// Both the slice path and the index path start from the evaluated LHS.
	lhs, err := d.GetMatchingNodes(context, expressionNode.LHS)
	if err != nil {
		return Context{}, err
	}

	if rhsNode := expressionNode.RHS; rhsNode != nil && rhsNode.RHS != nil && rhsNode.RHS.Operation.OperationType == createMapOpType {
		return sliceArrayOperator(d, lhs, rhsNode.RHS)
	}

	// The RHS is a collect expression; each of its results holds one set of
	// indices in its Content.
	indexSets, err := d.GetMatchingNodes(context.ReadOnlyClone(), expressionNode.RHS)
	if err != nil {
		return Context{}, err
	}

	var prefs traversePreferences
	if rawPrefs := expressionNode.Operation.Preferences; rawPrefs != nil {
		prefs = rawPrefs.(traversePreferences)
	}

	collected := list.New()

	if lhs.MatchingNodes.Len() != indexSets.MatchingNodes.Len() {
		// Counts differ: apply every index set to the full LHS result.
		for setEl := indexSets.MatchingNodes.Front(); setEl != nil; setEl = setEl.Next() {
			indices := setEl.Value.(*CandidateNode).Content
			traversed, err := traverseNodesWithArrayIndices(lhs, indices, prefs)
			if err != nil {
				return Context{}, err
			}
			collected.PushBackList(traversed.MatchingNodes)
		}
		return context.ChildContext(collected), nil
	}

	// Counts match: walk both lists in lockstep, pairing node i with set i.
	nodeEl, setEl := lhs.MatchingNodes.Front(), indexSets.MatchingNodes.Front()
	for ; nodeEl != nil; nodeEl, setEl = nodeEl.Next(), setEl.Next() {
		indices := setEl.Value.(*CandidateNode).Content
		single := context.SingleChildContext(nodeEl.Value.(*CandidateNode))
		traversed, err := traverseNodesWithArrayIndices(single, indices, prefs)
		if err != nil {
			return Context{}, err
		}
		collected.PushBackList(traversed.MatchingNodes)
	}
	return context.ChildContext(collected), nil
}
// traverseNodesWithArrayIndices looks up indicesToTraverse in every node
// matched by context and returns a child context with all of the nodes found,
// in order. An empty indicesToTraverse selects every child of each node.
func traverseNodesWithArrayIndices(context Context, indicesToTraverse []*CandidateNode, prefs traversePreferences) (Context, error) {
	gathered := list.New()
	for cursor := context.MatchingNodes.Front(); cursor != nil; cursor = cursor.Next() {
		found, err := traverseArrayIndices(context, cursor.Value.(*CandidateNode), indicesToTraverse, prefs)
		if err != nil {
			return Context{}, err
		}
		gathered.PushBackList(found)
	}
	return context.ChildContext(gathered), nil
}
// traverseArrayIndices looks up the given indices in a single node.
//
// Aliases are resolved first. A null node is converted in place: it becomes
// a map when the first index is not tagged !!int, and a sequence otherwise
// (including when there are no indices). Sequences and maps are then
// traversed with the indices; any other kind is skipped and yields an empty
// list.
func traverseArrayIndices(context Context, matchingNode *CandidateNode, indicesToTraverse []*CandidateNode, prefs traversePreferences) (*list.List, error) {
	target, err := resolveAliasChain(matchingNode)
	if err != nil {
		return nil, err
	}
	matchingNode = target

	if matchingNode.Tag == "!!null" {
		log.Debugf("OperatorArrayTraverse got a null - turning it into an empty array")
		// auto vivification: numeric indices mean an array, anything else
		// means we should create an object
		matchingNode.Tag = ""
		if len(indicesToTraverse) > 0 && indicesToTraverse[0].Tag != "!!int" {
			matchingNode.Kind = MappingNode
		} else {
			matchingNode.Kind = SequenceNode
		}
	}

	if matchingNode.Kind == SequenceNode {
		return traverseArrayWithIndices(matchingNode, indicesToTraverse, prefs)
	}
	if matchingNode.Kind == MappingNode {
		return traverseMapWithIndices(context, matchingNode, indicesToTraverse, prefs)
	}

	log.Debugf("OperatorArrayTraverse skipping %v as its a %v", matchingNode, matchingNode.Tag)
	return list.New(), nil
}
// traverseMapWithIndices looks up each node in indices as a key of the map
// candidate and returns the concatenated matches in index order. With no
// indices it splats the map instead.
func traverseMapWithIndices(context Context, candidate *CandidateNode, indices []*CandidateNode, prefs traversePreferences) (*list.List, error) {
	if len(indices) == 0 {
		return traverseMap(context, candidate, createStringScalarNode(""), prefs, true)
	}

	collected := list.New()
	for i := range indices {
		keyNode := indices[i]
		log.Debugf("traverseMapWithIndices: %v", keyNode.Value)
		found, err := traverseMap(context, candidate, keyNode, prefs, false)
		if err != nil {
			return nil, err
		}
		collected.PushBackList(found)
	}
	return collected, nil
}
// traverseArrayWithIndices returns the elements of the sequence node selected
// by indices, in index order. With no indices every element is returned.
//
// Each index value is parsed as an integer. A value that fails to parse is
// skipped when prefs.OptionalTraverse is set and is an error otherwise.
// An index at or beyond the end grows the sequence with null scalars until
// the index exists. A negative index counts back from the end and is an error
// if it reaches before the first element.
func traverseArrayWithIndices(node *CandidateNode, indices []*CandidateNode, prefs traversePreferences) (*list.List, error) {
	log.Debug("traverseArrayWithIndices")
	selected := list.New()

	if len(indices) == 0 {
		log.Debug("splatting")
		for _, child := range node.Content {
			selected.PushBack(child)
		}
		return selected, nil
	}

	for _, indexNode := range indices {
		log.Debugf("traverseArrayWithIndices: '%v'", indexNode.Value)
		position, err := parseInt(indexNode.Value)
		if err != nil {
			if prefs.OptionalTraverse {
				continue
			}
			return nil, fmt.Errorf("cannot index array with '%v' (%w)", indexNode.Value, err)
		}

		// Pad with nulls until the requested position exists.
		for len(node.Content) <= position {
			if len(node.Content) == 0 {
				// default to nice yaml formatting
				node.Style = 0
			}
			node.AddChild(createScalarNode(nil, "null"))
		}

		size := len(node.Content)
		resolved := position
		if resolved < 0 {
			resolved += size
			if resolved < 0 {
				return nil, fmt.Errorf("index [%v] out of range, array size is %v", position, size)
			}
		}
		selected.PushBack(node.Content[resolved])
	}
	return selected, nil
}
// keyMatches reports whether the map key node matches wantedKey. By default
// the comparison is delegated to matchKey; with exactKeyMatch (used for
// merge) the key's value must equal wantedKey exactly.
func keyMatches(key *CandidateNode, wantedKey string, exactKeyMatch bool) bool {
	if !exactKeyMatch {
		return matchKey(key.Value, wantedKey)
	}
	return key.Value == wantedKey
}
// traverseMap returns the nodes of the map matchingNode selected by keyNode's
// value, or every entry when splat is true.
//
// Matches are gathered by doTraverseMap into an ordered map keyed by each
// node's GetKey() and returned in that order. Which of the key and value
// nodes are included is controlled by prefs.IncludeMapKeys and
// prefs.DontIncludeMapValues.
//
// When nothing matches, the call is not a splat, and neither prefs nor
// context disables auto-creation, a new entry with a null value is added to
// matchingNode under keyNode and reported as the match.
func traverseMap(context Context, matchingNode *CandidateNode, keyNode *CandidateNode, prefs traversePreferences, splat bool) (*list.List, error) {
	var newMatches = orderedmap.NewOrderedMap()
	err := doTraverseMap(newMatches, matchingNode, keyNode.Value, prefs, splat)

	if err != nil {
		return nil, err
	}

	if !splat && !prefs.DontAutoCreate && !context.DontAutoCreate && newMatches.Len() == 0 {
		log.Debugf("no matches, creating one for %v", NodeToString(keyNode))
		//no matches, create one automagically
		valueNode := matchingNode.CreateChild()
		valueNode.Kind = ScalarNode
		valueNode.Tag = "!!null"
		valueNode.Value = "null"

		if len(matchingNode.Content) == 0 {
			// first entry of an empty map: reset the style to the default
			matchingNode.Style = 0
		}

		keyNode, valueNode = matchingNode.AddKeyValueChild(keyNode, valueNode)

		if prefs.IncludeMapKeys {
			newMatches.Set(keyNode.GetKey(), keyNode)
		}
		if !prefs.DontIncludeMapValues {
			newMatches.Set(valueNode.GetKey(), valueNode)
		}
	}

	// Flatten the ordered map into a list, preserving insertion order.
	results := list.New()
	for el := newMatches.Front(); el != nil; el = el.Next() {
		results.PushBack(el.Value)
	}
	return results, nil
}
// doTraverseMap records in newMatches the entries of the map node whose key
// matches wantedKey (or every entry when splat is true). Key nodes are
// recorded when prefs.IncludeMapKeys is set and value nodes unless
// prefs.DontIncludeMapValues is set, each under its GetKey().
//
// Merge keys (tagged !!merge) are expanded through traverseMergeAnchor unless
// prefs.DontFollowAlias is set or the merge key is itself the wanted key:
//   - with ConfiguredYamlPreferences.FixMergeAnchorToSpec, all merge keys are
//     expanded before the node's own entries, last merge key first, so that
//     explicit keys and earlier merge keys take precedence;
//   - without it, merge keys are expanded where they appear in the map, and a
//     one-time warning about the non-spec behaviour is logged.
func doTraverseMap(newMatches *orderedmap.OrderedMap, node *CandidateNode, wantedKey string, prefs traversePreferences, splat bool) error {
	// Content is a flat list of key, value, key, value, ...
	// so keys are in the even indices, values in odd.
	entries := node.Content

	if !prefs.DontFollowAlias && ConfiguredYamlPreferences.FixMergeAnchorToSpec {
		// Spec-compliant mode: merge first so explicit keys win, walking
		// backwards so earlier merge keys win over later ones
		// (normally there's just one '<<').
		for i := len(entries) - 2; i >= 0; i -= 2 {
			if entries[i].Tag != "!!merge" {
				continue
			}
			log.Debug("Merge anchor")
			if err := traverseMergeAnchor(newMatches, entries[i+1], wantedKey, prefs, splat); err != nil {
				return err
			}
		}
	}

	for i := 0; i+1 < len(entries); i += 2 {
		key, value := entries[i], entries[i+1]

		// A merge key is expanded rather than matched directly, unless the
		// caller asked for the merge key itself.
		expandMerge := key.Tag == "!!merge" && !prefs.DontFollowAlias && wantedKey != key.Value
		if expandMerge {
			if ConfiguredYamlPreferences.FixMergeAnchorToSpec {
				// already expanded by the loop above
				continue
			}
			log.Debug("Merge anchor")
			if showMergeAnchorToSpecWarning {
				log.Warning("--yaml-fix-merge-anchor-to-spec is false; causing merge anchors to override the existing values which isn't to the yaml spec. This flag will default to true in late 2025. See https://mikefarah.gitbook.io/yq/operators/traverse-read for more details.")
				showMergeAnchorToSpecWarning = false
			}
			if err := traverseMergeAnchor(newMatches, value, wantedKey, prefs, splat); err != nil {
				return err
			}
			continue
		}

		if !splat && !keyMatches(key, wantedKey, prefs.ExactKeyMatch) {
			continue
		}

		log.Debug("MATCHED")
		if prefs.IncludeMapKeys {
			log.Debug("including key")
			if !newMatches.Set(key.GetKey(), key) {
				log.Debug("overwriting existing key")
			}
		}
		if !prefs.DontIncludeMapValues {
			log.Debug("including value")
			if !newMatches.Set(value.GetKey(), value) {
				log.Debug("overwriting existing value")
			}
		}
	}

	return nil
}
// traverseMergeAnchor collects into newMatches the entries contributed by the
// value of a merge key. An alias is followed one step to its target first.
//
// A map target is traversed directly. A sequence target is traversed element
// by element (each element may itself be an alias to a map), in reverse order
// when ConfiguredYamlPreferences.FixMergeAnchorToSpec is set so that earlier
// values take precedence. Anything else, including a sequence element that is
// not a map, is logged at debug level and stops the merge without an error.
func traverseMergeAnchor(newMatches *orderedmap.OrderedMap, merge *CandidateNode, wantedKey string, prefs traversePreferences, splat bool) error {
	if merge.Kind == AliasNode {
		merge = merge.Alias
	}

	if merge.Kind == MappingNode {
		return doTraverseMap(newMatches, merge, wantedKey, prefs, splat)
	}
	if merge.Kind != SequenceNode {
		log.Debugf("can only use merge anchors with maps (!!map) or sequences (!!seq) of maps, but got %v", merge.Tag)
		return nil
	}

	children := slices.All(merge.Content)
	if ConfiguredYamlPreferences.FixMergeAnchorToSpec {
		// Reverse to make earlier values take precedence, following spec
		children = slices.Backward(merge.Content)
	}

	for _, child := range children {
		if child.Kind == AliasNode {
			child = child.Alias
		}
		if child.Kind != MappingNode {
			log.Debugf(
				"can only use merge anchors with maps (!!map) or sequences (!!seq) of maps, but got sequence containing %v",
				child.Tag)
			return nil
		}
		if err := doTraverseMap(newMatches, child, wantedKey, prefs, splat); err != nil {
			return err
		}
	}
	return nil
}
// traverseArray indexes the sequence candidate with the single index held in
// the operation's string value.
func traverseArray(candidate *CandidateNode, operation *Operation, prefs traversePreferences) (*list.List, error) {
	log.Debugf("operation Value %v", operation.Value)
	indexNode := &CandidateNode{Value: operation.StringValue}
	return traverseArrayWithIndices(candidate, []*CandidateNode{indexNode}, prefs)
}