yq/pkg/yqlib/lib.go
Jan Dubois 546f52515c Add Kind checks to findInArray and findKeyInMap
findInArray and findKeyInMap accept any *CandidateNode but produce
wrong results when called on the wrong Kind: findInArray uses stride 1,
correct for SequenceNodes but dangerous on MappingNodes (where
key-value pairs live at even-odd indices); findKeyInMap uses stride 2,
correct for MappingNodes but silently skips elements in SequenceNodes.

Commit b0ba9589 fixed two call sites that passed MappingNodes to
findInArray, but nothing prevents the same mistake from recurring.
Each function now logs a warning and returns -1 on a Kind mismatch,
surfacing misuse in tests and debug output rather than letting it
corrupt results silently.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-22 18:49:32 -07:00

342 lines
7.9 KiB
Go

// Package yqlib exposes the top level Evaluator and StreamEvaluator, which evaluate expressions and return matches.
package yqlib
import (
"container/list"
"fmt"
"log/slog"
"math"
"strconv"
"strings"
)
// ExpressionParser is the package-level expression parser. It starts out as
// the zero value and is populated by InitExpressionParser when still nil.
var ExpressionParser ExpressionParserInterface
// InitExpressionParser assigns a freshly created parser to the package-level
// ExpressionParser when none has been set yet. If a parser is already in
// place it is left untouched.
func InitExpressionParser() {
	if ExpressionParser != nil {
		return
	}
	ExpressionParser = newExpressionParser()
}
// log is the package-level logger used by the functions in this file; it is
// the instance handed out by GetLogger.
var log = newLogger()
// PrettyPrintExp is a yq expression that assigns an empty style to every node
// except !!str nodes whose value matches (case-insensitively) one of
// y, yes, n, no, on or off.
// NOTE(review): presumably those strings are skipped so they keep their
// quoting and are not re-read as booleans — confirm against the callers.
var PrettyPrintExp = `(... | (select(tag != "!!str"), select(tag == "!!str") | select(test("(?i)^(y|yes|n|no|on|off)$") | not)) ) style=""`
// GetLogger returns the yq logger instance, i.e. the package-level log
// variable that the rest of this package writes to.
func GetLogger() *Logger {
return log
}
// getContentValueByKey treats content as a flat list of alternating key and
// value nodes and returns the value node that follows the first key whose
// Value equals key. A trailing key without a value is never examined. It
// returns nil when no key matches.
func getContentValueByKey(content []*CandidateNode, key string) *CandidateNode {
	// Walk the value positions (odd indices); the key sits just before each.
	for valueIdx := 1; valueIdx < len(content); valueIdx += 2 {
		if content[valueIdx-1].Value == key {
			return content[valueIdx]
		}
	}
	return nil
}
// recurseNodeArrayEqual reports whether lhs and rhs have the same number of
// Content entries and every pair of entries at the same position is equal
// according to recursiveNodeEqual.
func recurseNodeArrayEqual(lhs *CandidateNode, rhs *CandidateNode) bool {
	if len(lhs.Content) != len(rhs.Content) {
		return false
	}
	for position, left := range lhs.Content {
		if !recursiveNodeEqual(left, rhs.Content[position]) {
			return false
		}
	}
	return true
}
// findInArray returns the index of the first entry in array.Content that is
// equal to item according to recursiveNodeEqual, or -1 when there is none.
//
// Every entry is compared, so this is only meaningful for sequences. When
// array is not a SequenceNode a warning is logged and -1 is returned.
func findInArray(array *CandidateNode, item *CandidateNode) int {
	if array.Kind != SequenceNode {
		log.Warningf("findInArray called on %v node, expected SequenceNode", array.Tag)
		return -1
	}
	for position, element := range array.Content {
		if recursiveNodeEqual(element, item) {
			return position
		}
	}
	return -1
}
// findKeyInMap returns the Content index of the first key in dataMap that is
// equal to item according to recursiveNodeEqual, or -1 when there is none.
//
// Only every second entry (the key positions 0, 2, 4, ...) is compared. When
// dataMap is not a MappingNode a warning is logged and -1 is returned.
func findKeyInMap(dataMap *CandidateNode, item *CandidateNode) int {
	if dataMap.Kind != MappingNode {
		log.Warningf("findKeyInMap called on %v node, expected MappingNode", dataMap.Tag)
		return -1
	}
	entries := dataMap.Content
	for keyIdx := 0; keyIdx < len(entries); keyIdx += 2 {
		if recursiveNodeEqual(entries[keyIdx], item) {
			return keyIdx
		}
	}
	return -1
}
// recurseNodeObjectEqual reports whether lhs and rhs hold the same key/value
// pairs, regardless of the order in which the pairs appear. Content is read
// as alternating key and value entries: each lhs key is looked up in rhs with
// findKeyInMap and the associated values are compared with
// recursiveNodeEqual.
func recurseNodeObjectEqual(lhs *CandidateNode, rhs *CandidateNode) bool {
	if len(lhs.Content) != len(rhs.Content) {
		return false
	}
	for keyIdx := 0; keyIdx < len(lhs.Content); keyIdx += 2 {
		lhsKey, lhsValue := lhs.Content[keyIdx], lhs.Content[keyIdx+1]

		rhsKeyIdx := findKeyInMap(rhs, lhsKey)
		if rhsKeyIdx == -1 {
			return false
		}
		// The value for a key sits directly after it in Content.
		if !recursiveNodeEqual(lhsValue, rhs.Content[rhsKeyIdx+1]) {
			return false
		}
	}
	return true
}
// parseSnippet decodes value as YAML and returns the resulting node.
//
// An empty value yields a scalar node tagged !!null. Otherwise the decoded
// node's LeadingContent is moved into LineComment (scalar nodes) or
// HeadComment (all other kinds) and then cleared. If the decoded node is
// tagged !!str, a new scalar node built from the raw input is returned
// instead, carrying over only the line comment. For every other tag the
// node's Line and Column are reset to 0 before it is returned.
//
// Errors from initialising the decoder or from decoding are returned as-is
// with a nil node.
func parseSnippet(value string) (*CandidateNode, error) {
	if value == "" {
		return &CandidateNode{Kind: ScalarNode, Tag: "!!null"}, nil
	}

	decoder := NewYamlDecoder(ConfiguredYamlPreferences)
	if err := decoder.Init(strings.NewReader(value)); err != nil {
		return nil, err
	}
	parsed, err := decoder.Decode()
	if err != nil {
		return nil, err
	}

	leading := parsed.LeadingContent
	parsed.LeadingContent = ""
	if parsed.Kind == ScalarNode {
		parsed.LineComment = leading
	} else {
		parsed.HeadComment = leading
	}

	if parsed.Tag == "!!str" {
		// Use the original string value, as decoding drops new lines.
		strNode := createScalarNode(value, value)
		strNode.LineComment = parsed.LineComment
		return strNode, nil
	}

	parsed.Line = 0
	parsed.Column = 0
	return parsed, nil
}
// recursiveNodeEqual reports whether lhs and rhs are structurally equal.
//
// Nodes of different Kind are never equal. Scalars must additionally agree on
// the tag reported by guessTagFromCustomType; the tags of maps and arrays are
// not compared. Once those checks pass, an lhs tagged !!null is equal to rhs;
// otherwise scalars compare by Value, sequences via recurseNodeArrayEqual and
// mappings via recurseNodeObjectEqual. Any other kind compares as not equal.
func recursiveNodeEqual(lhs *CandidateNode, rhs *CandidateNode) bool {
	if lhs.Kind != rhs.Kind {
		return false
	}

	// Process custom tags of scalar nodes only; don't worry about matching
	// tags of maps or arrays.
	if lhs.Kind == ScalarNode && lhs.guessTagFromCustomType() != rhs.guessTagFromCustomType() {
		return false
	}

	if lhs.Tag == "!!null" {
		return true
	}

	switch lhs.Kind {
	case ScalarNode:
		return lhs.Value == rhs.Value
	case SequenceNode:
		return recurseNodeArrayEqual(lhs, rhs)
	case MappingNode:
		return recurseNodeObjectEqual(lhs, rhs)
	default:
		return false
	}
}
// parseInt64 parses a YAML integer literal. YAML numbers can contain
// underscores and be hex or octal encoded, so:
//   - every "_" is stripped first;
//   - a "0x" / "0X" prefix selects base 16;
//   - a "0o" prefix selects base 8;
//   - anything else is parsed as base 10.
//
// It returns a fmt verb string matching the detected encoding ("0x%X", "0o%o"
// or "%v"), the parsed value, and any error from strconv.ParseInt.
func parseInt64(numberString string) (string, int64, error) {
	digits := strings.ReplaceAll(numberString, "_", "")

	format, base := "%v", 10
	switch {
	case strings.HasPrefix(digits, "0x"), strings.HasPrefix(digits, "0X"):
		format, base, digits = "0x%X", 16, digits[2:]
	case strings.HasPrefix(digits, "0o"):
		format, base, digits = "0o%o", 8, digits[2:]
	}

	num, err := strconv.ParseInt(digits, base, 64)
	return format, num, err
}
// parseInt parses numberString with parseInt64 and narrows the result to int.
// It returns the parse error unchanged, or a range error when the parsed
// value does not fit within [math.MinInt, math.MaxInt]; in both cases the
// returned value is 0.
func parseInt(numberString string) (int, error) {
	_, wide, err := parseInt64(numberString)
	switch {
	case err != nil:
		return 0, err
	case wide > math.MaxInt || wide < math.MinInt:
		return 0, fmt.Errorf("%v is not within [%v, %v]", wide, math.MinInt, math.MaxInt)
	}
	return int(wide), nil
}
// processEscapeCharacters returns original with backslash escape sequences
// replaced by the characters they stand for.
//
// Recognised sequences are \\, \", \n, \t, \r, \f, \v, \b and \a. A backslash
// followed by any other character, or a backslash at the very end of the
// input, is copied through unchanged. As a special case, an escaped backslash
// that is immediately followed by "(" is kept as two backslashes, which is
// required for string interpolation to work correctly.
//
// When the result differs from the input, the conversion is logged at debug
// level.
func processEscapeCharacters(original string) string {
	if original == "" {
		return original
	}

	runes := []rune(original)
	total := len(runes)

	var out strings.Builder
	for pos := 0; pos < total; pos++ {
		current := runes[pos]

		// Anything that cannot start an escape sequence is copied verbatim.
		if current != '\\' || pos+1 >= total {
			out.WriteRune(current)
			continue
		}

		var decoded rune
		switch runes[pos+1] {
		case '\\':
			decoded = '\\'
			// Preserve both backslashes when followed by an opening bracket;
			// the bracket itself is handled on the next iteration.
			if pos+2 < total && runes[pos+2] == '(' {
				out.WriteRune('\\')
			}
		case '"':
			decoded = '"'
		case 'n':
			decoded = '\n'
		case 't':
			decoded = '\t'
		case 'r':
			decoded = '\r'
		case 'f':
			decoded = '\f'
		case 'v':
			decoded = '\v'
		case 'b':
			decoded = '\b'
		case 'a':
			decoded = '\a'
		default:
			// Unknown escape: keep the backslash and let the following rune
			// be processed normally on the next iteration.
			out.WriteRune(current)
			continue
		}

		out.WriteRune(decoded)
		pos++ // Skip the character that completed the escape sequence.
	}

	value := out.String()
	if value != original {
		log.Debugf("processEscapeCharacters from [%v] to [%v]", original, value)
	}
	return value
}
// headAndLineComment returns the result of headComment(node) immediately
// followed by the result of lineComment(node), with no separator in between.
func headAndLineComment(node *CandidateNode) string {
return headComment(node) + lineComment(node)
}
// headComment returns node.HeadComment with its first "#" removed. Only the
// first occurrence is dropped; any later "#" characters are kept.
func headComment(node *CandidateNode) string {
return strings.Replace(node.HeadComment, "#", "", 1)
}
// lineComment returns node.LineComment with its first "#" removed. Only the
// first occurrence is dropped; any later "#" characters are kept.
func lineComment(node *CandidateNode) string {
return strings.Replace(node.LineComment, "#", "", 1)
}
// footComment returns node.FootComment with its first "#" removed. Only the
// first occurrence is dropped; any later "#" characters are kept.
func footComment(node *CandidateNode) string {
return strings.Replace(node.FootComment, "#", "", 1)
}
// NodesToString renders every node in collection for debug output. Use for
// debugging only.
//
// It returns "" unless debug logging is enabled. Otherwise the result starts
// with a "<n> results" header line, followed by one NodeToString rendering
// per element, each preceded by a newline. Every element of collection must
// hold a *CandidateNode.
func NodesToString(collection *list.List) string {
	if !log.IsEnabledFor(slog.LevelDebug) {
		return ""
	}

	// Accumulate in a builder rather than re-concatenating the whole string
	// for each element, matching NodeContentToString.
	var sb strings.Builder
	fmt.Fprintf(&sb, "%v results\n", collection.Len())
	for el := collection.Front(); el != nil; el = el.Next() {
		sb.WriteString("\n")
		sb.WriteString(NodeToString(el.Value.(*CandidateNode)))
	}
	return sb.String()
}
// NodeToString renders a one-line description of node for debug output.
//
// It returns "" unless debug logging is enabled, and "-- nil --" for a nil
// node. Otherwise the line has the form
//
//	D<document>, P<path>, <kind> (<tag>)::<value>
//
// where the tag is shown as "alias" for alias nodes, and an empty Value is
// replaced by "<n> kids" with n the number of Content entries.
func NodeToString(node *CandidateNode) string {
	if !log.IsEnabledFor(slog.LevelDebug) {
		return ""
	}
	if node == nil {
		return "-- nil --"
	}

	displayTag := node.Tag
	if node.Kind == AliasNode {
		displayTag = "alias"
	}

	displayValue := node.Value
	if displayValue == "" {
		displayValue = fmt.Sprintf("%v kids", len(node.Content))
	}

	return fmt.Sprintf(
		`D%v, P%v, %v (%v)::%v`,
		node.GetDocument(),
		node.GetNicePath(),
		KindString(node.Kind),
		displayTag,
		displayValue,
	)
}
// NodeContentToString renders the Content of node as an indented tree for
// debug output. Each child produces a line made of depth spaces, "- " and
// its NodeToString rendering, followed by the child's own content rendered
// one level deeper.
//
// It returns "" unless debug logging is enabled.
func NodeContentToString(node *CandidateNode, depth int) string {
	if !log.IsEnabledFor(slog.LevelDebug) {
		return ""
	}

	// The indentation is the same for every child at this depth, so build the
	// line prefix once.
	var indent strings.Builder
	for level := 0; level < depth; level++ {
		indent.WriteString(" ")
	}
	prefix := indent.String() + "- "

	var out strings.Builder
	for _, child := range node.Content {
		out.WriteString(prefix)
		out.WriteString(NodeToString(child))
		out.WriteString("\n")
		out.WriteString(NodeContentToString(child, depth+1))
	}
	return out.String()
}
// KindString returns the name of kind: "ScalarNode", "SequenceNode",
// "MappingNode" or "AliasNode". Any other value yields "unknown!".
func KindString(kind Kind) string {
	name := "unknown!"
	switch kind {
	case ScalarNode:
		name = "ScalarNode"
	case SequenceNode:
		name = "SequenceNode"
	case MappingNode:
		name = "MappingNode"
	case AliasNode:
		name = "AliasNode"
	}
	return name
}