yq/pkg/yqlib/encoder_shellvariables.go
Flint Winters f98028c925
Unwrap scalars in shell output mode. (#2548)
* feat: Add UnwrapScalar to ShellVariablesPreferences

- Add UnwrapScalar boolean field to ShellVariablesPreferences struct.
- Initialize UnwrapScalar to false in NewDefaultShellVariablesPreferences.
- This preference will control whether shell output should be quoted or raw.

* feat: Propagate unwrapScalar to ShellVariablesPreferences

- In configureEncoder function, set UnwrapScalar in ConfiguredShellVariablesPreferences.
- This ensures the -r flag's state is passed to the shell encoder for raw output control.

* feat: Implement conditional quoting in shellVariablesEncoder

- Modify doEncode method to check pe.prefs.UnwrapScalar.
- If UnwrapScalar is true, output raw node.Value.
- Otherwise, use quoteValue for shell-safe quoting.
- This enables quote-free output for Kubernetes workflows when -r is used.

* test: Add tests for UnwrapScalar in shell encoder

- Introduce assertEncodesToUnwrapped helper function.
- Add TestShellVariablesEncoderUnwrapScalar to verify quote-free output with -r.
- Add TestShellVariablesEncoderDefaultQuoting to confirm default quoting behavior without -r.
- Ensure comprehensive testing of conditional quoting logic for shell output.

* remove redundant test
2026-01-01 15:21:55 +11:00

162 lines
4.2 KiB
Go

//go:build !yq_noshell
package yqlib
import (
"fmt"
"io"
"strings"
"unicode/utf8"
"golang.org/x/text/unicode/norm"
)
type shellVariablesEncoder struct {
prefs ShellVariablesPreferences
}
func NewShellVariablesEncoder() Encoder {
return &shellVariablesEncoder{
prefs: ConfiguredShellVariablesPreferences,
}
}
func (pe *shellVariablesEncoder) CanHandleAliases() bool {
return false
}
func (pe *shellVariablesEncoder) PrintDocumentSeparator(_ io.Writer) error {
return nil
}
func (pe *shellVariablesEncoder) PrintLeadingContent(_ io.Writer, _ string) error {
return nil
}
func (pe *shellVariablesEncoder) Encode(writer io.Writer, node *CandidateNode) error {
mapKeysToStrings(node)
err := pe.doEncode(&writer, node, "")
if err != nil {
return err
}
return err
}
func (pe *shellVariablesEncoder) doEncode(w *io.Writer, node *CandidateNode, path string) error {
// Note this drops all comments.
switch node.Kind {
case ScalarNode:
nonemptyPath := path
if path == "" {
// We can't assign an empty variable "=somevalue" because that would error out if sourced in a shell,
// nor can we use "_" as a variable name ($_ is a special shell variable that can't be assigned)...
// let's just pick a fallback key to use if we are encoding a single scalar
nonemptyPath = "value"
}
var valueString string
if pe.prefs.UnwrapScalar {
valueString = node.Value
} else {
valueString = quoteValue(node.Value)
}
_, err := io.WriteString(*w, nonemptyPath+"="+valueString+"\n")
return err
case SequenceNode:
for index, child := range node.Content {
err := pe.doEncode(w, child, pe.appendPath(path, index))
if err != nil {
return err
}
}
return nil
case MappingNode:
for index := 0; index < len(node.Content); index = index + 2 {
key := node.Content[index]
value := node.Content[index+1]
err := pe.doEncode(w, value, pe.appendPath(path, key.Value))
if err != nil {
return err
}
}
return nil
case AliasNode:
return pe.doEncode(w, node.Alias, path)
default:
return fmt.Errorf("unsupported node %v", node.Tag)
}
}
func (pe *shellVariablesEncoder) appendPath(cookedPath string, rawKey interface{}) string {
// Shell variable names must match
// [a-zA-Z_]+[a-zA-Z0-9_]*
//
// While this is not mandated by POSIX, which is quite lenient, it is
// what shells (for example busybox ash *) allow in practice.
//
// Since yaml names can contain basically any character, we will process them according to these steps:
//
// 1. apply unicode compatibility decomposition NFKD (this will convert accented
// letters to letters followed by accents, split ligatures, replace exponents
// with the corresponding digit, etc.
//
// 2. discard non-ASCII characters as well as ASCII control characters (ie. anything
// with code point < 32 or > 126), this will eg. discard accents but keep the base
// unaccented letter because of NFKD above
//
// 3. replace all non-alphanumeric characters with _
//
// Moreover, for the root key only, we will prepend an underscore if what results from the steps above
// does not start with [a-zA-Z_] (ie. if the root key starts with a digit).
//
// Note this is NOT a 1:1 mapping.
//
// (*) see endofname.c from https://git.busybox.net/busybox/tag/?h=1_36_0
// XXX empty strings
key := strings.Map(func(r rune) rune {
if isAlphaNumericOrUnderscore(r) {
return r
} else if r < 32 || 126 < r {
return -1
}
return '_'
}, norm.NFKD.String(fmt.Sprintf("%v", rawKey)))
if cookedPath == "" {
firstRune, _ := utf8.DecodeRuneInString(key)
if !isAlphaOrUnderscore(firstRune) {
return "_" + key
}
return key
}
return cookedPath + pe.prefs.KeySeparator + key
}
func quoteValue(value string) string {
needsQuoting := false
for _, r := range value {
if !isAlphaNumericOrUnderscore(r) {
needsQuoting = true
break
}
}
if needsQuoting {
return "'" + strings.ReplaceAll(value, "'", "'\"'\"'") + "'"
}
return value
}
func isAlphaOrUnderscore(r rune) bool {
return ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z') || r == '_'
}
func isAlphaNumericOrUnderscore(r rune) bool {
return isAlphaOrUnderscore(r) || ('0' <= r && r <= '9')
}