yq/pkg/yqlib/encoder_toml.go
Copilot cb97935554
fix: TOML encoder uses inline tables for YAML FlowStyle mappings, inconsistent with explicit JSON parsing (#2687)
* Initial plan

* fix: TOML encoder no longer treats YAML FlowStyle as inline tables

Remove FlowStyle checks from the TOML encoder. YAML flow-style mappings
are a YAML-specific rendering hint and should not influence TOML output.
Only nodes explicitly marked with EncodeHintInline (set by the TOML
decoder for actual TOML inline tables) will produce TOML inline table
syntax.

This fixes the bug where JSON auto-detected via the YAML parser (which
parses {} as flow-style mappings) would produce inline TOML tables
instead of readable table sections, while explicitly parsing with
-p json produced correct table sections.

Updated tests: YAML flow mappings now produce table sections (same as
block mappings), consistent with the fix. Added new test cases for the
JSON → TOML conversion via both YAML decoder (auto-detection) and JSON
decoder.

Agent-Logs-Url: https://github.com/mikefarah/yq/sessions/3e504870-b585-4998-af9c-a451e2f6a6a3

Co-authored-by: mikefarah <1151925+mikefarah@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: mikefarah <1151925+mikefarah@users.noreply.github.com>
2026-04-28 19:32:07 +10:00

772 lines
20 KiB
Go

//go:build !yq_notoml
package yqlib
import (
"bytes"
"fmt"
"io"
"strings"
"github.com/fatih/color"
)
// tomlEncoder renders a CandidateNode tree as TOML text.
type tomlEncoder struct {
	// wroteRootAttr is set whenever a `key = value` attribute line is written
	// and is consumed by writeTableHeader, which emits a single blank line
	// before the next table header and then clears the flag.
	wroteRootAttr bool // Track if we wrote root-level attributes before tables
	// prefs holds the encoder options; Encode consults prefs.ColorsEnabled.
	prefs TomlPreferences
}
// NewTomlEncoder returns a TOML Encoder built from ConfiguredTomlPreferences.
func NewTomlEncoder() Encoder {
	defaults := ConfiguredTomlPreferences
	return NewTomlEncoderWithPrefs(defaults)
}
// NewTomlEncoderWithPrefs returns a TOML Encoder that uses the supplied
// preferences.
func NewTomlEncoderWithPrefs(prefs TomlPreferences) Encoder {
	enc := new(tomlEncoder)
	enc.prefs = prefs
	return enc
}
// Encode writes node to writer as TOML.
//
// A mapping is encoded as a TOML document. A scalar is written as its raw
// value followed by a newline (used for standalone selections). Any other
// node kind is rejected with an error.
//
// When colours are enabled the document is rendered into a temporary buffer
// first so that the finished text can be colourised in one go.
func (te *tomlEncoder) Encode(writer io.Writer, node *CandidateNode) error {
	switch node.Kind {
	case MappingNode:
		// handled below
	case ScalarNode:
		// For standalone selections, TOML tests expect raw value for scalars
		return writeString(writer, node.Value+"\n")
	default:
		return fmt.Errorf("TOML encoder expects a mapping at the root level")
	}

	// Plain output streams straight to the caller's writer.
	if !te.prefs.ColorsEnabled {
		return te.encodeRootMapping(writer, node)
	}

	// Coloured output: stage the plain text, then colourise and flush it.
	var staged bytes.Buffer
	if err := te.encodeRootMapping(&staged, node); err != nil {
		return err
	}
	_, err := writer.Write(te.colorizeToml(staged.Bytes()))
	return err
}
// PrintDocumentSeparator is a no-op for TOML: it writes nothing and always
// returns nil.
func (te *tomlEncoder) PrintDocumentSeparator(_ io.Writer) error {
	return nil
}
// PrintLeadingContent is a no-op for TOML: the supplied content is ignored,
// nothing is written, and nil is always returned.
func (te *tomlEncoder) PrintLeadingContent(_ io.Writer, _ string) error {
	return nil
}
// CanHandleAliases reports whether this encoder can emit alias nodes; the
// TOML encoder always returns false.
func (te *tomlEncoder) CanHandleAliases() bool {
	return false
}
// ---- helpers ----
// tomlKey returns key in a form that is valid as a single TOML key component.
//
// TOML bare keys may only contain ASCII letters, ASCII digits, underscores,
// and dashes, and must be non-empty. A key that satisfies those rules is
// returned unchanged; any other key (including the empty string) is returned
// as a double-quoted string.
func tomlKey(key string) string {
	// An empty bare key would produce invalid output such as ` = 1` or `[]`;
	// TOML only allows the empty key in quoted form.
	if key == "" {
		return fmt.Sprintf("%q", key)
	}
	for _, r := range key {
		if (r < 'A' || r > 'Z') && (r < 'a' || r > 'z') && (r < '0' || r > '9') && r != '_' && r != '-' {
			return fmt.Sprintf("%q", key)
		}
	}
	return key
}
// tomlDottedKey renders path as a dotted TOML key: each component is passed
// through tomlKey (so it is quoted when necessary) and the results are
// separated by '.'. An empty path yields the empty string.
func tomlDottedKey(path []string) string {
	var dotted strings.Builder
	for idx, segment := range path {
		if idx > 0 {
			dotted.WriteByte('.')
		}
		dotted.WriteString(tomlKey(segment))
	}
	return dotted.String()
}
// writeComment writes comment to w as TOML comment lines.
//
// The comment is split on newlines; each line is trimmed of surrounding
// whitespace and given a "# " prefix unless it already starts with '#'.
// An empty comment writes nothing. The first write error is returned.
func (te *tomlEncoder) writeComment(w io.Writer, comment string) error {
	if len(comment) == 0 {
		return nil
	}
	for _, raw := range strings.Split(comment, "\n") {
		text := strings.TrimSpace(raw)
		if !strings.HasPrefix(text, "#") {
			text = "# " + text
		}
		_, err := w.Write([]byte(text + "\n"))
		if err != nil {
			return err
		}
	}
	return nil
}
// formatScalar returns the TOML text for a scalar node, chosen by its tag:
// "!!str" values are double-quoted with %q, "!!null" becomes an empty string
// literal (TOML has no null), and every other tag yields node.Value verbatim.
func (te *tomlEncoder) formatScalar(node *CandidateNode) string {
	if node.Tag == "!!str" {
		// Quote strings per TOML spec
		return fmt.Sprintf("%q", node.Value)
	}
	if node.Tag == "!!null" {
		// TOML does not have null; encode as empty string
		return `""`
	}
	// Booleans, integers, floats and any unrecognised tag are written as-is.
	return node.Value
}
// encodeRootMapping writes the root mapping of a document to w.
//
// The root head comment (if any) is written first. Root entries are then
// emitted in two groups, each preserving the original key order:
//
//  1. attributes: scalars, plain arrays and inline tables (`key = value`)
//  2. sections: [table] and [[array of tables]] entries
//
// The grouping is required for correctness: in TOML every `key = value` line
// belongs to the most recent header, so a root attribute written after a
// section would silently become a member of that table instead of the root.
//
// It returns the first error reported by the entry encoders.
func (te *tomlEncoder) encodeRootMapping(w io.Writer, node *CandidateNode) error {
	te.wroteRootAttr = false // Reset state

	// Write root head comment if present (at the very beginning, no leading blank line)
	if node.HeadComment != "" {
		if err := te.writeComment(w, node.HeadComment); err != nil {
			return err
		}
	}

	// opensSection reports whether encodeTopLevelEntry will emit a [table] or
	// [[array of tables]] header for v rather than a `key = value` line.
	opensSection := func(v *CandidateNode) bool {
		switch v.Kind {
		case MappingNode:
			return v.EncodeHint != EncodeHintInline
		case SequenceNode:
			return isTomlArrayOfTables(v)
		default:
			return false
		}
	}

	// First pass writes attributes, second pass writes sections.
	for _, wantSection := range [...]bool{false, true} {
		for i := 0; i < len(node.Content); i += 2 {
			keyNode := node.Content[i]
			valNode := node.Content[i+1]
			if opensSection(valNode) != wantSection {
				continue
			}
			if err := te.encodeTopLevelEntry(w, []string{keyNode.Value}, valNode); err != nil {
				return err
			}
		}
	}
	return nil
}
// encodeTopLevelEntry encodes one root-level key/value pair. The last element
// of path is the key; the node kind decides the output form:
//
//   - scalar: `key = value`
//   - sequence: an [[key]] section per element when the sequence is non-empty
//     and every element is a mapping, otherwise a `key = [...]` attribute
//   - mapping: `key = { ... }` when marked EncodeHintInline, otherwise one or
//     more [table] sections
//
// An empty path or any other node kind results in an error.
func (te *tomlEncoder) encodeTopLevelEntry(w io.Writer, path []string, node *CandidateNode) error {
	if len(path) == 0 {
		return fmt.Errorf("cannot encode TOML entry with empty path")
	}
	leaf := path[len(path)-1]

	switch node.Kind {
	case ScalarNode:
		return te.writeAttribute(w, leaf, node)

	case SequenceNode:
		// Empty or mixed sequences are plain array attributes.
		if !isTomlArrayOfTables(node) {
			return te.writeArrayAttribute(w, leaf, node)
		}
		// Array of tables: [[key]] followed by the element body, per element.
		header := "[[" + tomlKey(leaf) + "]]\n"
		for _, elem := range node.Content {
			if _, err := w.Write([]byte(header)); err != nil {
				return err
			}
			if err := te.encodeMappingBodyWithPath(w, []string{leaf}, elem); err != nil {
				return err
			}
		}
		return nil

	case MappingNode:
		// Only nodes explicitly marked as TOML inline tables use inline syntax.
		// YAML flow style is a YAML-specific rendering hint and is ignored, so
		// auto-detected JSON input (parsed as YAML flow style) produces the same
		// table sections as explicitly parsed JSON input.
		if node.EncodeHint == EncodeHintInline {
			return te.writeInlineTableAttribute(w, leaf, node)
		}
		return te.encodeSeparateMapping(w, path, node)

	default:
		return fmt.Errorf("unsupported node kind for TOML: %v", node.Kind)
	}
}
// isTomlArrayOfTables reports whether seq is non-empty and consists solely of
// mapping nodes, i.e. whether it is rendered as [[...]] sections.
func isTomlArrayOfTables(seq *CandidateNode) bool {
	count := len(seq.Content)
	for idx := 0; idx < count; idx++ {
		if seq.Content[idx].Kind != MappingNode {
			return false
		}
	}
	return count > 0
}
// writeAttribute writes a scalar as a `key = value` line, preceded by the
// node's head comment and followed on the same line by its line comment when
// present. It also records that an attribute has been written so that the
// next table header is separated by a blank line.
func (te *tomlEncoder) writeAttribute(w io.Writer, key string, value *CandidateNode) error {
	te.wroteRootAttr = true // Mark that we wrote a root attribute

	if err := te.writeComment(w, value.HeadComment); err != nil {
		return err
	}

	var out strings.Builder
	out.WriteString(tomlKey(key))
	out.WriteString(" = ")
	out.WriteString(te.formatScalar(value))

	// The trailing comment gets a "# " prefix unless it already starts with '#'.
	if value.LineComment != "" {
		trailing := strings.TrimSpace(value.LineComment)
		out.WriteByte(' ')
		if !strings.HasPrefix(trailing, "#") {
			out.WriteString("# ")
		}
		out.WriteString(trailing)
	}
	out.WriteByte('\n')

	_, err := w.Write([]byte(out.String()))
	return err
}
// writeArrayAttribute writes seq as a `key = [...]` attribute.
//
// The sequence's head comment is written first. Three layouts follow:
//
//   - empty sequence: `key = []` plus the optional line comment
//   - any element has a head comment: a multiline array with one element per
//     line, each preceded by its comment lines, a trailing comma after every
//     element and a blank line between elements
//   - otherwise: a single-line array plus the optional line comment
//
// Nested sequences and mappings are rendered inline. Alias elements and
// unknown node kinds produce an error.
func (te *tomlEncoder) writeArrayAttribute(w io.Writer, key string, seq *CandidateNode) error {
	te.wroteRootAttr = true // Mark that we wrote a root attribute

	if err := te.writeComment(w, seq.HeadComment); err != nil {
		return err
	}

	// withLineComment appends the sequence's trailing comment to text, adding
	// a "# " prefix unless the comment already starts with '#'.
	withLineComment := func(text string) string {
		if seq.LineComment == "" {
			return text
		}
		trailing := strings.TrimSpace(seq.LineComment)
		if !strings.HasPrefix(trailing, "#") {
			trailing = "# " + trailing
		}
		return text + " " + trailing
	}

	// render converts a single element to its inline TOML representation.
	render := func(elem *CandidateNode) (string, error) {
		switch elem.Kind {
		case ScalarNode:
			return te.formatScalar(elem), nil
		case SequenceNode:
			return te.sequenceToInlineArray(elem)
		case MappingNode:
			return te.mappingToInlineTable(elem)
		case AliasNode:
			return "", fmt.Errorf("aliases are not supported in TOML")
		default:
			return "", fmt.Errorf("unsupported array item kind: %v", elem.Kind)
		}
	}

	// Empty array.
	if len(seq.Content) == 0 {
		_, err := w.Write([]byte(withLineComment(tomlKey(key)+" = []") + "\n"))
		return err
	}

	// Element head comments force the multiline layout.
	commented := false
	for _, elem := range seq.Content {
		if elem.HeadComment != "" {
			commented = true
			break
		}
	}

	if commented {
		if _, err := w.Write([]byte(tomlKey(key) + " = [\n")); err != nil {
			return err
		}
		last := len(seq.Content) - 1
		for idx, elem := range seq.Content {
			// Comment lines for this element; blank lines are skipped. An empty
			// head comment splits into a single blank line and so writes nothing.
			for _, note := range strings.Split(elem.HeadComment, "\n") {
				trimmed := strings.TrimSpace(note)
				if trimmed == "" {
					continue
				}
				if !strings.HasPrefix(trimmed, "#") {
					note = "# " + note
				}
				if _, err := w.Write([]byte(" " + note + "\n")); err != nil {
					return err
				}
			}

			text, err := render(elem)
			if err != nil {
				return err
			}
			// Always add trailing comma in multiline arrays
			if _, err := w.Write([]byte(" " + text + "," + "\n")); err != nil {
				return err
			}
			// Blank line between elements, but not after the last one.
			if idx < last {
				if _, err := w.Write([]byte("\n")); err != nil {
					return err
				}
			}
		}
		_, err := w.Write([]byte("]\n"))
		return err
	}

	// Single-line layout.
	rendered := make([]string, 0, len(seq.Content))
	for _, elem := range seq.Content {
		text, err := render(elem)
		if err != nil {
			return err
		}
		rendered = append(rendered, text)
	}
	line := withLineComment(tomlKey(key) + " = [" + strings.Join(rendered, ", ") + "]")
	_, err := w.Write([]byte(line + "\n"))
	return err
}
// sequenceToInlineArray renders seq as an inline TOML array, e.g. `[1, "a"]`.
// Scalars are formatted with formatScalar, nested sequences recursively, and
// mappings as inline tables. Any other element kind yields an error.
func (te *tomlEncoder) sequenceToInlineArray(seq *CandidateNode) (string, error) {
	rendered := make([]string, len(seq.Content))
	for idx, elem := range seq.Content {
		var (
			text string
			err  error
		)
		switch elem.Kind {
		case ScalarNode:
			text = te.formatScalar(elem)
		case SequenceNode:
			text, err = te.sequenceToInlineArray(elem)
		case MappingNode:
			text, err = te.mappingToInlineTable(elem)
		default:
			err = fmt.Errorf("unsupported array item kind: %v", elem.Kind)
		}
		if err != nil {
			return "", err
		}
		rendered[idx] = text
	}
	return "[" + strings.Join(rendered, ", ") + "]", nil
}
// mappingToInlineTable renders m as an inline TOML table, e.g.
// `{ a = 1, b = "x" }`. Keys are quoted via tomlKey when required; values may
// be scalars, inline arrays or nested inline tables. Any other value kind
// yields an error.
func (te *tomlEncoder) mappingToInlineTable(m *CandidateNode) (string, error) {
	pairs := make([]string, 0, len(m.Content)/2)
	for i := 0; i < len(m.Content); i += 2 {
		name, val := m.Content[i].Value, m.Content[i+1]

		var (
			text string
			err  error
		)
		switch val.Kind {
		case ScalarNode:
			text = te.formatScalar(val)
		case SequenceNode:
			// inline array in inline table
			text, err = te.sequenceToInlineArray(val)
		case MappingNode:
			// nested inline table
			text, err = te.mappingToInlineTable(val)
		default:
			err = fmt.Errorf("unsupported inline table value kind: %v", val.Kind)
		}
		if err != nil {
			return "", err
		}
		pairs = append(pairs, tomlKey(name)+" = "+text)
	}
	return "{ " + strings.Join(pairs, ", ") + " }", nil
}
// writeInlineTableAttribute writes m as a single `key = { ... }` line.
// It returns the rendering error, if any, or the write error.
func (te *tomlEncoder) writeInlineTableAttribute(w io.Writer, key string, m *CandidateNode) error {
	table, err := te.mappingToInlineTable(m)
	if err != nil {
		return err
	}
	line := tomlKey(key) + " = " + table + "\n"
	if _, err := w.Write([]byte(line)); err != nil {
		return err
	}
	return nil
}
// writeTableHeader writes the `[a.b.c]` header for the table at path.
//
// If an attribute has been written since the last header, a single blank
// line is emitted first and the flag is cleared. The mapping's head comment,
// when present, is written between that blank line and the header.
func (te *tomlEncoder) writeTableHeader(w io.Writer, path []string, m *CandidateNode) error {
	// Separate the header (or its comment) from preceding attributes.
	if te.wroteRootAttr {
		if _, err := w.Write([]byte("\n")); err != nil {
			return err
		}
		te.wroteRootAttr = false // Only add once
	}

	if m.HeadComment != "" {
		if err := te.writeComment(w, m.HeadComment); err != nil {
			return err
		}
	}

	// Table header [a.b.c]
	_, err := w.Write([]byte("[" + tomlDottedKey(path) + "]\n"))
	return err
}
// encodeSeparateMapping handles a mapping that should be encoded as table sections.
//
// If the mapping is empty or has at least one attribute-like child (a scalar,
// an inline-hinted mapping, or a sequence that is not an array of tables), a
// `[path]` header is written followed by the body via encodeMappingBodyWithPath.
// Otherwise no header is written for this level and the children are emitted
// directly: child mappings recurse with the extended path and arrays of tables
// become `[[path.key]]` sections.
//
// path is the full key path of m from the root; it is never modified (child
// paths are built on fresh slices). The first write or encoding error is returned.
func (te *tomlEncoder) encodeSeparateMapping(w io.Writer, path []string, m *CandidateNode) error {
	// Check if this mapping has any non-mapping, non-array-of-tables children (i.e., attributes).
	// Inline mapping children also count as attributes since they render as key = { ... }.
	hasAttrs := false
	for i := 0; i < len(m.Content); i += 2 {
		v := m.Content[i+1]
		if v.Kind == ScalarNode {
			hasAttrs = true
			break
		}
		if v.Kind == MappingNode && v.EncodeHint == EncodeHintInline {
			hasAttrs = true
			break
		}
		if v.Kind == SequenceNode {
			if !isTomlArrayOfTables(v) {
				hasAttrs = true
				break
			}
		}
	}
	// If there are attributes or if the mapping is empty, emit the table header
	if hasAttrs || len(m.Content) == 0 {
		if err := te.writeTableHeader(w, path, m); err != nil {
			return err
		}
		if err := te.encodeMappingBodyWithPath(w, path, m); err != nil {
			return err
		}
		return nil
	}
	// No attributes, just nested table structures - process children recursively.
	// Because hasAttrs is false here, every child is either a non-inline mapping
	// or an array of tables; the scalar case and the plain-array branch below
	// cannot be reached from the scan above and act only as a safety net.
	for i := 0; i < len(m.Content); i += 2 {
		k := m.Content[i].Value
		v := m.Content[i+1]
		switch v.Kind {
		case MappingNode:
			// Copy path before appending so sibling iterations never share a backing array.
			newPath := append(append([]string{}, path...), k)
			if err := te.encodeSeparateMapping(w, newPath, v); err != nil {
				return err
			}
		case SequenceNode:
			// If sequence of maps, emit [[path.k]] per element
			if isTomlArrayOfTables(v) {
				key := tomlDottedKey(append(append([]string{}, path...), k))
				for _, it := range v.Content {
					if _, err := w.Write([]byte("[[" + key + "]]\n")); err != nil {
						return err
					}
					if err := te.encodeMappingBodyWithPath(w, append(append([]string{}, path...), k), it); err != nil {
						return err
					}
				}
			} else {
				// Regular array attribute under the current table path
				if err := te.writeArrayAttribute(w, k, v); err != nil {
					return err
				}
			}
		case ScalarNode:
			// Attributes directly under the current table path
			if err := te.writeAttribute(w, k, v); err != nil {
				return err
			}
		}
	}
	return nil
}
// encodeMappingBodyWithPath writes the contents of the table (or
// array-of-tables element) m located at path. The caller is expected to have
// written the table's own header already.
//
// Children are emitted in four passes, each preserving key order:
//
//  1. scalar and plain-array attributes
//  2. inline-table attributes (mappings marked EncodeHintInline)
//  3. nested arrays of tables, as [[path.key]] sections
//  4. remaining child mappings, as [path.key] sub-table sections
//
// All `key = value` lines (passes 1 and 2) must come before the first nested
// header: TOML assigns every key/value pair to the most recent header, so an
// inline table written after a [[...]] or [...] section would end up in that
// nested table rather than in m.
//
// path is never modified; child paths are built on fresh slices. The first
// write or encoding error is returned.
func (te *tomlEncoder) encodeMappingBodyWithPath(w io.Writer, path []string, m *CandidateNode) error {
	// childPath returns a new slice holding path followed by k, so recursive
	// calls never share path's backing array.
	childPath := func(k string) []string {
		return append(append([]string{}, path...), k)
	}

	// Pass 1: attributes (scalars and non-map arrays)
	for i := 0; i < len(m.Content); i += 2 {
		k := m.Content[i].Value
		v := m.Content[i+1]
		switch v.Kind {
		case ScalarNode:
			if err := te.writeAttribute(w, k, v); err != nil {
				return err
			}
		case SequenceNode:
			if !isTomlArrayOfTables(v) {
				if err := te.writeArrayAttribute(w, k, v); err != nil {
					return err
				}
			}
		}
	}

	// Pass 2: inline-hinted mappings are attributes too (key = { ... }) and
	// therefore have to precede every nested section header.
	for i := 0; i < len(m.Content); i += 2 {
		k := m.Content[i].Value
		v := m.Content[i+1]
		if v.Kind == MappingNode && v.EncodeHint == EncodeHintInline {
			if err := te.writeInlineTableAttribute(w, k, v); err != nil {
				return err
			}
		}
	}

	// Pass 3: nested arrays of tables with full path
	for i := 0; i < len(m.Content); i += 2 {
		k := m.Content[i].Value
		v := m.Content[i+1]
		if v.Kind != SequenceNode || !isTomlArrayOfTables(v) {
			continue
		}
		dotted := tomlDottedKey(childPath(k))
		for _, it := range v.Content {
			if _, err := w.Write([]byte("[[" + dotted + "]]\n")); err != nil {
				return err
			}
			if err := te.encodeMappingBodyWithPath(w, childPath(k), it); err != nil {
				return err
			}
		}
	}

	// Pass 4: all other child mappings become separate sub-table sections.
	for i := 0; i < len(m.Content); i += 2 {
		k := m.Content[i].Value
		v := m.Content[i+1]
		if v.Kind == MappingNode && v.EncodeHint != EncodeHintInline {
			if err := te.encodeSeparateMapping(w, childPath(k), v); err != nil {
				return err
			}
		}
	}
	return nil
}
// colorizeToml applies syntax highlighting to TOML output using fatih/color.
//
// input is scanned byte by byte with a small hand-written tokenizer and a new
// byte slice is returned in which comments, table headers, quoted strings,
// numbers, the keywords true/false and keys (identifiers followed by '=') are
// wrapped by the corresponding colour function. All other bytes are copied
// through unchanged.
//
// NOTE: this sets the package-level color.NoColor flag to false as a side
// effect, so it affects other users of fatih/color in the same process.
func (te *tomlEncoder) colorizeToml(input []byte) []byte {
	toml := string(input)
	result := strings.Builder{}
	// Force color output (don't check for TTY)
	color.NoColor = false
	// Create color functions for different token types
	// Use EnableColor() to ensure colors work even when NO_COLOR env is set
	commentColorObj := color.New(color.FgHiBlack)
	commentColorObj.EnableColor()
	stringColorObj := color.New(color.FgGreen)
	stringColorObj.EnableColor()
	numberColorObj := color.New(color.FgHiMagenta)
	numberColorObj.EnableColor()
	keyColorObj := color.New(color.FgCyan)
	keyColorObj.EnableColor()
	boolColorObj := color.New(color.FgHiMagenta)
	boolColorObj.EnableColor()
	sectionColorObj := color.New(color.FgYellow, color.Bold)
	sectionColorObj.EnableColor()
	commentColor := commentColorObj.SprintFunc()
	stringColor := stringColorObj.SprintFunc()
	numberColor := numberColorObj.SprintFunc()
	keyColor := keyColorObj.SprintFunc()
	boolColor := boolColorObj.SprintFunc()
	sectionColor := sectionColorObj.SprintFunc()
	// Simple tokenization for TOML colouring. Each branch below consumes one
	// token starting at i, writes it to result, and advances i past it.
	i := 0
	for i < len(toml) {
		ch := toml[i]
		// Comments - from # to end of line (the newline itself is not included)
		if ch == '#' {
			end := i
			for end < len(toml) && toml[end] != '\n' {
				end++
			}
			result.WriteString(commentColor(toml[i:end]))
			i = end
			continue
		}
		// Table sections - [section] or [[array]]
		// Only treat '[' as a table section if it appears at the start of the line
		// (possibly after whitespace). This avoids mis-colouring inline arrays like
		// "ports = [8000, 8001]" as table sections.
		if ch == '[' {
			isSectionHeader := true
			if i > 0 {
				isSectionHeader = false
				// Walk backwards to the previous newline (or start of input),
				// stopping early at the first non-whitespace byte.
				j := i - 1
				for j >= 0 && toml[j] != '\n' {
					if toml[j] != ' ' && toml[j] != '\t' && toml[j] != '\r' {
						// Found a non-whitespace character before this '[' on the same line,
						// so this is not a table header.
						break
					}
					j--
				}
				if j < 0 || toml[j] == '\n' {
					// Reached the start of the string or a newline without encountering
					// any non-whitespace, so '[' is at the logical start of the line.
					isSectionHeader = true
				}
			}
			if isSectionHeader {
				end := i + 1
				// Check for [[
				if end < len(toml) && toml[end] == '[' {
					end++
				}
				// Find closing ] (the first ']' byte after the opening bracket(s))
				for end < len(toml) && toml[end] != ']' {
					end++
				}
				// Include closing ]
				if end < len(toml) {
					end++
					// Check for ]]
					if end < len(toml) && toml[end] == ']' {
						end++
					}
				}
				result.WriteString(sectionColor(toml[i:end]))
				i = end
				continue
			}
			// Not a header: fall through so the '[' is handled by the branches below.
		}
		// Strings - quoted text (double or single quotes).
		// A backslash skips the following byte for both quote styles.
		if ch == '"' || ch == '\'' {
			quote := ch
			end := i + 1
			for end < len(toml) {
				if toml[end] == quote {
					break
				}
				if toml[end] == '\\' && end+1 < len(toml) {
					// Skip the backslash and the escaped character
					end += 2
					continue
				}
				end++
			}
			if end < len(toml) {
				end++ // include closing quote
			}
			result.WriteString(stringColor(toml[i:end]))
			i = end
			continue
		}
		// Numbers - a digit, or a minus sign immediately followed by a digit,
		// then digits, '.', 'e'/'E' and a sign directly after the exponent marker
		if (ch >= '0' && ch <= '9') || (ch == '-' && i+1 < len(toml) && toml[i+1] >= '0' && toml[i+1] <= '9') {
			end := i
			if ch == '-' {
				end++
			}
			for end < len(toml) {
				c := toml[end]
				if (c >= '0' && c <= '9') || c == '.' || c == 'e' || c == 'E' {
					end++
				} else if (c == '+' || c == '-') && end > 0 && (toml[end-1] == 'e' || toml[end-1] == 'E') {
					// Only allow + or - immediately after 'e' or 'E' for scientific notation
					end++
				} else {
					break
				}
			}
			result.WriteString(numberColor(toml[i:end]))
			i = end
			continue
		}
		// Identifiers/keys - must start with a letter or underscore, then
		// continue with letters, digits, underscores and dashes
		if (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_' {
			end := i
			for end < len(toml) && ((toml[end] >= 'a' && toml[end] <= 'z') ||
				(toml[end] >= 'A' && toml[end] <= 'Z') ||
				(toml[end] >= '0' && toml[end] <= '9') ||
				toml[end] == '_' || toml[end] == '-') {
				end++
			}
			ident := toml[i:end]
			// Check if this is a boolean keyword
			switch ident {
			case "true", "false":
				result.WriteString(boolColor(ident))
			default:
				// Check if followed by = or whitespace then = (it's a key)
				j := end
				for j < len(toml) && (toml[j] == ' ' || toml[j] == '\t') {
					j++
				}
				if j < len(toml) && toml[j] == '=' {
					result.WriteString(keyColor(ident))
				} else {
					result.WriteString(ident) // plain text for other identifiers
				}
			}
			i = end
			continue
		}
		// Everything else (whitespace, operators, brackets) - no color
		result.WriteByte(ch)
		i++
	}
	return []byte(result.String())
}