Merge branch 'master' into switch-to-uk

This commit is contained in:
Mike Farah 2025-12-20 16:05:07 +11:00
commit 0754211914
13 changed files with 1375 additions and 27 deletions

View File

@ -1,3 +1,16 @@
# General rules
✅ **DO:**
- You can use ./yq with the `--debug-node-info` flag to get a deeper understanding of the ast.
- Run ./scripts/format.sh to format the code, then ./scripts/check.sh lint, and finally ./scripts/spelling.sh to check spelling.
- Add comprehensive tests to cover the changes
- Run test suite to ensure there is no regression
- Use UK English spelling (e.g. Colorisation not Colorization)
❌ **DON'T:**
- Git add or commit
# Adding a New Encoder/Decoder
This guide explains how to add support for a new format (encoder/decoder) to yq without modifying `candidate_node.go`.

View File

@ -206,6 +206,7 @@ func configureEncoder() (yqlib.Encoder, error) {
yqlib.ConfiguredYamlPreferences.ColorsEnabled = colorsEnabled
yqlib.ConfiguredJSONPreferences.ColorsEnabled = colorsEnabled
yqlib.ConfiguredHclPreferences.ColorsEnabled = colorsEnabled
yqlib.ConfiguredTomlPreferences.ColorsEnabled = colorsEnabled
yqlib.ConfiguredYamlPreferences.PrintDocSeparators = !noDocSeparators

View File

@ -1,6 +1,26 @@
[[fruits]]
[animals]
[[fruits.varieties]] # nested array of tables
name = "red delicious"
# This is a TOML document
title = "TOML Example"
[owner]
name = "Tom Preston-Werner"
dob = 1979-05-27T07:32:00-08:00
[database]
enabled = true
ports = [ 8000, 8001, 8002 ]
data = [ ["delta", "phi"], [3.14] ]
temp_targets = { cpu = 79.5, case = 72.0 }
[servers]
[servers.alpha]
ip = "10.0.0.1"
role = "frontend"
[servers.beta]
ip = "10.0.0.2"
role = "backend"

View File

@ -465,6 +465,9 @@ func (n *CandidateNode) UpdateAttributesFrom(other *CandidateNode, prefs assignP
n.Anchor = other.Anchor
}
// Preserve EncodeSeparate flag for format-specific encoding hints
n.EncodeSeparate = other.EncodeSeparate
// merge will pickup the style of the new thing
// when autocreating nodes

View File

@ -8,16 +8,19 @@ import (
"fmt"
"io"
"strconv"
"strings"
"time"
toml "github.com/pelletier/go-toml/v2/unstable"
)
type tomlDecoder struct {
parser toml.Parser
finished bool
d DataTreeNavigator
rootMap *CandidateNode
parser toml.Parser
finished bool
d DataTreeNavigator
rootMap *CandidateNode
pendingComments []string // Head comments collected from Comment nodes
firstContentSeen bool // Track if we've processed the first non-comment node
}
func NewTomlDecoder() Decoder {
@ -28,7 +31,7 @@ func NewTomlDecoder() Decoder {
}
func (dec *tomlDecoder) Init(reader io.Reader) error {
dec.parser = toml.Parser{}
dec.parser = toml.Parser{KeepComments: true}
buf := new(bytes.Buffer)
_, err := buf.ReadFrom(reader)
if err != nil {
@ -39,6 +42,8 @@ func (dec *tomlDecoder) Init(reader io.Reader) error {
Kind: MappingNode,
Tag: "!!map",
}
dec.pendingComments = make([]string, 0)
dec.firstContentSeen = false
return nil
}
@ -56,13 +61,24 @@ func (dec *tomlDecoder) getFullPath(tomlNode *toml.Node) []interface{} {
func (dec *tomlDecoder) processKeyValueIntoMap(rootMap *CandidateNode, tomlNode *toml.Node) error {
value := tomlNode.Value()
path := dec.getFullPath(value.Next())
log.Debug("processKeyValueIntoMap: %v", path)
valueNode, err := dec.decodeNode(value)
if err != nil {
return err
}
// Attach pending head comments
if len(dec.pendingComments) > 0 {
valueNode.HeadComment = strings.Join(dec.pendingComments, "\n")
dec.pendingComments = make([]string, 0)
}
// Check for inline comment chained to the KeyValue node
nextNode := tomlNode.Next()
if nextNode != nil && nextNode.Kind == toml.Comment {
valueNode.LineComment = string(nextNode.Data)
}
context := Context{}
context = context.SingleChildContext(rootMap)
@ -79,11 +95,15 @@ func (dec *tomlDecoder) decodeKeyValuesIntoMap(rootMap *CandidateNode, tomlNode
nextItem := dec.parser.Expression()
log.Debug("decodeKeyValuesIntoMap -- next exp, its a %v", nextItem.Kind)
if nextItem.Kind == toml.KeyValue {
switch nextItem.Kind {
case toml.KeyValue:
if err := dec.processKeyValueIntoMap(rootMap, nextItem); err != nil {
return false, err
}
} else {
case toml.Comment:
// Standalone comment - add to pending for next element
dec.pendingComments = append(dec.pendingComments, string(nextItem.Data))
default:
// run out of key values
log.Debug("done in decodeKeyValuesIntoMap, gota a %v", nextItem.Kind)
return true, nil
@ -250,11 +270,29 @@ func (dec *tomlDecoder) processTopLevelNode(currentNode *toml.Node) (bool, error
var err error
log.Debug("processTopLevelNode: Going to process %v state is current %v", currentNode.Kind, NodeToString(dec.rootMap))
switch currentNode.Kind {
case toml.Comment:
// Collect comment to attach to next element
commentText := string(currentNode.Data)
// If we haven't seen any content yet, accumulate comments for root
if !dec.firstContentSeen {
if dec.rootMap.HeadComment == "" {
dec.rootMap.HeadComment = commentText
} else {
dec.rootMap.HeadComment = dec.rootMap.HeadComment + "\n" + commentText
}
} else {
// We've seen content, so these comments are for the next element
dec.pendingComments = append(dec.pendingComments, commentText)
}
return false, nil
case toml.Table:
dec.firstContentSeen = true
runAgainstCurrentExp, err = dec.processTable(currentNode)
case toml.ArrayTable:
dec.firstContentSeen = true
runAgainstCurrentExp, err = dec.processArrayTable(currentNode)
default:
dec.firstContentSeen = true
runAgainstCurrentExp, err = dec.decodeKeyValuesIntoMap(dec.rootMap, currentNode)
}
@ -264,7 +302,8 @@ func (dec *tomlDecoder) processTopLevelNode(currentNode *toml.Node) (bool, error
func (dec *tomlDecoder) processTable(currentNode *toml.Node) (bool, error) {
log.Debug("Enter processTable")
fullPath := dec.getFullPath(currentNode.Child())
child := currentNode.Child()
fullPath := dec.getFullPath(child)
log.Debug("fullpath: %v", fullPath)
c := Context{}
@ -276,9 +315,16 @@ func (dec *tomlDecoder) processTable(currentNode *toml.Node) (bool, error) {
}
tableNodeValue := &CandidateNode{
Kind: MappingNode,
Tag: "!!map",
Content: make([]*CandidateNode, 0),
Kind: MappingNode,
Tag: "!!map",
Content: make([]*CandidateNode, 0),
EncodeSeparate: true,
}
// Attach pending head comments to the table
if len(dec.pendingComments) > 0 {
tableNodeValue.HeadComment = strings.Join(dec.pendingComments, "\n")
dec.pendingComments = make([]string, 0)
}
var tableValue *toml.Node
@ -330,7 +376,8 @@ func (dec *tomlDecoder) arrayAppend(context Context, path []interface{}, rhsNode
func (dec *tomlDecoder) processArrayTable(currentNode *toml.Node) (bool, error) {
log.Debug("Enter processArrayTable")
fullPath := dec.getFullPath(currentNode.Child())
child := currentNode.Child()
fullPath := dec.getFullPath(child)
log.Debug("Fullpath: %v", fullPath)
c := Context{}
@ -346,9 +393,17 @@ func (dec *tomlDecoder) processArrayTable(currentNode *toml.Node) (bool, error)
hasValue := dec.parser.NextExpression()
tableNodeValue := &CandidateNode{
Kind: MappingNode,
Tag: "!!map",
Kind: MappingNode,
Tag: "!!map",
EncodeSeparate: true,
}
// Attach pending head comments to the array table
if len(dec.pendingComments) > 0 {
tableNodeValue.HeadComment = strings.Join(dec.pendingComments, "\n")
dec.pendingComments = make([]string, 0)
}
runAgainstCurrentExp := false
// if the next value is a ArrayTable or Table, then its not part of this declaration (not a key value pair)
// so lets leave that expression for the next round of parsing

View File

@ -7,7 +7,7 @@ HCL is commonly used in HashiCorp tools like Terraform for configuration files.
- String interpolation and expressions (preserved without quotes)
- Comments (leading, head, and line comments)
- Nested structures (maps and lists)
- Syntax colorization when enabled
- Syntax colorisation when enabled
## Parse HCL

View File

@ -7,5 +7,5 @@ HCL is commonly used in HashiCorp tools like Terraform for configuration files.
- String interpolation and expressions (preserved without quotes)
- Comments (leading, head, and line comments)
- Nested structures (maps and lists)
- Syntax colorization when enabled
- Syntax colorisation when enabled

View File

@ -141,3 +141,191 @@ will output
dependencies: {}
```
## Roundtrip: inline table attribute
Given a sample.toml file of:
```toml
name = { first = "Tom", last = "Preston-Werner" }
```
then
```bash
yq '.' sample.toml
```
will output
```yaml
name = { first = "Tom", last = "Preston-Werner" }
```
## Roundtrip: table section
Given a sample.toml file of:
```toml
[owner.contact]
name = "Tom"
age = 36
```
then
```bash
yq '.' sample.toml
```
will output
```yaml
[owner.contact]
name = "Tom"
age = 36
```
## Roundtrip: array of tables
Given a sample.toml file of:
```toml
[[fruits]]
name = "apple"
[[fruits.varieties]]
name = "red delicious"
```
then
```bash
yq '.' sample.toml
```
will output
```yaml
[[fruits]]
name = "apple"
[[fruits.varieties]]
name = "red delicious"
```
## Roundtrip: arrays and scalars
Given a sample.toml file of:
```toml
A = ["hello", ["world", "again"]]
B = 12
```
then
```bash
yq '.' sample.toml
```
will output
```yaml
A = ["hello", ["world", "again"]]
B = 12
```
## Roundtrip: simple
Given a sample.toml file of:
```toml
A = "hello"
B = 12
```
then
```bash
yq '.' sample.toml
```
will output
```yaml
A = "hello"
B = 12
```
## Roundtrip: deep paths
Given a sample.toml file of:
```toml
[person]
name = "hello"
address = "12 cat st"
```
then
```bash
yq '.' sample.toml
```
will output
```yaml
[person]
name = "hello"
address = "12 cat st"
```
## Roundtrip: empty array
Given a sample.toml file of:
```toml
A = []
```
then
```bash
yq '.' sample.toml
```
will output
```yaml
A = []
```
## Roundtrip: sample table
Given a sample.toml file of:
```toml
var = "x"
[owner.contact]
name = "Tom Preston-Werner"
age = 36
```
then
```bash
yq '.' sample.toml
```
will output
```yaml
var = "x"
[owner.contact]
name = "Tom Preston-Werner"
age = 36
```
## Roundtrip: empty table
Given a sample.toml file of:
```toml
[dependencies]
```
then
```bash
yq '.' sample.toml
```
will output
```yaml
[dependencies]
```
## Roundtrip: comments
Given a sample.toml file of:
```toml
# This is a comment
A = "hello" # inline comment
B = 12
# Table comment
[person]
name = "Tom" # name comment
```
then
```bash
yq '.' sample.toml
```
will output
```yaml
# This is a comment
A = "hello" # inline comment
B = 12
# Table comment
[person]
name = "Tom" # name comment
```

View File

@ -1,22 +1,56 @@
package yqlib
import (
"bytes"
"fmt"
"io"
"strings"
"github.com/fatih/color"
)
type tomlEncoder struct {
wroteRootAttr bool // Track if we wrote root-level attributes before tables
prefs TomlPreferences
}
func NewTomlEncoder() Encoder {
return &tomlEncoder{}
return NewTomlEncoderWithPrefs(ConfiguredTomlPreferences)
}
// NewTomlEncoderWithPrefs returns a TOML Encoder whose behaviour is driven by
// the supplied preferences (stored by value on the encoder).
func NewTomlEncoderWithPrefs(prefs TomlPreferences) Encoder {
return &tomlEncoder{prefs: prefs}
}
func (te *tomlEncoder) Encode(writer io.Writer, node *CandidateNode) error {
if node.Kind == ScalarNode {
return writeString(writer, node.Value+"\n")
if node.Kind != MappingNode {
// For standalone selections, TOML tests expect raw value for scalars
if node.Kind == ScalarNode {
return writeString(writer, node.Value+"\n")
}
return fmt.Errorf("TOML encoder expects a mapping at the root level")
}
return fmt.Errorf("only scalars (e.g. strings, numbers, booleans) are supported for TOML output at the moment. Please use yaml output format (-oy) until the encoder has been fully implemented")
// Encode to a buffer first if colors are enabled
var buf bytes.Buffer
var targetWriter io.Writer
targetWriter = writer
if te.prefs.ColorsEnabled {
targetWriter = &buf
}
// Encode a root mapping as a sequence of attributes, tables, and arrays of tables
if err := te.encodeRootMapping(targetWriter, node); err != nil {
return err
}
if te.prefs.ColorsEnabled {
colorized := te.colorizeToml(buf.Bytes())
_, err := writer.Write(colorized)
return err
}
return nil
}
func (te *tomlEncoder) PrintDocumentSeparator(_ io.Writer) error {
@ -30,3 +64,632 @@ func (te *tomlEncoder) PrintLeadingContent(_ io.Writer, _ string) error {
func (te *tomlEncoder) CanHandleAliases() bool {
return false
}
// ---- helpers ----
// writeComment emits comment on w as one TOML comment line per input line.
// Each line is trimmed of surrounding whitespace and gets a "# " prefix unless
// it already starts with '#'. An empty comment writes nothing.
func (te *tomlEncoder) writeComment(w io.Writer, comment string) error {
	if len(comment) == 0 {
		return nil
	}
	for _, raw := range strings.Split(comment, "\n") {
		text := strings.TrimSpace(raw)
		if !strings.HasPrefix(text, "#") {
			text = "# " + text
		}
		if _, err := io.WriteString(w, text+"\n"); err != nil {
			return err
		}
	}
	return nil
}
// formatScalar renders a scalar node as a TOML value.
//
// Strings ("!!str") are emitted as TOML basic strings. Nulls ("!!null") are
// emitted as an empty string because TOML has no null. Every other tag
// (booleans, integers, floats, and anything unrecognised) is written verbatim
// from node.Value.
func (te *tomlEncoder) formatScalar(node *CandidateNode) string {
	switch node.Tag {
	case "!!str":
		return tomlBasicString(node.Value)
	case "!!null":
		// TOML does not have null; encode as empty string
		return `""`
	case "!!bool", "!!int", "!!float":
		return node.Value
	default:
		return node.Value
	}
}

// tomlBasicString quotes s as a TOML basic (double-quoted) string.
//
// Go's %q verb is not used because it produces Go-only escapes such as \a, \v
// and \xNN, which TOML parsers reject. Only the escapes defined by the TOML
// spec are produced here: \" \\ \b \t \n \f \r, and \uXXXX for the remaining
// control characters (U+0000-U+001F, U+007F). Invalid UTF-8 bytes come out of
// the range loop as U+FFFD, keeping the output valid UTF-8.
func tomlBasicString(s string) string {
	var b strings.Builder
	b.Grow(len(s) + 2)
	b.WriteByte('"')
	for _, r := range s {
		switch r {
		case '"':
			b.WriteString(`\"`)
		case '\\':
			b.WriteString(`\\`)
		case '\b':
			b.WriteString(`\b`)
		case '\t':
			b.WriteString(`\t`)
		case '\n':
			b.WriteString(`\n`)
		case '\f':
			b.WriteString(`\f`)
		case '\r':
			b.WriteString(`\r`)
		default:
			if r < 0x20 || r == 0x7f {
				fmt.Fprintf(&b, `\u%04x`, r)
			} else {
				b.WriteRune(r)
			}
		}
	}
	b.WriteByte('"')
	return b.String()
}
// encodeRootMapping writes the document root: the root head comment first (if
// any), then every key/value pair of the mapping in its existing order via
// encodeTopLevelEntry.
func (te *tomlEncoder) encodeRootMapping(w io.Writer, node *CandidateNode) error {
	// Each document starts with a clean "attribute written" flag.
	te.wroteRootAttr = false

	// The root comment goes at the very top, with no blank line before it.
	if rootComment := node.HeadComment; rootComment != "" {
		if err := te.writeComment(w, rootComment); err != nil {
			return err
		}
	}

	// Content holds alternating key and value nodes.
	entries := node.Content
	for pos := 0; pos < len(entries); pos += 2 {
		name, value := entries[pos].Value, entries[pos+1]
		if err := te.encodeTopLevelEntry(w, []string{name}, value); err != nil {
			return err
		}
	}
	return nil
}
// encodeTopLevelEntry encodes one root-level key/value pair. The node kind
// decides the form:
//   - scalar: "key = value"
//   - sequence: "[[key]]" sections when it is non-empty and every item is a
//     mapping, otherwise an inline array attribute
//   - mapping: an inline table, unless it (or one of its children) asks for
//     separate sections, in which case table sections are emitted
//
// Any other kind is reported as an error.
func (te *tomlEncoder) encodeTopLevelEntry(w io.Writer, path []string, node *CandidateNode) error {
	// The last path element is the key name; it is only looked up by the
	// branches that need it.
	leafKey := func() string { return path[len(path)-1] }

	switch node.Kind {
	case ScalarNode:
		return te.writeAttribute(w, leafKey(), node)

	case SequenceNode:
		// Empty arrays and arrays holding anything but mappings are attributes.
		tablesOnly := len(node.Content) > 0
		for _, item := range node.Content {
			if item.Kind != MappingNode {
				tablesOnly = false
				break
			}
		}
		if !tablesOnly {
			return te.writeArrayAttribute(w, leafKey(), node)
		}

		// Array of tables: one [[key]] header followed by the element body.
		name := leafKey()
		header := "[[" + name + "]]\n"
		for _, item := range node.Content {
			if _, err := w.Write([]byte(header)); err != nil {
				return err
			}
			if err := te.encodeMappingBodyWithPath(w, []string{name}, item); err != nil {
				return err
			}
		}
		return nil

	case MappingNode:
		// Inline only when neither the mapping nor its children want sections.
		inline := !node.EncodeSeparate &&
			!te.hasEncodeSeparateChild(node) &&
			!te.hasStructuralChildren(node)
		if inline {
			return te.writeInlineTableAttribute(w, leafKey(), node)
		}
		return te.encodeSeparateMapping(w, path, node)

	default:
		return fmt.Errorf("unsupported node kind for TOML: %v", node.Kind)
	}
}
// writeAttribute writes a scalar as "key = value", preceded by the value's
// head comment and followed on the same line by its line comment when set.
// It also records that an attribute has been written, which writeTableHeader
// uses to decide whether a blank separator line is needed.
func (te *tomlEncoder) writeAttribute(w io.Writer, key string, value *CandidateNode) error {
	te.wroteRootAttr = true

	if err := te.writeComment(w, value.HeadComment); err != nil {
		return err
	}

	var out strings.Builder
	out.WriteString(key)
	out.WriteString(" = ")
	out.WriteString(te.formatScalar(value))

	if value.LineComment != "" {
		trailing := strings.TrimSpace(value.LineComment)
		if !strings.HasPrefix(trailing, "#") {
			trailing = "# " + trailing
		}
		out.WriteString(" ")
		out.WriteString(trailing)
	}
	out.WriteString("\n")

	_, err := w.Write([]byte(out.String()))
	return err
}
// writeArrayAttribute writes a sequence as an inline TOML array attribute,
// "key = [a, b, ...]", preceded by the sequence's head comment and followed by
// its line comment when set. An empty sequence is written as "key = []".
// Nested sequences and mappings are rendered inline; aliases and unknown item
// kinds are rejected with an error.
func (te *tomlEncoder) writeArrayAttribute(w io.Writer, key string, seq *CandidateNode) error {
	te.wroteRootAttr = true

	if err := te.writeComment(w, seq.HeadComment); err != nil {
		return err
	}

	// Render every element; an empty sequence simply yields no elements.
	rendered := make([]string, 0, len(seq.Content))
	for _, elem := range seq.Content {
		var (
			text string
			err  error
		)
		switch elem.Kind {
		case ScalarNode:
			text = te.formatScalar(elem)
		case SequenceNode:
			text, err = te.sequenceToInlineArray(elem)
		case MappingNode:
			text, err = te.mappingToInlineTable(elem)
		case AliasNode:
			err = fmt.Errorf("aliases are not supported in TOML")
		default:
			err = fmt.Errorf("unsupported array item kind: %v", elem.Kind)
		}
		if err != nil {
			return err
		}
		rendered = append(rendered, text)
	}

	out := key + " = [" + strings.Join(rendered, ", ") + "]"
	if seq.LineComment != "" {
		trailing := strings.TrimSpace(seq.LineComment)
		if !strings.HasPrefix(trailing, "#") {
			trailing = "# " + trailing
		}
		out += " " + trailing
	}

	_, err := w.Write([]byte(out + "\n"))
	return err
}
// sequenceToInlineArray renders seq as an inline TOML array such as
// `[1, "a", [2], { k = 3 }]`. Scalars are formatted with formatScalar, nested
// sequences recurse, and mappings become inline tables. Any other item kind
// yields an error.
func (te *tomlEncoder) sequenceToInlineArray(seq *CandidateNode) (string, error) {
	var out strings.Builder
	out.WriteString("[")
	for idx, elem := range seq.Content {
		if idx > 0 {
			out.WriteString(", ")
		}
		switch elem.Kind {
		case ScalarNode:
			out.WriteString(te.formatScalar(elem))
		case SequenceNode:
			inner, err := te.sequenceToInlineArray(elem)
			if err != nil {
				return "", err
			}
			out.WriteString(inner)
		case MappingNode:
			table, err := te.mappingToInlineTable(elem)
			if err != nil {
				return "", err
			}
			out.WriteString(table)
		default:
			return "", fmt.Errorf("unsupported array item kind: %v", elem.Kind)
		}
	}
	out.WriteString("]")
	return out.String(), nil
}
// mappingToInlineTable renders m as a TOML inline table, e.g.
// `{ a = 1, b = "x" }`. Scalar values are formatted with formatScalar,
// sequences become inline arrays and nested mappings become nested inline
// tables. Any other value kind yields an error.
//
// An empty mapping is rendered as `{}` rather than `{  }` so that an empty
// inline table round-trips unchanged.
func (te *tomlEncoder) mappingToInlineTable(m *CandidateNode) (string, error) {
	if len(m.Content) == 0 {
		return "{}", nil
	}

	// Content holds alternating key and value nodes.
	parts := make([]string, 0, len(m.Content)/2)
	for i := 0; i < len(m.Content); i += 2 {
		k := m.Content[i].Value
		v := m.Content[i+1]

		var rendered string
		switch v.Kind {
		case ScalarNode:
			rendered = te.formatScalar(v)
		case SequenceNode:
			// inline array in inline table
			arr, err := te.sequenceToInlineArray(v)
			if err != nil {
				return "", err
			}
			rendered = arr
		case MappingNode:
			// nested inline table
			inline, err := te.mappingToInlineTable(v)
			if err != nil {
				return "", err
			}
			rendered = inline
		default:
			return "", fmt.Errorf("unsupported inline table value kind: %v", v.Kind)
		}
		parts = append(parts, k+" = "+rendered)
	}
	return "{ " + strings.Join(parts, ", ") + " }", nil
}
// writeInlineTableAttribute writes a mapping as an inline table attribute,
// "key = { ... }".
//
// Like writeAttribute and writeArrayAttribute it writes the node's head
// comment before the line, appends its line comment, and records that an
// attribute was written so the next table header gets a blank separator line.
// The inline table is rendered before anything is written, so a rendering
// error leaves w untouched.
func (te *tomlEncoder) writeInlineTableAttribute(w io.Writer, key string, m *CandidateNode) error {
	inline, err := te.mappingToInlineTable(m)
	if err != nil {
		return err
	}

	te.wroteRootAttr = true

	if err := te.writeComment(w, m.HeadComment); err != nil {
		return err
	}

	line := key + " = " + inline
	if m.LineComment != "" {
		lineComment := strings.TrimSpace(m.LineComment)
		if !strings.HasPrefix(lineComment, "#") {
			lineComment = "# " + lineComment
		}
		line += " " + lineComment
	}

	_, err = w.Write([]byte(line + "\n"))
	return err
}
// writeTableHeader writes the "[a.b.c]" header for the table at path.
// If an attribute was written since the last header, a single blank line is
// emitted first and the flag is cleared. The table's head comment, when set,
// is written between that blank line and the header.
func (te *tomlEncoder) writeTableHeader(w io.Writer, path []string, m *CandidateNode) error {
	if te.wroteRootAttr {
		if _, err := io.WriteString(w, "\n"); err != nil {
			return err
		}
		// Clear only after the separator went out, so it is added once.
		te.wroteRootAttr = false
	}

	if comment := m.HeadComment; comment != "" {
		if err := te.writeComment(w, comment); err != nil {
			return err
		}
	}

	_, err := io.WriteString(w, "["+strings.Join(path, ".")+"]\n")
	return err
}
// encodeSeparateMapping handles a mapping that should be encoded as table sections.
//
// When the mapping is empty or has direct attributes (scalars, or sequences
// that are not arrays of tables), its own "[path]" header is written followed
// by its body. Otherwise no header is written for the mapping itself and only
// its children are emitted, each as a "[path.child]" table or as
// "[[path.child]]" array-of-tables sections.
//
// Two cases are handled explicitly:
//   - An empty sequence counts as an attribute ("key = []"), not as an array
//     of tables with zero elements; otherwise the header and the key would
//     vanish from the output.
//   - A header-less child mapping that itself contains EncodeSeparate tables
//     is processed recursively so those nested tables are emitted.
func (te *tomlEncoder) encodeSeparateMapping(w io.Writer, path []string, m *CandidateNode) error {
	// An array of tables is a non-empty sequence whose items are all mappings.
	isArrayOfTables := func(n *CandidateNode) bool {
		if n.Kind != SequenceNode || len(n.Content) == 0 {
			return false
		}
		for _, it := range n.Content {
			if it.Kind != MappingNode {
				return false
			}
		}
		return true
	}

	// Does this mapping have anything that must live directly under its header?
	hasAttrs := false
	for i := 0; i < len(m.Content); i += 2 {
		v := m.Content[i+1]
		if v.Kind == ScalarNode || (v.Kind == SequenceNode && !isArrayOfTables(v)) {
			hasAttrs = true
			break
		}
	}

	// If there are attributes or if the mapping is empty, emit the table header
	if hasAttrs || len(m.Content) == 0 {
		if err := te.writeTableHeader(w, path, m); err != nil {
			return err
		}
		return te.encodeMappingBodyWithPath(w, path, m)
	}

	// No attributes, just nested structures - process children. Scalars and
	// plain arrays cannot occur here because they would have set hasAttrs.
	for i := 0; i < len(m.Content); i += 2 {
		k := m.Content[i].Value
		v := m.Content[i+1]
		childPath := append(append([]string{}, path...), k)

		switch {
		case v.Kind == MappingNode:
			if te.hasEncodeSeparateChild(v) {
				// The child holds further tables: recurse so they are not lost.
				if err := te.encodeSeparateMapping(w, childPath, v); err != nil {
					return err
				}
				continue
			}
			// Emit [path.k]
			if err := te.writeTableHeader(w, childPath, v); err != nil {
				return err
			}
			if err := te.encodeMappingBodyWithPath(w, childPath, v); err != nil {
				return err
			}

		case isArrayOfTables(v):
			// Emit [[path.k]] per element
			header := "[[" + strings.Join(childPath, ".") + "]]\n"
			for _, it := range v.Content {
				if _, err := w.Write([]byte(header)); err != nil {
					return err
				}
				if err := te.encodeMappingBodyWithPath(w, childPath, it); err != nil {
					return err
				}
			}
		}
	}
	return nil
}
// hasEncodeSeparateChild reports whether any direct child value of m is a
// mapping flagged with EncodeSeparate. Only immediate children are inspected.
func (te *tomlEncoder) hasEncodeSeparateChild(m *CandidateNode) bool {
	// Content alternates key, value; only the values are of interest.
	for idx := 0; idx < len(m.Content); idx += 2 {
		child := m.Content[idx+1]
		if child.Kind != MappingNode || !child.EncodeSeparate {
			continue
		}
		return true
	}
	return false
}
// hasStructuralChildren reports whether m has a direct child that has to be
// written as its own section: a mapping flagged EncodeSeparate, or an array of
// tables (a non-empty sequence whose items are all mappings).
//
// An empty sequence is not structural: it is a plain "key = []" attribute.
// Treating it as an array of tables would route the parent to the section
// encoder, which emits nothing for a zero-element array.
func (te *tomlEncoder) hasStructuralChildren(m *CandidateNode) bool {
	for i := 0; i < len(m.Content); i += 2 {
		v := m.Content[i+1]
		switch v.Kind {
		case MappingNode:
			if v.EncodeSeparate {
				return true
			}
		case SequenceNode:
			if len(v.Content) == 0 {
				continue
			}
			allMaps := true
			for _, it := range v.Content {
				if it.Kind != MappingNode {
					allMaps = false
					break
				}
			}
			if allMaps {
				return true
			}
		}
	}
	return false
}
// encodeMappingBodyWithPath encodes the body of the table at path.
//
// It works in two passes so the output stays valid TOML:
//
//  1. Everything that belongs directly to the current table header, in the
//     mapping's own order: scalars, array attributes (including empty arrays)
//     and child mappings not marked EncodeSeparate, written as inline tables.
//     These must precede any section header, otherwise they would be parsed as
//     members of that later section.
//  2. Sections, using the full dotted path: "[[path.key]]" for each element of
//     an array of tables, and table sections for child mappings marked
//     EncodeSeparate.
//
// An array of tables is a non-empty sequence whose items are all mappings.
func (te *tomlEncoder) encodeMappingBodyWithPath(w io.Writer, path []string, m *CandidateNode) error {
	isArrayOfTables := func(n *CandidateNode) bool {
		if n.Kind != SequenceNode || len(n.Content) == 0 {
			return false
		}
		for _, it := range n.Content {
			if it.Kind != MappingNode {
				return false
			}
		}
		return true
	}

	// Pass 1: attributes of the current table.
	for i := 0; i < len(m.Content); i += 2 {
		k := m.Content[i].Value
		v := m.Content[i+1]

		var err error
		switch v.Kind {
		case ScalarNode:
			err = te.writeAttribute(w, k, v)
		case SequenceNode:
			if !isArrayOfTables(v) {
				err = te.writeArrayAttribute(w, k, v)
			}
		case MappingNode:
			if !v.EncodeSeparate {
				err = te.writeInlineTableAttribute(w, k, v)
			}
		}
		if err != nil {
			return err
		}
	}

	// Pass 2: nested sections with full path.
	for i := 0; i < len(m.Content); i += 2 {
		k := m.Content[i].Value
		v := m.Content[i+1]
		childPath := append(append([]string{}, path...), k)

		switch {
		case isArrayOfTables(v):
			header := "[[" + strings.Join(childPath, ".") + "]]\n"
			for _, it := range v.Content {
				if _, err := w.Write([]byte(header)); err != nil {
					return err
				}
				if err := te.encodeMappingBodyWithPath(w, childPath, it); err != nil {
					return err
				}
			}
		case v.Kind == MappingNode && v.EncodeSeparate:
			// Previously skipped entirely, which dropped the nested table.
			if err := te.encodeSeparateMapping(w, childPath, v); err != nil {
				return err
			}
		}
	}
	return nil
}
// colorizeToml applies syntax highlighting to TOML output using fatih/color
//
// The input is scanned byte by byte in a single pass. At each position the
// first matching rule consumes a token and writes it to the result, wrapped by
// that token's colour function:
//   - '#' up to (not including) the end of line: comment
//   - '[' that is the first non-whitespace byte of its line: table header
//   - '"' or '\'' up to the matching quote: string
//   - a digit, or '-' directly followed by a digit: number
//   - a letter or '_': identifier (boolean keyword, key, or plain text)
//
// Any other byte is copied through unchanged.
//
// NOTE: this sets the color.NoColor variable to false and does not restore it,
// so the change remains in effect after the call returns.
func (te *tomlEncoder) colorizeToml(input []byte) []byte {
toml := string(input)
result := strings.Builder{}
// Force color output (don't check for TTY)
color.NoColor = false
// Create color functions for different token types
// (numbers and booleans are both given color.FgHiMagenta)
commentColor := color.New(color.FgHiBlack).SprintFunc()
stringColor := color.New(color.FgGreen).SprintFunc()
numberColor := color.New(color.FgHiMagenta).SprintFunc()
keyColor := color.New(color.FgCyan).SprintFunc()
boolColor := color.New(color.FgHiMagenta).SprintFunc()
sectionColor := color.New(color.FgYellow, color.Bold).SprintFunc()
// Simple tokenization for TOML coloring
i := 0
for i < len(toml) {
ch := toml[i]
// Comments - from # to end of line
// (the newline itself is left for the fall-through rule at the bottom)
if ch == '#' {
end := i
for end < len(toml) && toml[end] != '\n' {
end++
}
result.WriteString(commentColor(toml[i:end]))
i = end
continue
}
// Table sections - [section] or [[array]]
// Only treat '[' as a table section if it appears at the start of the line
// (possibly after whitespace). This avoids mis-colouring inline arrays like
// "ports = [8000, 8001]" as table sections.
if ch == '[' {
isSectionHeader := true
if i > 0 {
isSectionHeader = false
// Walk backwards over spaces, tabs and carriage returns on this line.
j := i - 1
for j >= 0 && toml[j] != '\n' {
if toml[j] != ' ' && toml[j] != '\t' && toml[j] != '\r' {
// Found a non-whitespace character before this '[' on the same line,
// so this is not a table header.
break
}
j--
}
if j < 0 || toml[j] == '\n' {
// Reached the start of the string or a newline without encountering
// any non-whitespace, so '[' is at the logical start of the line.
isSectionHeader = true
}
}
if isSectionHeader {
end := i + 1
// Check for [[
if end < len(toml) && toml[end] == '[' {
end++
}
// Find closing ]
// (the scan stops at the first ']' byte it meets, or at end of input)
for end < len(toml) && toml[end] != ']' {
end++
}
// Include closing ]
if end < len(toml) {
end++
// Check for ]]
if end < len(toml) && toml[end] == ']' {
end++
}
}
result.WriteString(sectionColor(toml[i:end]))
i = end
continue
}
// A '[' that is not a section header falls through to the rules below
// and ends up copied unchanged by the final rule.
}
// Strings - quoted text (double or single quotes)
// The backslash skip below is applied for both quote characters.
if ch == '"' || ch == '\'' {
quote := ch
end := i + 1
for end < len(toml) {
if toml[end] == quote {
break
}
if toml[end] == '\\' && end+1 < len(toml) {
// Skip the backslash and the escaped character
end += 2
continue
}
end++
}
if end < len(toml) {
end++ // include closing quote
}
result.WriteString(stringColor(toml[i:end]))
i = end
continue
}
// Numbers - sequences of digits, possibly with decimal point or minus
if (ch >= '0' && ch <= '9') || (ch == '-' && i+1 < len(toml) && toml[i+1] >= '0' && toml[i+1] <= '9') {
end := i
if ch == '-' {
end++
}
// Accept digits, '.', 'e' and 'E'; a sign is accepted only right after e/E.
for end < len(toml) {
c := toml[end]
if (c >= '0' && c <= '9') || c == '.' || c == 'e' || c == 'E' {
end++
} else if (c == '+' || c == '-') && end > 0 && (toml[end-1] == 'e' || toml[end-1] == 'E') {
// Only allow + or - immediately after 'e' or 'E' for scientific notation
end++
} else {
break
}
}
result.WriteString(numberColor(toml[i:end]))
i = end
continue
}
// Identifiers/keys - alphanumeric + underscore + dash
// (must start with a letter or '_'; digits and '-' may follow)
if (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_' {
end := i
for end < len(toml) && ((toml[end] >= 'a' && toml[end] <= 'z') ||
(toml[end] >= 'A' && toml[end] <= 'Z') ||
(toml[end] >= '0' && toml[end] <= '9') ||
toml[end] == '_' || toml[end] == '-') {
end++
}
ident := toml[i:end]
// Check if this is a boolean/null keyword
// (only "true" and "false" are matched here)
switch ident {
case "true", "false":
result.WriteString(boolColor(ident))
default:
// Check if followed by = or whitespace then = (it's a key)
j := end
for j < len(toml) && (toml[j] == ' ' || toml[j] == '\t') {
j++
}
if j < len(toml) && toml[j] == '=' {
result.WriteString(keyColor(ident))
} else {
result.WriteString(ident) // plain text for other identifiers
}
}
i = end
continue
}
// Everything else (whitespace, operators, brackets) - no color
result.WriteByte(ch)
i++
}
return []byte(result.String())
}

View File

@ -63,7 +63,7 @@ var ShFormat = &Format{"", nil,
}
var TomlFormat = &Format{"toml", []string{},
func() Encoder { return NewTomlEncoder() },
func() Encoder { return NewTomlEncoderWithPrefs(ConfiguredTomlPreferences) },
func() Decoder { return NewTomlDecoder() },
}

15
pkg/yqlib/toml.go Normal file
View File

@ -0,0 +1,15 @@
package yqlib
// TomlPreferences holds the options that control TOML output.
type TomlPreferences struct {
	// ColorsEnabled turns syntax colouring of the encoded output on or off.
	ColorsEnabled bool
}

// NewDefaultTomlPreferences returns the default preferences, with colours
// disabled.
func NewDefaultTomlPreferences() TomlPreferences {
	var prefs TomlPreferences
	prefs.ColorsEnabled = false
	return prefs
}

// Copy returns an independent copy of the preferences.
func (p *TomlPreferences) Copy() TomlPreferences {
	duplicate := *p
	return duplicate
}

// ConfiguredTomlPreferences is the package-level preference set, initialised
// to the defaults.
var ConfiguredTomlPreferences = NewDefaultTomlPreferences()

View File

@ -3,8 +3,10 @@ package yqlib
import (
"bufio"
"fmt"
"strings"
"testing"
"github.com/fatih/color"
"github.com/mikefarah/yq/v4/test"
)
@ -175,6 +177,82 @@ var expectedSampleWithHeader = `servers:
ip: 10.0.0.1
`
// Roundtrip fixtures
var rtInlineTableAttr = `name = { first = "Tom", last = "Preston-Werner" }
`
var rtTableSection = `[owner.contact]
name = "Tom"
age = 36
`
var rtArrayOfTables = `[[fruits]]
name = "apple"
[[fruits.varieties]]
name = "red delicious"
`
var rtArraysAndScalars = `A = ["hello", ["world", "again"]]
B = 12
`
var rtSimple = `A = "hello"
B = 12
`
var rtDeepPaths = `[person]
name = "hello"
address = "12 cat st"
`
var rtEmptyArray = `A = []
`
var rtSampleTable = `var = "x"
[owner.contact]
name = "Tom Preston-Werner"
age = 36
`
var rtEmptyTable = `[dependencies]
`
var rtComments = `# This is a comment
A = "hello" # inline comment
B = 12
# Table comment
[person]
name = "Tom" # name comment
`
// var sampleFromWeb = `
// # This is a TOML document
// title = "TOML Example"
// [owner]
// name = "Tom Preston-Werner"
// dob = 1979-05-27T07:32:00-08:00
// [database]
// enabled = true
// ports = [8000, 8001, 8002]
// data = [["delta", "phi"], [3.14]]
// temp_targets = { cpu = 79.5, case = 72.0 }
// [servers]
// [servers.alpha]
// ip = "10.0.0.1"
// role = "frontend"
// [servers.beta]
// ip = "10.0.0.2"
// role = "backend"
// `
var tomlScenarios = []formatScenario{
{
skipDoc: true,
@ -382,6 +460,84 @@ var tomlScenarios = []formatScenario{
expected: expectedMultipleEmptyTables,
scenarioType: "decode",
},
// Roundtrip scenarios
{
description: "Roundtrip: inline table attribute",
input: rtInlineTableAttr,
expression: ".",
expected: rtInlineTableAttr,
scenarioType: "roundtrip",
},
{
description: "Roundtrip: table section",
input: rtTableSection,
expression: ".",
expected: rtTableSection,
scenarioType: "roundtrip",
},
{
description: "Roundtrip: array of tables",
input: rtArrayOfTables,
expression: ".",
expected: rtArrayOfTables,
scenarioType: "roundtrip",
},
{
description: "Roundtrip: arrays and scalars",
input: rtArraysAndScalars,
expression: ".",
expected: rtArraysAndScalars,
scenarioType: "roundtrip",
},
{
description: "Roundtrip: simple",
input: rtSimple,
expression: ".",
expected: rtSimple,
scenarioType: "roundtrip",
},
{
description: "Roundtrip: deep paths",
input: rtDeepPaths,
expression: ".",
expected: rtDeepPaths,
scenarioType: "roundtrip",
},
{
description: "Roundtrip: empty array",
input: rtEmptyArray,
expression: ".",
expected: rtEmptyArray,
scenarioType: "roundtrip",
},
{
description: "Roundtrip: sample table",
input: rtSampleTable,
expression: ".",
expected: rtSampleTable,
scenarioType: "roundtrip",
},
{
description: "Roundtrip: empty table",
input: rtEmptyTable,
expression: ".",
expected: rtEmptyTable,
scenarioType: "roundtrip",
},
{
description: "Roundtrip: comments",
input: rtComments,
expression: ".",
expected: rtComments,
scenarioType: "roundtrip",
},
// {
// description: "Roundtrip: sample from web",
// input: sampleFromWeb,
// expression: ".",
// expected: sampleFromWeb,
// scenarioType: "roundtrip",
// },
}
func testTomlScenario(t *testing.T, s formatScenario) {
@ -471,3 +627,238 @@ func TestTomlScenarios(t *testing.T) {
}
documentScenarios(t, "usage", "toml", genericScenarios, documentTomlScenario)
}
// TestTomlColourization tests that colourization correctly distinguishes
// between table section headers and inline arrays
func TestTomlColourization(t *testing.T) {
// Test that inline arrays are not coloured as table sections
encoder := &tomlEncoder{prefs: TomlPreferences{ColorsEnabled: true}}
// Create TOML with both table sections and inline arrays
input := []byte(`[database]
enabled = true
ports = [8000, 8001, 8002]
[servers]
alpha = "test"
`)
result := encoder.colorizeToml(input)
resultStr := string(result)
// The bug would cause the inline array [8000, 8001, 8002] to be
// coloured with the section colour (Yellow + Bold) instead of being
// left uncoloured or coloured differently.
//
// To test this, we check that the section colour codes appear only
// for actual table sections, not for inline arrays.
// Get the ANSI codes for section colour (Yellow + Bold)
sectionColour := color.New(color.FgYellow, color.Bold).SprintFunc()
sampleSection := sectionColour("[database]")
// Extract just the ANSI codes from the sample
// ANSI codes start with \x1b[
var ansiStart string
for i := 0; i < len(sampleSection); i++ {
if sampleSection[i] == '\x1b' {
// Find the end of the ANSI sequence (ends with 'm')
end := i
for end < len(sampleSection) && sampleSection[end] != 'm' {
end++
}
if end < len(sampleSection) {
ansiStart = sampleSection[i : end+1]
break
}
}
}
// Count how many times the section colour appears in the output
// It should appear exactly twice: once for [database] and once for [servers]
// If it appears more times (e.g., for [8000, 8001, 8002]), that's the bug
sectionColourCount := strings.Count(resultStr, ansiStart)
// We expect exactly 2 occurrences (for [database] and [servers])
// The bug would cause more occurrences (e.g., also for [8000)
if sectionColourCount != 2 {
t.Errorf("Expected section colour to appear exactly 2 times (for [database] and [servers]), but it appeared %d times.\nOutput: %s", sectionColourCount, resultStr)
}
}
// tomlSequenceColouredAsSingleToken reports whether seq shows up in result as
// one unbroken run (no escape codes inside it) that has an ANSI escape byte
// somewhere before it and another one shortly after it. The "shortly after"
// window ends 20 bytes past the start of seq, which is the heuristic these
// tests use to spot a trailing reset code.
func tomlSequenceColouredAsSingleToken(result, seq string) bool {
	const windowFromStart = 20

	idx := strings.Index(result, seq)
	if idx < 0 {
		// The sequence was split up (or is absent), so it is not one token.
		return false
	}
	// Any escape byte ahead of the sequence counts as an opening colour code.
	if strings.LastIndexByte(result[:idx], '\x1b') < 0 {
		return false
	}
	// Look for an escape byte right after the sequence, within the window.
	for i := idx + len(seq); i < len(result) && i < idx+windowFromStart; i++ {
		if result[i] == '\x1b' {
			return true
		}
	}
	return false
}

// TestTomlColorisationNumberBug checks that malformed numeric-looking values
// such as "123-+-45" are not coloured as a single number token, and that
// values in scientific notation still produce output.
func TestTomlColorisationNumberBug(t *testing.T) {
	// Save and restore colour state
	oldNoColor := color.NoColor
	color.NoColor = false
	defer func() { color.NoColor = oldNoColor }()

	// Named enc (not tomlEncoder) so the local does not shadow the type.
	enc := NewTomlEncoder().(*tomlEncoder)

	// Test case that exposes the bug: "123-+-45" should NOT be coloured as a
	// single number. Colouring should stop at "123" because the following
	// '-' is not valid in that position.
	input := "A = 123-+-45\n"
	result := string(enc.colorizeToml([]byte(input)))
	if tomlSequenceColouredAsSingleToken(result, "123-+-45") {
		// The entire "123-+-45" is wrapped in colour codes - this is the bug!
		t.Errorf("BUG DETECTED: '123-+-45' is incorrectly colorized as a single number")
		t.Errorf("Expected only '123' to be colorized as a number, but got the entire '123-+-45'")
		t.Logf("Full output: %q", result)
	}

	// Additional test cases for the bug
	bugTests := []struct {
		name            string
		input           string
		invalidSequence string
		description     string
	}{
		{
			name:            "consecutive minuses",
			input:           "A = 123--45\n",
			invalidSequence: "123--45",
			description:     "'123--45' should not be colorized as a single number",
		},
		{
			name:            "plus in middle",
			input:           "A = 123+45\n",
			invalidSequence: "123+45",
			description:     "'123+45' should not be colorized as a single number",
		},
	}

	for _, tt := range bugTests {
		t.Run(tt.name, func(t *testing.T) {
			result := string(enc.colorizeToml([]byte(tt.input)))
			if tomlSequenceColouredAsSingleToken(result, tt.invalidSequence) {
				t.Errorf("BUG: %s", tt.description)
				t.Logf("Full output: %q", result)
			}
		})
	}

	// Test that valid scientific notation still works. These cases only
	// assert that some output is produced; they do not inspect the colours.
	validTests := []struct {
		name  string
		input string
	}{
		{"scientific positive", "A = 1.23e+45\n"},
		{"scientific negative", "A = 6.626e-34\n"},
		{"scientific uppercase", "A = 1.23E+10\n"},
	}

	for _, tt := range validTests {
		t.Run(tt.name, func(t *testing.T) {
			result := enc.colorizeToml([]byte(tt.input))
			if len(result) == 0 {
				t.Error("Expected non-empty colorized output")
			}
		})
	}
}
// TestTomlStringEscapeColourization tests that string colourization correctly
// handles escape sequences, particularly escaped quotes at the end of strings.
// Each case is a smoke test: colouring must produce non-empty output that
// still contains the key and the '=' sign.
func TestTomlStringEscapeColourization(t *testing.T) {
	// Save and restore colour state
	oldNoColor := color.NoColor
	color.NoColor = false
	defer func() { color.NoColor = oldNoColor }()

	// Named enc (not tomlEncoder) so the local does not shadow the type.
	enc := NewTomlEncoder().(*tomlEncoder)

	testCases := []struct {
		name        string
		input       string
		description string
	}{
		{
			name:        "escaped quote at end",
			input:       `A = "test\""` + "\n",
			description: "String ending with escaped quote should be colorized correctly",
		},
		{
			name:        "escaped backslash then quote",
			input:       `A = "test\\\""` + "\n",
			description: "String with escaped backslash followed by escaped quote",
		},
		{
			name:        "escaped quote in middle",
			input:       `A = "test\"middle"` + "\n",
			description: "String with escaped quote in the middle should be colorized correctly",
		},
		{
			name:        "multiple escaped quotes",
			input:       `A = "\"test\""` + "\n",
			description: "String with escaped quotes at start and end",
		},
		{
			name:        "escaped newline",
			input:       `A = "test\n"` + "\n",
			description: "String with escaped newline should be colorized correctly",
		},
		{
			// NOTE(review): TOML literal (single-quoted) strings have no
			// escape sequences, so this input is not a well-formed TOML
			// string. The case only checks that colouring copes with it.
			name:        "single quote with escaped single quote",
			input:       `A = 'test\''` + "\n",
			description: "Single-quoted string with escaped single quote",
		},
	}

	for _, tt := range testCases {
		t.Run(tt.name, func(t *testing.T) {
			// Record the intent of the case; the testing package shows this
			// only when the subtest fails or when running with -v.
			t.Log(tt.description)

			// The call should not panic and should return some output.
			result := enc.colorizeToml([]byte(tt.input))
			if len(result) == 0 {
				t.Error("Expected non-empty colorized output")
			}

			// At minimum the output should still contain "A" and "=",
			// regardless of any colour codes wrapped around them.
			resultStr := string(result)
			if !strings.Contains(resultStr, "A") || !strings.Contains(resultStr, "=") {
				t.Errorf("Expected output to contain 'A' and '=', got: %q", resultStr)
			}
		})
	}
}

View File

@ -277,4 +277,3 @@ nohcl
zclconf
cty
go-cty
unlabelled