diff --git a/agents.md b/agents.md index 93de183e..bb222d28 100644 --- a/agents.md +++ b/agents.md @@ -1,3 +1,16 @@ +# General rules +✅ **DO:** +- You can use ./yq with the `--debug-node-info` flag to get a deeper understanding of the AST. +- Run ./scripts/format.sh to format the code; then ./scripts/check.sh lint and finally ./scripts/spelling.sh to check spelling. +- Add comprehensive tests to cover the changes +- Run the test suite to ensure there are no regressions +- Use UK English spelling (e.g. Colorisation not Colorization) + +❌ **DON'T:** +- Git add or commit + + + # Adding a New Encoder/Decoder This guide explains how to add support for a new format (encoder/decoder) to yq without modifying `candidate_node.go`. diff --git a/cmd/utils.go b/cmd/utils.go index 4cecccfc..5082b6e3 100644 --- a/cmd/utils.go +++ b/cmd/utils.go @@ -206,6 +206,7 @@ func configureEncoder() (yqlib.Encoder, error) { yqlib.ConfiguredYamlPreferences.ColorsEnabled = colorsEnabled yqlib.ConfiguredJSONPreferences.ColorsEnabled = colorsEnabled yqlib.ConfiguredHclPreferences.ColorsEnabled = colorsEnabled + yqlib.ConfiguredTomlPreferences.ColorsEnabled = colorsEnabled yqlib.ConfiguredYamlPreferences.PrintDocSeparators = !noDocSeparators diff --git a/examples/sample.toml b/examples/sample.toml index ea5c4f93..1376c81a 100644 --- a/examples/sample.toml +++ b/examples/sample.toml @@ -1,6 +1,26 @@ -[[fruits]] -[animals] -[[fruits.varieties]] # nested array of tables -name = "red delicious" \ No newline at end of file +# This is a TOML document + +title = "TOML Example" + +[owner] +name = "Tom Preston-Werner" +dob = 1979-05-27T07:32:00-08:00 + +[database] +enabled = true +ports = [ 8000, 8001, 8002 ] +data = [ ["delta", "phi"], [3.14] ] +temp_targets = { cpu = 79.5, case = 72.0 } + +[servers] + +[servers.alpha] +ip = "10.0.0.1" +role = "frontend" + +[servers.beta] +ip = "10.0.0.2" +role = "backend" + diff --git a/pkg/yqlib/candidate_node.go b/pkg/yqlib/candidate_node.go index 1e119ae7..02168515 100644
--- a/pkg/yqlib/candidate_node.go +++ b/pkg/yqlib/candidate_node.go @@ -465,6 +465,9 @@ func (n *CandidateNode) UpdateAttributesFrom(other *CandidateNode, prefs assignP n.Anchor = other.Anchor } + // Preserve EncodeSeparate flag for format-specific encoding hints + n.EncodeSeparate = other.EncodeSeparate + // merge will pickup the style of the new thing // when autocreating nodes diff --git a/pkg/yqlib/decoder_toml.go b/pkg/yqlib/decoder_toml.go index 6c2fb34c..bd58c60f 100644 --- a/pkg/yqlib/decoder_toml.go +++ b/pkg/yqlib/decoder_toml.go @@ -8,16 +8,19 @@ import ( "fmt" "io" "strconv" + "strings" "time" toml "github.com/pelletier/go-toml/v2/unstable" ) type tomlDecoder struct { - parser toml.Parser - finished bool - d DataTreeNavigator - rootMap *CandidateNode + parser toml.Parser + finished bool + d DataTreeNavigator + rootMap *CandidateNode + pendingComments []string // Head comments collected from Comment nodes + firstContentSeen bool // Track if we've processed the first non-comment node } func NewTomlDecoder() Decoder { @@ -28,7 +31,7 @@ func NewTomlDecoder() Decoder { } func (dec *tomlDecoder) Init(reader io.Reader) error { - dec.parser = toml.Parser{} + dec.parser = toml.Parser{KeepComments: true} buf := new(bytes.Buffer) _, err := buf.ReadFrom(reader) if err != nil { @@ -39,6 +42,8 @@ func (dec *tomlDecoder) Init(reader io.Reader) error { Kind: MappingNode, Tag: "!!map", } + dec.pendingComments = make([]string, 0) + dec.firstContentSeen = false return nil } @@ -56,13 +61,24 @@ func (dec *tomlDecoder) getFullPath(tomlNode *toml.Node) []interface{} { func (dec *tomlDecoder) processKeyValueIntoMap(rootMap *CandidateNode, tomlNode *toml.Node) error { value := tomlNode.Value() path := dec.getFullPath(value.Next()) - log.Debug("processKeyValueIntoMap: %v", path) valueNode, err := dec.decodeNode(value) if err != nil { return err } + // Attach pending head comments + if len(dec.pendingComments) > 0 { + valueNode.HeadComment = strings.Join(dec.pendingComments, 
"\n") + dec.pendingComments = make([]string, 0) + } + + // Check for inline comment chained to the KeyValue node + nextNode := tomlNode.Next() + if nextNode != nil && nextNode.Kind == toml.Comment { + valueNode.LineComment = string(nextNode.Data) + } + context := Context{} context = context.SingleChildContext(rootMap) @@ -79,11 +95,15 @@ func (dec *tomlDecoder) decodeKeyValuesIntoMap(rootMap *CandidateNode, tomlNode nextItem := dec.parser.Expression() log.Debug("decodeKeyValuesIntoMap -- next exp, its a %v", nextItem.Kind) - if nextItem.Kind == toml.KeyValue { + switch nextItem.Kind { + case toml.KeyValue: if err := dec.processKeyValueIntoMap(rootMap, nextItem); err != nil { return false, err } - } else { + case toml.Comment: + // Standalone comment - add to pending for next element + dec.pendingComments = append(dec.pendingComments, string(nextItem.Data)) + default: // run out of key values log.Debug("done in decodeKeyValuesIntoMap, gota a %v", nextItem.Kind) return true, nil @@ -250,11 +270,29 @@ func (dec *tomlDecoder) processTopLevelNode(currentNode *toml.Node) (bool, error var err error log.Debug("processTopLevelNode: Going to process %v state is current %v", currentNode.Kind, NodeToString(dec.rootMap)) switch currentNode.Kind { + case toml.Comment: + // Collect comment to attach to next element + commentText := string(currentNode.Data) + // If we haven't seen any content yet, accumulate comments for root + if !dec.firstContentSeen { + if dec.rootMap.HeadComment == "" { + dec.rootMap.HeadComment = commentText + } else { + dec.rootMap.HeadComment = dec.rootMap.HeadComment + "\n" + commentText + } + } else { + // We've seen content, so these comments are for the next element + dec.pendingComments = append(dec.pendingComments, commentText) + } + return false, nil case toml.Table: + dec.firstContentSeen = true runAgainstCurrentExp, err = dec.processTable(currentNode) case toml.ArrayTable: + dec.firstContentSeen = true runAgainstCurrentExp, err = 
dec.processArrayTable(currentNode) default: + dec.firstContentSeen = true runAgainstCurrentExp, err = dec.decodeKeyValuesIntoMap(dec.rootMap, currentNode) } @@ -264,7 +302,8 @@ func (dec *tomlDecoder) processTopLevelNode(currentNode *toml.Node) (bool, error func (dec *tomlDecoder) processTable(currentNode *toml.Node) (bool, error) { log.Debug("Enter processTable") - fullPath := dec.getFullPath(currentNode.Child()) + child := currentNode.Child() + fullPath := dec.getFullPath(child) log.Debug("fullpath: %v", fullPath) c := Context{} @@ -276,9 +315,16 @@ func (dec *tomlDecoder) processTable(currentNode *toml.Node) (bool, error) { } tableNodeValue := &CandidateNode{ - Kind: MappingNode, - Tag: "!!map", - Content: make([]*CandidateNode, 0), + Kind: MappingNode, + Tag: "!!map", + Content: make([]*CandidateNode, 0), + EncodeSeparate: true, + } + + // Attach pending head comments to the table + if len(dec.pendingComments) > 0 { + tableNodeValue.HeadComment = strings.Join(dec.pendingComments, "\n") + dec.pendingComments = make([]string, 0) } var tableValue *toml.Node @@ -330,7 +376,8 @@ func (dec *tomlDecoder) arrayAppend(context Context, path []interface{}, rhsNode func (dec *tomlDecoder) processArrayTable(currentNode *toml.Node) (bool, error) { log.Debug("Enter processArrayTable") - fullPath := dec.getFullPath(currentNode.Child()) + child := currentNode.Child() + fullPath := dec.getFullPath(child) log.Debug("Fullpath: %v", fullPath) c := Context{} @@ -346,9 +393,17 @@ func (dec *tomlDecoder) processArrayTable(currentNode *toml.Node) (bool, error) hasValue := dec.parser.NextExpression() tableNodeValue := &CandidateNode{ - Kind: MappingNode, - Tag: "!!map", + Kind: MappingNode, + Tag: "!!map", + EncodeSeparate: true, } + + // Attach pending head comments to the array table + if len(dec.pendingComments) > 0 { + tableNodeValue.HeadComment = strings.Join(dec.pendingComments, "\n") + dec.pendingComments = make([]string, 0) + } + runAgainstCurrentExp := false // if the next 
value is a ArrayTable or Table, then its not part of this declaration (not a key value pair) // so lets leave that expression for the next round of parsing diff --git a/pkg/yqlib/doc/usage/hcl.md b/pkg/yqlib/doc/usage/hcl.md index 4d636a70..7704ca40 100644 --- a/pkg/yqlib/doc/usage/hcl.md +++ b/pkg/yqlib/doc/usage/hcl.md @@ -7,7 +7,7 @@ HCL is commonly used in HashiCorp tools like Terraform for configuration files. - String interpolation and expressions (preserved without quotes) - Comments (leading, head, and line comments) - Nested structures (maps and lists) -- Syntax colorization when enabled +- Syntax colorisation when enabled ## Parse HCL diff --git a/pkg/yqlib/doc/usage/headers/hcl.md b/pkg/yqlib/doc/usage/headers/hcl.md index c60a5453..7025d8e3 100644 --- a/pkg/yqlib/doc/usage/headers/hcl.md +++ b/pkg/yqlib/doc/usage/headers/hcl.md @@ -7,5 +7,5 @@ HCL is commonly used in HashiCorp tools like Terraform for configuration files. - String interpolation and expressions (preserved without quotes) - Comments (leading, head, and line comments) - Nested structures (maps and lists) -- Syntax colorization when enabled +- Syntax colorisation when enabled diff --git a/pkg/yqlib/doc/usage/toml.md b/pkg/yqlib/doc/usage/toml.md index 7cc4c375..f0aec270 100644 --- a/pkg/yqlib/doc/usage/toml.md +++ b/pkg/yqlib/doc/usage/toml.md @@ -141,3 +141,191 @@ will output dependencies: {} ``` +## Roundtrip: inline table attribute +Given a sample.toml file of: +```toml +name = { first = "Tom", last = "Preston-Werner" } + +``` +then +```bash +yq '.' sample.toml +``` +will output +```yaml +name = { first = "Tom", last = "Preston-Werner" } +``` + +## Roundtrip: table section +Given a sample.toml file of: +```toml +[owner.contact] +name = "Tom" +age = 36 + +``` +then +```bash +yq '.' 
sample.toml +``` +will output +```yaml +[owner.contact] +name = "Tom" +age = 36 +``` + +## Roundtrip: array of tables +Given a sample.toml file of: +```toml +[[fruits]] +name = "apple" +[[fruits.varieties]] +name = "red delicious" + +``` +then +```bash +yq '.' sample.toml +``` +will output +```yaml +[[fruits]] +name = "apple" +[[fruits.varieties]] +name = "red delicious" +``` + +## Roundtrip: arrays and scalars +Given a sample.toml file of: +```toml +A = ["hello", ["world", "again"]] +B = 12 + +``` +then +```bash +yq '.' sample.toml +``` +will output +```yaml +A = ["hello", ["world", "again"]] +B = 12 +``` + +## Roundtrip: simple +Given a sample.toml file of: +```toml +A = "hello" +B = 12 + +``` +then +```bash +yq '.' sample.toml +``` +will output +```yaml +A = "hello" +B = 12 +``` + +## Roundtrip: deep paths +Given a sample.toml file of: +```toml +[person] +name = "hello" +address = "12 cat st" + +``` +then +```bash +yq '.' sample.toml +``` +will output +```yaml +[person] +name = "hello" +address = "12 cat st" +``` + +## Roundtrip: empty array +Given a sample.toml file of: +```toml +A = [] + +``` +then +```bash +yq '.' sample.toml +``` +will output +```yaml +A = [] +``` + +## Roundtrip: sample table +Given a sample.toml file of: +```toml +var = "x" + +[owner.contact] +name = "Tom Preston-Werner" +age = 36 + +``` +then +```bash +yq '.' sample.toml +``` +will output +```yaml +var = "x" + +[owner.contact] +name = "Tom Preston-Werner" +age = 36 +``` + +## Roundtrip: empty table +Given a sample.toml file of: +```toml +[dependencies] + +``` +then +```bash +yq '.' sample.toml +``` +will output +```yaml +[dependencies] +``` + +## Roundtrip: comments +Given a sample.toml file of: +```toml +# This is a comment +A = "hello" # inline comment +B = 12 + +# Table comment +[person] +name = "Tom" # name comment + +``` +then +```bash +yq '.' 
sample.toml +``` +will output +```yaml +# This is a comment +A = "hello" # inline comment +B = 12 + +# Table comment +[person] +name = "Tom" # name comment +``` + diff --git a/pkg/yqlib/encoder_toml.go b/pkg/yqlib/encoder_toml.go index f8ced30f..3a415ef9 100644 --- a/pkg/yqlib/encoder_toml.go +++ b/pkg/yqlib/encoder_toml.go @@ -1,22 +1,56 @@ package yqlib import ( + "bytes" "fmt" "io" + "strings" + + "github.com/fatih/color" ) type tomlEncoder struct { + wroteRootAttr bool // Track if we wrote root-level attributes before tables + prefs TomlPreferences } func NewTomlEncoder() Encoder { - return &tomlEncoder{} + return NewTomlEncoderWithPrefs(ConfiguredTomlPreferences) +} + +func NewTomlEncoderWithPrefs(prefs TomlPreferences) Encoder { + return &tomlEncoder{prefs: prefs} } func (te *tomlEncoder) Encode(writer io.Writer, node *CandidateNode) error { - if node.Kind == ScalarNode { - return writeString(writer, node.Value+"\n") + if node.Kind != MappingNode { + // For standalone selections, TOML tests expect raw value for scalars + if node.Kind == ScalarNode { + return writeString(writer, node.Value+"\n") + } + return fmt.Errorf("TOML encoder expects a mapping at the root level") } - return fmt.Errorf("only scalars (e.g. strings, numbers, booleans) are supported for TOML output at the moment. 
Please use yaml output format (-oy) until the encoder has been fully implemented") + + // Encode to a buffer first if colors are enabled + var buf bytes.Buffer + var targetWriter io.Writer + targetWriter = writer + if te.prefs.ColorsEnabled { + targetWriter = &buf + } + + // Encode a root mapping as a sequence of attributes, tables, and arrays of tables + if err := te.encodeRootMapping(targetWriter, node); err != nil { + return err + } + + if te.prefs.ColorsEnabled { + colorized := te.colorizeToml(buf.Bytes()) + _, err := writer.Write(colorized) + return err + } + + return nil } func (te *tomlEncoder) PrintDocumentSeparator(_ io.Writer) error { @@ -30,3 +64,632 @@ func (te *tomlEncoder) PrintLeadingContent(_ io.Writer, _ string) error { func (te *tomlEncoder) CanHandleAliases() bool { return false } + +// ---- helpers ---- + +func (te *tomlEncoder) writeComment(w io.Writer, comment string) error { + if comment == "" { + return nil + } + lines := strings.Split(comment, "\n") + for _, line := range lines { + line = strings.TrimSpace(line) + if !strings.HasPrefix(line, "#") { + line = "# " + line + } + if _, err := w.Write([]byte(line + "\n")); err != nil { + return err + } + } + return nil +} + +func (te *tomlEncoder) formatScalar(node *CandidateNode) string { + switch node.Tag { + case "!!str": + // Quote strings per TOML spec + return fmt.Sprintf("%q", node.Value) + case "!!bool", "!!int", "!!float": + return node.Value + case "!!null": + // TOML does not have null; encode as empty string + return `""` + default: + return node.Value + } +} + +func (te *tomlEncoder) encodeRootMapping(w io.Writer, node *CandidateNode) error { + te.wroteRootAttr = false // Reset state + + // Write root head comment if present (at the very beginning, no leading blank line) + if node.HeadComment != "" { + if err := te.writeComment(w, node.HeadComment); err != nil { + return err + } + } + + // Preserve existing order by iterating Content + for i := 0; i < len(node.Content); i += 2 { + 
keyNode := node.Content[i] + valNode := node.Content[i+1] + if err := te.encodeTopLevelEntry(w, []string{keyNode.Value}, valNode); err != nil { + return err + } + } + return nil +} + +// encodeTopLevelEntry encodes a key/value at the root, dispatching to attribute, table, or array-of-tables +func (te *tomlEncoder) encodeTopLevelEntry(w io.Writer, path []string, node *CandidateNode) error { + switch node.Kind { + case ScalarNode: + // key = value + return te.writeAttribute(w, path[len(path)-1], node) + case SequenceNode: + // Empty arrays should be encoded as [] attributes + if len(node.Content) == 0 { + return te.writeArrayAttribute(w, path[len(path)-1], node) + } + + // If all items are mappings => array of tables; else => array attribute + allMaps := true + for _, it := range node.Content { + if it.Kind != MappingNode { + allMaps = false + break + } + } + if allMaps { + key := path[len(path)-1] + for _, it := range node.Content { + // [[key]] then body + if _, err := w.Write([]byte("[[" + key + "]]\n")); err != nil { + return err + } + if err := te.encodeMappingBodyWithPath(w, []string{key}, it); err != nil { + return err + } + } + return nil + } + // Regular array attribute + return te.writeArrayAttribute(w, path[len(path)-1], node) + case MappingNode: + // Inline table if not EncodeSeparate, else emit separate tables/arrays of tables for children under this path + if !node.EncodeSeparate { + // If children contain mappings or arrays of mappings, prefer separate sections + if te.hasEncodeSeparateChild(node) || te.hasStructuralChildren(node) { + return te.encodeSeparateMapping(w, path, node) + } + return te.writeInlineTableAttribute(w, path[len(path)-1], node) + } + return te.encodeSeparateMapping(w, path, node) + default: + return fmt.Errorf("unsupported node kind for TOML: %v", node.Kind) + } +} + +func (te *tomlEncoder) writeAttribute(w io.Writer, key string, value *CandidateNode) error { + te.wroteRootAttr = true // Mark that we wrote a root attribute + + // 
Write head comment before the attribute + if err := te.writeComment(w, value.HeadComment); err != nil { + return err + } + + // Write the attribute + line := key + " = " + te.formatScalar(value) + + // Add line comment if present + if value.LineComment != "" { + lineComment := strings.TrimSpace(value.LineComment) + if !strings.HasPrefix(lineComment, "#") { + lineComment = "# " + lineComment + } + line += " " + lineComment + } + + _, err := w.Write([]byte(line + "\n")) + return err +} + +func (te *tomlEncoder) writeArrayAttribute(w io.Writer, key string, seq *CandidateNode) error { + te.wroteRootAttr = true // Mark that we wrote a root attribute + + // Write head comment before the array + if err := te.writeComment(w, seq.HeadComment); err != nil { + return err + } + + // Handle empty arrays + if len(seq.Content) == 0 { + line := key + " = []" + if seq.LineComment != "" { + lineComment := strings.TrimSpace(seq.LineComment) + if !strings.HasPrefix(lineComment, "#") { + lineComment = "# " + lineComment + } + line += " " + lineComment + } + _, err := w.Write([]byte(line + "\n")) + return err + } + + // Join scalars or nested arrays recursively into TOML array syntax + items := make([]string, 0, len(seq.Content)) + for _, it := range seq.Content { + switch it.Kind { + case ScalarNode: + items = append(items, te.formatScalar(it)) + case SequenceNode: + // Nested arrays: encode inline + nested, err := te.sequenceToInlineArray(it) + if err != nil { + return err + } + items = append(items, nested) + case MappingNode: + // Inline table inside array + inline, err := te.mappingToInlineTable(it) + if err != nil { + return err + } + items = append(items, inline) + case AliasNode: + return fmt.Errorf("aliases are not supported in TOML") + default: + return fmt.Errorf("unsupported array item kind: %v", it.Kind) + } + } + + line := key + " = [" + strings.Join(items, ", ") + "]" + + // Add line comment if present + if seq.LineComment != "" { + lineComment := 
strings.TrimSpace(seq.LineComment) + if !strings.HasPrefix(lineComment, "#") { + lineComment = "# " + lineComment + } + line += " " + lineComment + } + + _, err := w.Write([]byte(line + "\n")) + return err +} + +func (te *tomlEncoder) sequenceToInlineArray(seq *CandidateNode) (string, error) { + items := make([]string, 0, len(seq.Content)) + for _, it := range seq.Content { + switch it.Kind { + case ScalarNode: + items = append(items, te.formatScalar(it)) + case SequenceNode: + nested, err := te.sequenceToInlineArray(it) + if err != nil { + return "", err + } + items = append(items, nested) + case MappingNode: + inline, err := te.mappingToInlineTable(it) + if err != nil { + return "", err + } + items = append(items, inline) + default: + return "", fmt.Errorf("unsupported array item kind: %v", it.Kind) + } + } + return "[" + strings.Join(items, ", ") + "]", nil +} + +func (te *tomlEncoder) mappingToInlineTable(m *CandidateNode) (string, error) { + // key = { a = 1, b = "x" } + parts := make([]string, 0, len(m.Content)/2) + for i := 0; i < len(m.Content); i += 2 { + k := m.Content[i].Value + v := m.Content[i+1] + switch v.Kind { + case ScalarNode: + parts = append(parts, fmt.Sprintf("%s = %s", k, te.formatScalar(v))) + case SequenceNode: + // inline array in inline table + arr, err := te.sequenceToInlineArray(v) + if err != nil { + return "", err + } + parts = append(parts, fmt.Sprintf("%s = %s", k, arr)) + case MappingNode: + // nested inline table + inline, err := te.mappingToInlineTable(v) + if err != nil { + return "", err + } + parts = append(parts, fmt.Sprintf("%s = %s", k, inline)) + default: + return "", fmt.Errorf("unsupported inline table value kind: %v", v.Kind) + } + } + return "{ " + strings.Join(parts, ", ") + " }", nil +} + +func (te *tomlEncoder) writeInlineTableAttribute(w io.Writer, key string, m *CandidateNode) error { + inline, err := te.mappingToInlineTable(m) + if err != nil { + return err + } + _, err = w.Write([]byte(key + " = " + inline + 
"\n")) + return err +} + +func (te *tomlEncoder) writeTableHeader(w io.Writer, path []string, m *CandidateNode) error { + // Add blank line before table header (or before comment if present) if we wrote root attributes + needsBlankLine := te.wroteRootAttr + if needsBlankLine { + if _, err := w.Write([]byte("\n")); err != nil { + return err + } + te.wroteRootAttr = false // Only add once + } + + // Write head comment before the table header + if m.HeadComment != "" { + if err := te.writeComment(w, m.HeadComment); err != nil { + return err + } + } + + // Write table header [a.b.c] + header := "[" + strings.Join(path, ".") + "]\n" + _, err := w.Write([]byte(header)) + return err +} + +// encodeSeparateMapping handles a mapping that should be encoded as table sections. +// It emits the table header for this mapping if it has any content, then processes children. +func (te *tomlEncoder) encodeSeparateMapping(w io.Writer, path []string, m *CandidateNode) error { + // Check if this mapping has any non-mapping, non-array-of-tables children (i.e., attributes) + hasAttrs := false + for i := 0; i < len(m.Content); i += 2 { + v := m.Content[i+1] + if v.Kind == ScalarNode { + hasAttrs = true + break + } + if v.Kind == SequenceNode { + // Check if it's NOT an array of tables + allMaps := true + for _, it := range v.Content { + if it.Kind != MappingNode { + allMaps = false + break + } + } + if !allMaps { + hasAttrs = true + break + } + } + } + + // If there are attributes or if the mapping is empty, emit the table header + if hasAttrs || len(m.Content) == 0 { + if err := te.writeTableHeader(w, path, m); err != nil { + return err + } + if err := te.encodeMappingBodyWithPath(w, path, m); err != nil { + return err + } + return nil + } + + // No attributes, just nested structures - process children + for i := 0; i < len(m.Content); i += 2 { + k := m.Content[i].Value + v := m.Content[i+1] + switch v.Kind { + case MappingNode: + // Emit [path.k] + newPath := append(append([]string{}, 
path...), k) + if err := te.writeTableHeader(w, newPath, v); err != nil { + return err + } + if err := te.encodeMappingBodyWithPath(w, newPath, v); err != nil { + return err + } + case SequenceNode: + // If sequence of maps, emit [[path.k]] per element + allMaps := true + for _, it := range v.Content { + if it.Kind != MappingNode { + allMaps = false + break + } + } + if allMaps { + key := strings.Join(append(append([]string{}, path...), k), ".") + for _, it := range v.Content { + if _, err := w.Write([]byte("[[" + key + "]]\n")); err != nil { + return err + } + if err := te.encodeMappingBodyWithPath(w, append(append([]string{}, path...), k), it); err != nil { + return err + } + } + } else { + // Regular array attribute under the current table path + if err := te.writeArrayAttribute(w, k, v); err != nil { + return err + } + } + case ScalarNode: + // Attributes directly under the current table path + if err := te.writeAttribute(w, k, v); err != nil { + return err + } + } + } + return nil +} + +func (te *tomlEncoder) hasEncodeSeparateChild(m *CandidateNode) bool { + for i := 0; i < len(m.Content); i += 2 { + v := m.Content[i+1] + if v.Kind == MappingNode && v.EncodeSeparate { + return true + } + } + return false +} + +func (te *tomlEncoder) hasStructuralChildren(m *CandidateNode) bool { + for i := 0; i < len(m.Content); i += 2 { + v := m.Content[i+1] + // Only consider it structural if mapping has EncodeSeparate or is non-empty + if v.Kind == MappingNode && v.EncodeSeparate { + return true + } + if v.Kind == SequenceNode { + allMaps := true + for _, it := range v.Content { + if it.Kind != MappingNode { + allMaps = false + break + } + } + if allMaps { + return true + } + } + } + return false +} + +// encodeMappingBodyWithPath encodes the body of the table at path: its attributes first, then its nested sections. +// +// w receives the encoded TOML, path is the key path of the table being written (used to build fully +// qualified section headers) and m is the mapping node whose children are encoded. +// Attributes (scalars, plain arrays and inline tables) are all written before any section header, +// because TOML assigns every key/value line to the most recently opened table. Nested sections are +// non-empty arrays of mappings, emitted as [[path.key]], and child mappings marked EncodeSeparate, +// encoded as table sections under path.key. Returns the first write or encoding error encountered. +func (te *tomlEncoder) encodeMappingBodyWithPath(w io.Writer, path []string, m *CandidateNode) error { + // An empty sequence has no tables to emit, so it is treated as a plain "key = []" attribute + isArrayOfTables := func(n *CandidateNode) bool { + for _, it := range n.Content { +
if it.Kind != MappingNode { + return false + } + } + return n.Kind == SequenceNode && len(n.Content) > 0 + } + + // First, attributes, in their original order + for i := 0; i < len(m.Content); i += 2 { + k := m.Content[i].Value + v := m.Content[i+1] + var err error + switch { + case v.Kind == ScalarNode: + err = te.writeAttribute(w, k, v) + case v.Kind == SequenceNode && !isArrayOfTables(v): + err = te.writeArrayAttribute(w, k, v) + case v.Kind == MappingNode && !v.EncodeSeparate: + // Inline tables are attributes too: written after a section header they would be parsed + // as part of that section + err = te.writeInlineTableAttribute(w, k, v) + } + if err != nil { + return err + } + } + + // Then, nested sections with full path + for i := 0; i < len(m.Content); i += 2 { + v := m.Content[i+1] + // Copy path so the caller's backing array is never written to + childPath := append(append([]string{}, path...), m.Content[i].Value) + switch { + case isArrayOfTables(v): + header := "[[" + strings.Join(childPath, ".") + "]]\n" + for _, it := range v.Content { + if _, err := w.Write([]byte(header)); err != nil { + return err + } + if err := te.encodeMappingBodyWithPath(w, childPath, it); err != nil { + return err + } + } + case v.Kind == MappingNode && v.EncodeSeparate: + // Child tables such as [a.b] beneath a table [a] that also has attributes; + // skipping these would silently drop them from the output + if err := te.encodeSeparateMapping(w, childPath, v); err != nil { + return err + } + } + } + return nil +} + +// colorizeToml applies syntax highlighting to TOML output using fatih/color +func (te *tomlEncoder) colorizeToml(input []byte) []byte { + toml := string(input) + result := strings.Builder{} + + // Force color output (don't check for TTY) + color.NoColor = false + + // Create color functions for different token types + commentColor := color.New(color.FgHiBlack).SprintFunc() + stringColor := color.New(color.FgGreen).SprintFunc() + numberColor := color.New(color.FgHiMagenta).SprintFunc() + keyColor := color.New(color.FgCyan).SprintFunc() + boolColor := color.New(color.FgHiMagenta).SprintFunc() +
sectionColor := color.New(color.FgYellow, color.Bold).SprintFunc() + + // Simple tokenization for TOML coloring + i := 0 + for i < len(toml) { + ch := toml[i] + + // Comments - from # to end of line + if ch == '#' { + end := i + for end < len(toml) && toml[end] != '\n' { + end++ + } + result.WriteString(commentColor(toml[i:end])) + i = end + continue + } + + // Table sections - [section] or [[array]] + // Only treat '[' as a table section if it appears at the start of the line + // (possibly after whitespace). This avoids mis-colouring inline arrays like + // "ports = [8000, 8001]" as table sections. + if ch == '[' { + isSectionHeader := true + if i > 0 { + isSectionHeader = false + j := i - 1 + for j >= 0 && toml[j] != '\n' { + if toml[j] != ' ' && toml[j] != '\t' && toml[j] != '\r' { + // Found a non-whitespace character before this '[' on the same line, + // so this is not a table header. + break + } + j-- + } + if j < 0 || toml[j] == '\n' { + // Reached the start of the string or a newline without encountering + // any non-whitespace, so '[' is at the logical start of the line. 
+ isSectionHeader = true + } + } + if isSectionHeader { + end := i + 1 + // Check for [[ + if end < len(toml) && toml[end] == '[' { + end++ + } + // Find closing ] + for end < len(toml) && toml[end] != ']' { + end++ + } + // Include closing ] + if end < len(toml) { + end++ + // Check for ]] + if end < len(toml) && toml[end] == ']' { + end++ + } + } + result.WriteString(sectionColor(toml[i:end])) + i = end + continue + } + } + + // Strings - quoted text (double or single quotes) + if ch == '"' || ch == '\'' { + quote := ch + end := i + 1 + for end < len(toml) { + if toml[end] == quote { + break + } + if toml[end] == '\\' && end+1 < len(toml) { + // Skip the backslash and the escaped character + end += 2 + continue + } + end++ + } + if end < len(toml) { + end++ // include closing quote + } + result.WriteString(stringColor(toml[i:end])) + i = end + continue + } + + // Numbers - sequences of digits, possibly with decimal point or minus + if (ch >= '0' && ch <= '9') || (ch == '-' && i+1 < len(toml) && toml[i+1] >= '0' && toml[i+1] <= '9') { + end := i + if ch == '-' { + end++ + } + for end < len(toml) { + c := toml[end] + if (c >= '0' && c <= '9') || c == '.' 
|| c == 'e' || c == 'E' { + end++ + } else if (c == '+' || c == '-') && end > 0 && (toml[end-1] == 'e' || toml[end-1] == 'E') { + // Only allow + or - immediately after 'e' or 'E' for scientific notation + end++ + } else { + break + } + } + result.WriteString(numberColor(toml[i:end])) + i = end + continue + } + + // Identifiers/keys - alphanumeric + underscore + dash + if (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_' { + end := i + for end < len(toml) && ((toml[end] >= 'a' && toml[end] <= 'z') || + (toml[end] >= 'A' && toml[end] <= 'Z') || + (toml[end] >= '0' && toml[end] <= '9') || + toml[end] == '_' || toml[end] == '-') { + end++ + } + ident := toml[i:end] + + // Check if this is a boolean/null keyword + switch ident { + case "true", "false": + result.WriteString(boolColor(ident)) + default: + // Check if followed by = or whitespace then = (it's a key) + j := end + for j < len(toml) && (toml[j] == ' ' || toml[j] == '\t') { + j++ + } + if j < len(toml) && toml[j] == '=' { + result.WriteString(keyColor(ident)) + } else { + result.WriteString(ident) // plain text for other identifiers + } + } + i = end + continue + } + + // Everything else (whitespace, operators, brackets) - no color + result.WriteByte(ch) + i++ + } + + return []byte(result.String()) +} diff --git a/pkg/yqlib/format.go b/pkg/yqlib/format.go index edb7ce46..29336e66 100644 --- a/pkg/yqlib/format.go +++ b/pkg/yqlib/format.go @@ -63,7 +63,7 @@ var ShFormat = &Format{"", nil, } var TomlFormat = &Format{"toml", []string{}, - func() Encoder { return NewTomlEncoder() }, + func() Encoder { return NewTomlEncoderWithPrefs(ConfiguredTomlPreferences) }, func() Decoder { return NewTomlDecoder() }, } diff --git a/pkg/yqlib/toml.go b/pkg/yqlib/toml.go new file mode 100644 index 00000000..4147954c --- /dev/null +++ b/pkg/yqlib/toml.go @@ -0,0 +1,15 @@ +package yqlib + +type TomlPreferences struct { + ColorsEnabled bool +} + +func NewDefaultTomlPreferences() TomlPreferences { + return 
TomlPreferences{ColorsEnabled: false} +} + +func (p *TomlPreferences) Copy() TomlPreferences { + return TomlPreferences{ColorsEnabled: p.ColorsEnabled} +} + +var ConfiguredTomlPreferences = NewDefaultTomlPreferences() diff --git a/pkg/yqlib/toml_test.go b/pkg/yqlib/toml_test.go index c9c22f4a..be478aa4 100644 --- a/pkg/yqlib/toml_test.go +++ b/pkg/yqlib/toml_test.go @@ -3,8 +3,10 @@ package yqlib import ( "bufio" "fmt" + "strings" "testing" + "github.com/fatih/color" "github.com/mikefarah/yq/v4/test" ) @@ -175,6 +177,82 @@ var expectedSampleWithHeader = `servers: ip: 10.0.0.1 ` +// Roundtrip fixtures +var rtInlineTableAttr = `name = { first = "Tom", last = "Preston-Werner" } +` + +var rtTableSection = `[owner.contact] +name = "Tom" +age = 36 +` + +var rtArrayOfTables = `[[fruits]] +name = "apple" +[[fruits.varieties]] +name = "red delicious" +` + +var rtArraysAndScalars = `A = ["hello", ["world", "again"]] +B = 12 +` + +var rtSimple = `A = "hello" +B = 12 +` + +var rtDeepPaths = `[person] +name = "hello" +address = "12 cat st" +` + +var rtEmptyArray = `A = [] +` + +var rtSampleTable = `var = "x" + +[owner.contact] +name = "Tom Preston-Werner" +age = 36 +` + +var rtEmptyTable = `[dependencies] +` + +var rtComments = `# This is a comment +A = "hello" # inline comment +B = 12 + +# Table comment +[person] +name = "Tom" # name comment +` + +// var sampleFromWeb = ` +// # This is a TOML document + +// title = "TOML Example" + +// [owner] +// name = "Tom Preston-Werner" +// dob = 1979-05-27T07:32:00-08:00 + +// [database] +// enabled = true +// ports = [8000, 8001, 8002] +// data = [["delta", "phi"], [3.14]] +// temp_targets = { cpu = 79.5, case = 72.0 } + +// [servers] + +// [servers.alpha] +// ip = "10.0.0.1" +// role = "frontend" + +// [servers.beta] +// ip = "10.0.0.2" +// role = "backend" +// ` + var tomlScenarios = []formatScenario{ { skipDoc: true, @@ -382,6 +460,84 @@ var tomlScenarios = []formatScenario{ expected: expectedMultipleEmptyTables, scenarioType: 
"decode", }, + // Roundtrip scenarios + { + description: "Roundtrip: inline table attribute", + input: rtInlineTableAttr, + expression: ".", + expected: rtInlineTableAttr, + scenarioType: "roundtrip", + }, + { + description: "Roundtrip: table section", + input: rtTableSection, + expression: ".", + expected: rtTableSection, + scenarioType: "roundtrip", + }, + { + description: "Roundtrip: array of tables", + input: rtArrayOfTables, + expression: ".", + expected: rtArrayOfTables, + scenarioType: "roundtrip", + }, + { + description: "Roundtrip: arrays and scalars", + input: rtArraysAndScalars, + expression: ".", + expected: rtArraysAndScalars, + scenarioType: "roundtrip", + }, + { + description: "Roundtrip: simple", + input: rtSimple, + expression: ".", + expected: rtSimple, + scenarioType: "roundtrip", + }, + { + description: "Roundtrip: deep paths", + input: rtDeepPaths, + expression: ".", + expected: rtDeepPaths, + scenarioType: "roundtrip", + }, + { + description: "Roundtrip: empty array", + input: rtEmptyArray, + expression: ".", + expected: rtEmptyArray, + scenarioType: "roundtrip", + }, + { + description: "Roundtrip: sample table", + input: rtSampleTable, + expression: ".", + expected: rtSampleTable, + scenarioType: "roundtrip", + }, + { + description: "Roundtrip: empty table", + input: rtEmptyTable, + expression: ".", + expected: rtEmptyTable, + scenarioType: "roundtrip", + }, + { + description: "Roundtrip: comments", + input: rtComments, + expression: ".", + expected: rtComments, + scenarioType: "roundtrip", + }, + // { + // description: "Roundtrip: sample from web", + // input: sampleFromWeb, + // expression: ".", + // expected: sampleFromWeb, + // scenarioType: "roundtrip", + // }, } func testTomlScenario(t *testing.T, s formatScenario) { @@ -471,3 +627,238 @@ func TestTomlScenarios(t *testing.T) { } documentScenarios(t, "usage", "toml", genericScenarios, documentTomlScenario) } + +// TestTomlColourization tests that colourization correctly distinguishes +// 
between table section headers and inline arrays +func TestTomlColourization(t *testing.T) { + // Test that inline arrays are not coloured as table sections + encoder := &tomlEncoder{prefs: TomlPreferences{ColorsEnabled: true}} + + // Create TOML with both table sections and inline arrays + input := []byte(`[database] +enabled = true +ports = [8000, 8001, 8002] + +[servers] +alpha = "test" +`) + + result := encoder.colorizeToml(input) + resultStr := string(result) + + // The bug would cause the inline array [8000, 8001, 8002] to be + // coloured with the section colour (Yellow + Bold) instead of being + // left uncoloured or coloured differently. + // + // To test this, we check that the section colour codes appear only + // for actual table sections, not for inline arrays. + + // Get the ANSI codes for section colour (Yellow + Bold) + sectionColour := color.New(color.FgYellow, color.Bold).SprintFunc() + sampleSection := sectionColour("[database]") + + // Extract just the ANSI codes from the sample + // ANSI codes start with \x1b[ + var ansiStart string + for i := 0; i < len(sampleSection); i++ { + if sampleSection[i] == '\x1b' { + // Find the end of the ANSI sequence (ends with 'm') + end := i + for end < len(sampleSection) && sampleSection[end] != 'm' { + end++ + } + if end < len(sampleSection) { + ansiStart = sampleSection[i : end+1] + break + } + } + } + + // Count how many times the section colour appears in the output + // It should appear exactly twice: once for [database] and once for [servers] + // If it appears more times (e.g., for [8000, 8001, 8002]), that's the bug + sectionColourCount := strings.Count(resultStr, ansiStart) + + // We expect exactly 2 occurrences (for [database] and [servers]) + // The bug would cause more occurrences (e.g., also for [8000) + if sectionColourCount != 2 { + t.Errorf("Expected section colour to appear exactly 2 times (for [database] and [servers]), but it appeared %d times.\nOutput: %s", sectionColourCount, resultStr) + } 
+} + +func TestTomlColorisationNumberBug(t *testing.T) { + // Save and restore color state + oldNoColor := color.NoColor + color.NoColor = false + defer func() { color.NoColor = oldNoColor }() + + encoder := NewTomlEncoder() + tomlEncoder := encoder.(*tomlEncoder) + + // Test case that exposes the bug: "123-+-45" should NOT be colorized as a single number + input := "A = 123-+-45\n" + result := string(tomlEncoder.colorizeToml([]byte(input))) + + // The bug causes "123-+-45" to be colorized as one token + // It should stop at "123" because the next character '-' is not valid in this position + if strings.Contains(result, "123-+-45") { + // Check if it's colorized as a single token (no color codes in the middle) + idx := strings.Index(result, "123-+-45") + // Look backwards for color code + beforeIdx := idx - 1 + for beforeIdx >= 0 && result[beforeIdx] != '\x1b' { + beforeIdx-- + } + // Look forward for reset code + afterIdx := idx + 8 // length of "123-+-45" + hasResetAfter := false + for afterIdx < len(result) && afterIdx < idx+20 { + if result[afterIdx] == '\x1b' { + hasResetAfter = true + break + } + afterIdx++ + } + + if beforeIdx >= 0 && hasResetAfter { + // The entire "123-+-45" is wrapped in color codes - this is the bug! 
+ t.Errorf("BUG DETECTED: '123-+-45' is incorrectly colorized as a single number") + t.Errorf("Expected only '123' to be colorized as a number, but got the entire '123-+-45'") + t.Logf("Full output: %q", result) + t.Fail() + } + } + + // Additional test cases for the bug + bugTests := []struct { + name string + input string + invalidSequence string + description string + }{ + { + name: "consecutive minuses", + input: "A = 123--45\n", + invalidSequence: "123--45", + description: "'123--45' should not be colorized as a single number", + }, + { + name: "plus in middle", + input: "A = 123+45\n", + invalidSequence: "123+45", + description: "'123+45' should not be colorized as a single number", + }, + } + + for _, tt := range bugTests { + t.Run(tt.name, func(t *testing.T) { + result := string(tomlEncoder.colorizeToml([]byte(tt.input))) + if strings.Contains(result, tt.invalidSequence) { + idx := strings.Index(result, tt.invalidSequence) + beforeIdx := idx - 1 + for beforeIdx >= 0 && result[beforeIdx] != '\x1b' { + beforeIdx-- + } + afterIdx := idx + len(tt.invalidSequence) + hasResetAfter := false + for afterIdx < len(result) && afterIdx < idx+20 { + if result[afterIdx] == '\x1b' { + hasResetAfter = true + break + } + afterIdx++ + } + + if beforeIdx >= 0 && hasResetAfter { + t.Errorf("BUG: %s", tt.description) + t.Logf("Full output: %q", result) + } + } + }) + } + + // Test that valid scientific notation still works + validTests := []struct { + name string + input string + }{ + {"scientific positive", "A = 1.23e+45\n"}, + {"scientific negative", "A = 6.626e-34\n"}, + {"scientific uppercase", "A = 1.23E+10\n"}, + } + + for _, tt := range validTests { + t.Run(tt.name, func(t *testing.T) { + result := tomlEncoder.colorizeToml([]byte(tt.input)) + if len(result) == 0 { + t.Error("Expected non-empty colorized output") + } + }) + } +} + +// TestTomlStringEscapeColourization tests that string colourization correctly +// handles escape sequences, particularly escaped quotes at 
the end of strings +func TestTomlStringEscapeColourization(t *testing.T) { + // Save and restore color state + oldNoColor := color.NoColor + color.NoColor = false + defer func() { color.NoColor = oldNoColor }() + + encoder := NewTomlEncoder() + tomlEncoder := encoder.(*tomlEncoder) + + testCases := []struct { + name string + input string + description string + }{ + { + name: "escaped quote at end", + input: `A = "test\""` + "\n", + description: "String ending with escaped quote should be colorized correctly", + }, + { + name: "escaped backslash then quote", + input: `A = "test\\\""` + "\n", + description: "String with escaped backslash followed by escaped quote", + }, + { + name: "escaped quote in middle", + input: `A = "test\"middle"` + "\n", + description: "String with escaped quote in the middle should be colorized correctly", + }, + { + name: "multiple escaped quotes", + input: `A = "\"test\""` + "\n", + description: "String with escaped quotes at start and end", + }, + { + name: "escaped newline", + input: `A = "test\n"` + "\n", + description: "String with escaped newline should be colorized correctly", + }, + { + name: "single quote with escaped single quote", + input: `A = 'test\''` + "\n", + description: "Single-quoted string with escaped single quote", + }, + } + + for _, tt := range testCases { + t.Run(tt.name, func(t *testing.T) { + // The test should not panic and should return some output + result := tomlEncoder.colorizeToml([]byte(tt.input)) + if len(result) == 0 { + t.Error("Expected non-empty colorized output") + } + + // Check that the result contains the input string (with color codes) + // At minimum, it should contain "A" and "=" + resultStr := string(result) + if !strings.Contains(resultStr, "A") || !strings.Contains(resultStr, "=") { + t.Errorf("Expected output to contain 'A' and '=', got: %q", resultStr) + } + }) + } +} diff --git a/project-words.txt b/project-words.txt index d5dc035c..849331e1 100644 --- a/project-words.txt +++ 
b/project-words.txt @@ -277,4 +277,3 @@ nohcl zclconf cty go-cty -unlabelled \ No newline at end of file