diff --git a/pkg/yqlib/decoder_toml.go b/pkg/yqlib/decoder_toml.go index 29c88203..0b8c107b 100644 --- a/pkg/yqlib/decoder_toml.go +++ b/pkg/yqlib/decoder_toml.go @@ -8,16 +8,18 @@ import ( "fmt" "io" "strconv" + "strings" "time" toml "github.com/pelletier/go-toml/v2/unstable" ) type tomlDecoder struct { - parser toml.Parser - finished bool - d DataTreeNavigator - rootMap *CandidateNode + parser toml.Parser + finished bool + d DataTreeNavigator + rootMap *CandidateNode + fileBytes []byte } func NewTomlDecoder() Decoder { @@ -34,7 +36,8 @@ func (dec *tomlDecoder) Init(reader io.Reader) error { if err != nil { return err } - dec.parser.Reset(buf.Bytes()) + dec.fileBytes = buf.Bytes() + dec.parser.Reset(dec.fileBytes) dec.rootMap = &CandidateNode{ Kind: MappingNode, Tag: "!!map", @@ -42,6 +45,78 @@ func (dec *tomlDecoder) Init(reader io.Reader) error { return nil } +// extractLineComment extracts any inline comment (# ...) after the given position +func (dec *tomlDecoder) extractLineComment(endPos int) string { + src := dec.fileBytes + // Look for # comment after the token + for i := endPos; i < len(src); i++ { + if src[i] == '#' { + // Found comment, extract until end of line + start := i + for i < len(src) && src[i] != '\n' { + i++ + } + return strings.TrimSpace(string(src[start:i])) + } + if src[i] == '\n' { + // Hit newline before comment + break + } + // Skip whitespace and other characters + } + return "" +} + +// extractHeadComment extracts comments before a given start position +// Only extracts comments from immediately preceding lines (no blank lines in between) +func (dec *tomlDecoder) extractHeadComment(startPos int) string { + src := dec.fileBytes + var comments []string + + // Start just before the token and go back to previous newline + i := startPos - 1 + for i >= 0 && src[i] != '\n' { + i-- + } + // Now i is at the newline before the current line, or -1 if at start + + // Keep collecting comment lines going backwards + for i >= 0 { + // Move to end of previous line + i-- // skip the newline + if i < 0 { + break + } + + // Find the start of this line + lineEnd := i + for i >= 0 && src[i] != '\n' { + i-- + } + lineStart := i + 1 + + line := strings.TrimSpace(string(src[lineStart : lineEnd+1])) + + // Empty line stops the comment block + if line == "" { + break + } + + // Non-comment line stops the comment block + if !strings.HasPrefix(line, "#") { + break + } + + // Prepend this comment line + comments = append([]string{line}, comments...) + } + + if len(comments) > 0 { + return strings.Join(comments, "\n") + } + return "" +} + func (dec *tomlDecoder) getFullPath(tomlNode *toml.Node) []interface{} { path := make([]interface{}, 0) for { @@ -63,6 +138,21 @@ func (dec *tomlDecoder) processKeyValueIntoMap(rootMap *CandidateNode, tomlNode return err } + // Extract comments using the value's Raw range (more reliable than KeyValue node) + startPos := int(value.Raw.Offset) + endPos := int(value.Raw.Offset + value.Raw.Length) + + // HeadComment appears before the key-value line + if startPos > 0 { + if headComment := dec.extractHeadComment(startPos); headComment != "" { + valueNode.HeadComment = headComment + } + } + // LineComment appears after the value on the same line + if lineComment := dec.extractLineComment(endPos); lineComment != "" { + valueNode.LineComment = lineComment + } + context := Context{} context = context.SingleChildContext(rootMap) @@ -264,7 +354,8 @@ func (dec *tomlDecoder) processTopLevelNode(currentNode *toml.Node) (bool, error func (dec *tomlDecoder) processTable(currentNode *toml.Node) (bool, error) { log.Debug("Enter processTable") - fullPath := dec.getFullPath(currentNode.Child()) + child := currentNode.Child() + fullPath := dec.getFullPath(child) log.Debug("fullpath: %v", fullPath) c := Context{} @@ -282,6 +373,14 @@ func (dec *tomlDecoder) processTable(currentNode *toml.Node) (bool, error) { EncodeSeparate: true, } + // Extract head comment for the table section using the child node (first key in the table path) + startPos := int(child.Raw.Offset) + if startPos > 0 { + if headComment := dec.extractHeadComment(startPos); headComment != "" { + tableNodeValue.HeadComment = headComment + } + } + var tableValue *toml.Node runAgainstCurrentExp := false hasValue := dec.parser.NextExpression() @@ -331,7 +430,8 @@ func (dec *tomlDecoder) arrayAppend(context Context, path []interface{}, rhsNode func (dec *tomlDecoder) processArrayTable(currentNode *toml.Node) (bool, error) { log.Debug("Enter processArrayTable") - fullPath := dec.getFullPath(currentNode.Child()) + child := currentNode.Child() + fullPath := dec.getFullPath(child) log.Debug("Fullpath: %v", fullPath) c := Context{} @@ -351,6 +451,15 @@ func (dec *tomlDecoder) processArrayTable(currentNode *toml.Node) (bool, error) Tag: "!!map", EncodeSeparate: true, } + + // Extract head comment for the array table section using child node + startPos := int(child.Raw.Offset) + if startPos > 0 { + if headComment := dec.extractHeadComment(startPos); headComment != "" { + tableNodeValue.HeadComment = headComment + } + } + runAgainstCurrentExp := false // if the next value is a ArrayTable or Table, then its not part of this declaration (not a key value pair) // so lets leave that expression for the next round of parsing diff --git a/pkg/yqlib/doc/usage/toml.md b/pkg/yqlib/doc/usage/toml.md index 365ac752..f0aec270 100644 --- a/pkg/yqlib/doc/usage/toml.md +++ b/pkg/yqlib/doc/usage/toml.md @@ -302,3 +302,30 @@ will output [dependencies] ``` +## Roundtrip: comments +Given a sample.toml file of: +```toml +# This is a comment +A = "hello" # inline comment +B = 12 + +# Table comment +[person] +name = "Tom" # name comment + +``` +then +```bash +yq '.' sample.toml +``` +will output +```yaml +# This is a comment +A = "hello" # inline comment +B = 12 + +# Table comment +[person] +name = "Tom" # name comment +``` + diff --git a/pkg/yqlib/encoder_toml.go b/pkg/yqlib/encoder_toml.go index 6f423717..40605843 100644 --- a/pkg/yqlib/encoder_toml.go +++ b/pkg/yqlib/encoder_toml.go @@ -41,6 +41,23 @@ func (te *tomlEncoder) CanHandleAliases() bool { // ---- helpers ---- +func (te *tomlEncoder) writeComment(w io.Writer, comment string) error { + if comment == "" { + return nil + } + lines := strings.Split(comment, "\n") + for _, line := range lines { + line = strings.TrimSpace(line) + if !strings.HasPrefix(line, "#") { + line = "# " + line + } + if _, err := w.Write([]byte(line + "\n")); err != nil { + return err + } + } + return nil +} + func (te *tomlEncoder) formatScalar(node *CandidateNode) string { switch node.Tag { case "!!str": @@ -122,16 +139,47 @@ func (te *tomlEncoder) encodeTopLevelEntry(w io.Writer, path []string, node *Can func (te *tomlEncoder) writeAttribute(w io.Writer, key string, value *CandidateNode) error { te.wroteRootAttr = true // Mark that we wrote a root attribute - _, err := w.Write([]byte(key + " = " + te.formatScalar(value) + "\n")) + + // Write head comment before the attribute + if err := te.writeComment(w, value.HeadComment); err != nil { + return err + } + + // Write the attribute + line := key + " = " + te.formatScalar(value) + + // Add line comment if present + if value.LineComment != "" { + lineComment := strings.TrimSpace(value.LineComment) + if !strings.HasPrefix(lineComment, "#") { + lineComment = "# " + lineComment + } + line += " " + lineComment + } + + _, err := w.Write([]byte(line + "\n")) return err } func (te *tomlEncoder) writeArrayAttribute(w io.Writer, key string, seq *CandidateNode) error { te.wroteRootAttr = true // Mark that we wrote a root attribute + // Write head comment before the array + if err := te.writeComment(w, seq.HeadComment); err != nil { + return err + } + // Handle empty arrays if len(seq.Content) == 0 { - _, err := w.Write([]byte(key + " = []\n")) + line := key + " = []" + if seq.LineComment != "" { + lineComment := strings.TrimSpace(seq.LineComment) + if !strings.HasPrefix(lineComment, "#") { + lineComment = "# " + lineComment + } + line += " " + lineComment + } + _, err := w.Write([]byte(line + "\n")) return err } @@ -161,7 +209,19 @@ func (te *tomlEncoder) writeArrayAttribute(w io.Writer, key string, seq *Candida return fmt.Errorf("unsupported array item kind: %v", it.Kind) } } - _, err := w.Write([]byte(key + " = [" + strings.Join(items, ", ") + "]\n")) + + line := key + " = [" + strings.Join(items, ", ") + "]" + + // Add line comment if present + if seq.LineComment != "" { + lineComment := strings.TrimSpace(seq.LineComment) + if !strings.HasPrefix(lineComment, "#") { + lineComment = "# " + lineComment + } + line += " " + lineComment + } + + _, err := w.Write([]byte(line + "\n")) return err } @@ -229,17 +289,25 @@ func (te *tomlEncoder) writeInlineTableAttribute(w io.Writer, key string, m *Can return err } -func (te *tomlEncoder) writeTableHeader(w io.Writer, path []string) error { - // Add blank line before table header if we wrote root attributes - prefix := "" - if te.wroteRootAttr { - prefix = "\n" +func (te *tomlEncoder) writeTableHeader(w io.Writer, path []string, m *CandidateNode) error { + // Add blank line before table header (or before comment if present) if we wrote root attributes + needsBlankLine := te.wroteRootAttr + if needsBlankLine { + if _, err := w.Write([]byte("\n")); err != nil { + return err + } te.wroteRootAttr = false // Only add once } - // Write headers progressively to ensure nested tables - // Collapse to a single header line [a.b.c] - header := prefix + "[" + strings.Join(path, ".") + "]\n" + // Write head comment before the table header + if m.HeadComment != "" { + if err := te.writeComment(w, m.HeadComment); err != nil { + return err + } + } + + // Write table header [a.b.c] + header := "[" + strings.Join(path, ".") + "]\n" _, err := w.Write([]byte(header)) return err } @@ -273,7 +341,7 @@ func (te *tomlEncoder) encodeSeparateMapping(w io.Writer, path []string, m *Cand // If there are attributes or if the mapping is empty, emit the table header if hasAttrs || len(m.Content) == 0 { - if err := te.writeTableHeader(w, path); err != nil { + if err := te.writeTableHeader(w, path, m); err != nil { return err } if err := te.encodeMappingBodyWithPath(w, path, m); err != nil { @@ -290,7 +358,7 @@ func (te *tomlEncoder) encodeSeparateMapping(w io.Writer, path []string, m *Cand case MappingNode: // Emit [path.k] newPath := append(append([]string{}, path...), k) - if err := te.writeTableHeader(w, newPath); err != nil { + if err := te.writeTableHeader(w, newPath, v); err != nil { return err } if err := te.encodeMappingBodyWithPath(w, newPath, v); err != nil { diff --git a/pkg/yqlib/toml_test.go b/pkg/yqlib/toml_test.go index 19d5af74..29fa8cd8 100644 --- a/pkg/yqlib/toml_test.go +++ b/pkg/yqlib/toml_test.go @@ -216,6 +216,15 @@ age = 36 var rtEmptyTable = `[dependencies] ` +var rtComments = `# This is a comment +A = "hello" # inline comment +B = 12 + +# Table comment +[person] +name = "Tom" # name comment +` + var tomlScenarios = []formatScenario{ { skipDoc: true, @@ -487,6 +496,13 @@ var tomlScenarios = []formatScenario{ expected: rtEmptyTable, scenarioType: "roundtrip", }, + { + description: "Roundtrip: comments", + input: rtComments, + expression: ".", + expected: rtComments, + scenarioType: "roundtrip", + }, } func testTomlScenario(t *testing.T, s formatScenario) {