From b4b96f2a68ae49542a4a6178da9cbc0fa19cae12 Mon Sep 17 00:00:00 2001 From: Tomer Shalev Date: Sat, 31 Jan 2026 14:41:30 +0200 Subject: [PATCH] Fix TOML table parsing after standalone comments Standalone TOML comments immediately inside a table/array-table no longer end the table scope, preventing subsequent keys from being flattened to the document root. --- pkg/yqlib/decoder_toml.go | 116 ++++++++++++++++++++++++++++++-------- pkg/yqlib/toml_test.go | 24 ++++++++ 2 files changed, 117 insertions(+), 23 deletions(-) diff --git a/pkg/yqlib/decoder_toml.go b/pkg/yqlib/decoder_toml.go index 8e76d944..846b3223 100644 --- a/pkg/yqlib/decoder_toml.go +++ b/pkg/yqlib/decoder_toml.go @@ -329,20 +329,51 @@ func (dec *tomlDecoder) processTable(currentNode *toml.Node) (bool, error) { var tableValue *toml.Node runAgainstCurrentExp := false - hasValue := dec.parser.NextExpression() - // check to see if there is any table data - if hasValue { + sawKeyValue := false + for dec.parser.NextExpression() { tableValue = dec.parser.Expression() - // next expression is not table data, so we are done - if tableValue.Kind != toml.KeyValue { - log.Debug("got an empty table") - runAgainstCurrentExp = true - } else { - runAgainstCurrentExp, err = dec.decodeKeyValuesIntoMap(tableNodeValue, tableValue) - if err != nil && !errors.Is(err, io.EOF) { - return false, err - } + // Allow standalone comments inside the table before the first key-value. + // These should be associated with the next element in the table (usually the first key-value), + // not treated as "end of table" (which would cause subsequent key-values to be parsed at root). 
+ if tableValue.Kind == toml.Comment { + dec.pendingComments = append(dec.pendingComments, string(tableValue.Data)) + continue } + + // next expression is not table data, so we are done (but we need to re-process it at top-level) + if tableValue.Kind != toml.KeyValue { + log.Debug("got an empty table (or reached next section)") + // If the table had only comments, attach them to the table itself so they don't leak to the next node. + if !sawKeyValue && len(dec.pendingComments) > 0 { + comments := strings.Join(dec.pendingComments, "\n") + if tableNodeValue.HeadComment == "" { + tableNodeValue.HeadComment = comments + } else { + tableNodeValue.HeadComment = tableNodeValue.HeadComment + "\n" + comments + } + dec.pendingComments = make([]string, 0) + } + runAgainstCurrentExp = true + break + } + + sawKeyValue = true + runAgainstCurrentExp, err = dec.decodeKeyValuesIntoMap(tableNodeValue, tableValue) + if err != nil && !errors.Is(err, io.EOF) { + return false, err + } + break + } + // If we hit EOF after only seeing comments inside this table, attach them to the table itself + // so they don't leak to whatever comes next. 
+ if !sawKeyValue && len(dec.pendingComments) > 0 { + comments := strings.Join(dec.pendingComments, "\n") + if tableNodeValue.HeadComment == "" { + tableNodeValue.HeadComment = comments + } else { + tableNodeValue.HeadComment = tableNodeValue.HeadComment + "\n" + comments + } + dec.pendingComments = make([]string, 0) } err = dec.d.DeeplyAssign(c, fullPath, tableNodeValue) @@ -405,19 +436,58 @@ func (dec *tomlDecoder) processArrayTable(currentNode *toml.Node) (bool, error) } runAgainstCurrentExp := false - // if the next value is a ArrayTable or Table, then its not part of this declaration (not a key value pair) - // so lets leave that expression for the next round of parsing - if hasValue && (dec.parser.Expression().Kind == toml.ArrayTable || dec.parser.Expression().Kind == toml.Table) { - runAgainstCurrentExp = true - } else if hasValue { - // otherwise, if there is a value, it must be some key value pairs of the - // first object in the array! - tableValue := dec.parser.Expression() - runAgainstCurrentExp, err = dec.decodeKeyValuesIntoMap(tableNodeValue, tableValue) - if err != nil && !errors.Is(err, io.EOF) { - return false, err + sawKeyValue := false + if hasValue { + for { + exp := dec.parser.Expression() + // Allow standalone comments inside array tables before the first key-value. + if exp.Kind == toml.Comment { + dec.pendingComments = append(dec.pendingComments, string(exp.Data)) + hasValue = dec.parser.NextExpression() + if !hasValue { + break + } + continue + } + + // if the next value is an ArrayTable or Table, then it's not part of this declaration (not a key value pair) + // so let's leave that expression for the next round of parsing + if exp.Kind == toml.ArrayTable || exp.Kind == toml.Table { + // If this array-table entry had only comments, attach them to the entry so they don't leak. 
+ if !sawKeyValue && len(dec.pendingComments) > 0 { + comments := strings.Join(dec.pendingComments, "\n") + if tableNodeValue.HeadComment == "" { + tableNodeValue.HeadComment = comments + } else { + tableNodeValue.HeadComment = tableNodeValue.HeadComment + "\n" + comments + } + dec.pendingComments = make([]string, 0) + } + runAgainstCurrentExp = true + break + } + + sawKeyValue = true + // otherwise, if there is a value, it must be some key value pairs of the + // first object in the array! + runAgainstCurrentExp, err = dec.decodeKeyValuesIntoMap(tableNodeValue, exp) + if err != nil && !errors.Is(err, io.EOF) { + return false, err + } + break } } + // If we hit EOF after only seeing comments inside this array-table entry, attach them to the entry + // so they don't leak to whatever comes next. + if !sawKeyValue && len(dec.pendingComments) > 0 { + comments := strings.Join(dec.pendingComments, "\n") + if tableNodeValue.HeadComment == "" { + tableNodeValue.HeadComment = comments + } else { + tableNodeValue.HeadComment = tableNodeValue.HeadComment + "\n" + comments + } + dec.pendingComments = make([]string, 0) + } // += function err = dec.arrayAppend(c, fullPath, tableNodeValue) diff --git a/pkg/yqlib/toml_test.go b/pkg/yqlib/toml_test.go index 4c2ef336..919555ec 100644 --- a/pkg/yqlib/toml_test.go +++ b/pkg/yqlib/toml_test.go @@ -228,6 +228,14 @@ B = 12 name = "Tom" # name comment ` +// Repro for https://github.com/mikefarah/yq/issues/2588 +// Bug: standalone comments inside a table cause subsequent key-values to be assigned at root. 
+var issue2588RustToolchainWithComments = ` +[owner] +# comment +name = "Tomer" +` + var sampleFromWeb = `# This is a TOML document title = "TOML Example" @@ -550,6 +558,22 @@ var tomlScenarios = []formatScenario{ expected: rtComments, scenarioType: "roundtrip", }, + { + skipDoc: true, + description: "Issue #2588: comments inside table must not flatten (.owner.name)", + input: issue2588RustToolchainWithComments, + expression: ".owner.name", + expected: "Tomer\n", + scenarioType: "decode", + }, + { + skipDoc: true, + description: "Issue #2588: comments inside table must not flatten (.name)", + input: issue2588RustToolchainWithComments, + expression: ".name", + expected: "null\n", + scenarioType: "decode", + }, { description: "Roundtrip: sample from web", input: sampleFromWeb,