From 2195df9e7a71476758dbe7d2f8590cf9c196aedc Mon Sep 17 00:00:00 2001 From: Mike Farah Date: Thu, 2 Mar 2023 10:57:54 +1100 Subject: [PATCH] Fixed xml encoding of ProcInst #1563, improved XML comment handling --- pkg/yqlib/decoder_xml.go | 12 ++++- pkg/yqlib/doc/usage/xml.md | 13 +++-- pkg/yqlib/encoder_xml.go | 86 ++++++++++++++++++++--------- pkg/yqlib/xml_test.go | 108 +++++++++++++++++++++++++++++++++++-- 4 files changed, 184 insertions(+), 35 deletions(-) diff --git a/pkg/yqlib/decoder_xml.go b/pkg/yqlib/decoder_xml.go index bfb18e12..5c05b97f 100644 --- a/pkg/yqlib/decoder_xml.go +++ b/pkg/yqlib/decoder_xml.go @@ -7,6 +7,7 @@ import ( "errors" "fmt" "io" + "regexp" "strings" "unicode" @@ -48,11 +49,19 @@ func (dec *xmlDecoder) createSequence(nodes []*xmlNode) (*yaml.Node, error) { return yamlNode, nil } +var decoderCommentPrefix = regexp.MustCompile(`(^|\n)([[:alpha:]])`) + func (dec *xmlDecoder) processComment(c string) string { if c == "" { return "" } - return "#" + strings.TrimRight(c, " ") + //need to replace "cat " with "# cat" + // "\ncat\n" with "\n cat\n" + // ensure non-empty comments starting with newline have a space in front + + replacement := decoderCommentPrefix.ReplaceAllString(c, "$1 $2") + replacement = "#" + strings.ReplaceAll(strings.TrimRight(replacement, " "), "\n", "\n#") + return replacement } func (dec *xmlDecoder) createMap(n *xmlNode) (*yaml.Node, error) { @@ -77,6 +86,7 @@ func (dec *xmlDecoder) createMap(n *xmlNode) (*yaml.Node, error) { var err error if i == 0 { + log.Debugf("head comment here") labelNode.HeadComment = dec.processComment(n.HeadComment) } diff --git a/pkg/yqlib/doc/usage/xml.md b/pkg/yqlib/doc/usage/xml.md index 29a2b585..3d85a4ff 100644 --- a/pkg/yqlib/doc/usage/xml.md +++ b/pkg/yqlib/doc/usage/xml.md @@ -407,8 +407,10 @@ A best attempt is made to copy comments to xml. Given a sample.yml file of: ```yaml +# # header comment # above_cat +# cat: # inline_cat # above_array array: # inline_array @@ -425,9 +427,11 @@ yq -o=xml '.' sample.yml will output ```xml +header comment +above_cat +--> + + val1 val2 @@ -489,7 +493,8 @@ yq -p=xml -o=xml '.' sample.xml ``` will output ```xml - + + 3 diff --git a/pkg/yqlib/encoder_xml.go b/pkg/yqlib/encoder_xml.go index d60ede8f..345e825e 100644 --- a/pkg/yqlib/encoder_xml.go +++ b/pkg/yqlib/encoder_xml.go @@ -19,8 +19,6 @@ type xmlEncoder struct { leadingContent string } -var commentPrefix = regexp.MustCompile(`(^|\n)\s*#`) - func NewXMLEncoder(indent int, prefs XmlPreferences) Encoder { var indentString = "" @@ -39,7 +37,7 @@ func (e *xmlEncoder) PrintDocumentSeparator(writer io.Writer) error { } func (e *xmlEncoder) PrintLeadingContent(writer io.Writer, content string) error { - e.leadingContent = commentPrefix.ReplaceAllString(content, "\n") + e.leadingContent = content return nil } @@ -48,12 +46,39 @@ func (e *xmlEncoder) Encode(writer io.Writer, node *yaml.Node) error { // hack so we can manually add newlines to procInst and directives e.writer = writer encoder.Indent("", e.indentString) + var newLine xml.CharData = []byte("\n") + + mapNode := unwrapDoc(node) + if mapNode.Tag == "!!map" { + // make sure processing instructions are encoded first + for i := 0; i < len(mapNode.Content); i += 2 { + key := mapNode.Content[i] + value := mapNode.Content[i+1] + + if key.Value == (e.prefs.ProcInstPrefix + "xml") { + name := strings.Replace(key.Value, e.prefs.ProcInstPrefix, "", 1) + procInst := xml.ProcInst{Target: name, Inst: []byte(value.Value)} + if err := encoder.EncodeToken(procInst); err != nil { + return err + } + if _, err := e.writer.Write([]byte("\n")); err != nil { + log.Warning("Unable to write newline, skipping: %w", err) + } + } + } + } if e.leadingContent != "" { + + // remove first and last newlines if present err := e.encodeComment(encoder, e.leadingContent) if err != nil { return err } + err = encoder.EncodeToken(newLine) + if err != nil { + return err + } } switch node.Kind { @@ -89,29 +114,12 @@ func (e *xmlEncoder) Encode(writer io.Writer, node *yaml.Node) error { default: return fmt.Errorf("unsupported type %v", node.Tag) } - var charData xml.CharData = []byte("\n") - return encoder.EncodeToken(charData) + + return encoder.EncodeToken(newLine) } func (e *xmlEncoder) encodeTopLevelMap(encoder *xml.Encoder, node *yaml.Node) error { - // make sure processing instructions are encoded first - for i := 0; i < len(node.Content); i += 2 { - key := node.Content[i] - value := node.Content[i+1] - - if key.Value == (e.prefs.ProcInstPrefix + "xml") { - name := strings.Replace(key.Value, e.prefs.ProcInstPrefix, "", 1) - procInst := xml.ProcInst{Target: name, Inst: []byte(value.Value)} - if err := encoder.EncodeToken(procInst); err != nil { - return err - } - if _, err := e.writer.Write([]byte("\n")); err != nil { - log.Warning("Unable to write newline, skipping: %w", err) - } - } - } - err := e.encodeComment(encoder, headAndLineComment(node)) if err != nil { return err @@ -126,6 +134,13 @@ func (e *xmlEncoder) encodeTopLevelMap(encoder *xml.Encoder, node *yaml.Node) er if err != nil { return err } + if headAndLineComment(key) != "" { + var newLine xml.CharData = []byte("\n") + err = encoder.EncodeToken(newLine) + if err != nil { + return err + } + } if key.Value == (e.prefs.ProcInstPrefix + "xml") { // dont double process these. @@ -206,13 +221,34 @@ func (e *xmlEncoder) doEncode(encoder *xml.Encoder, node *yaml.Node, start xml.S return fmt.Errorf("unsupported type %v", node.Tag) } +var xmlEncodeMultilineCommentRegex = regexp.MustCompile(`(^|\n) *# ?(.*)`) +var xmlEncodeSingleLineCommentRegex = regexp.MustCompile(`^\s*#(.*)\n?`) +var chompRegexp = regexp.MustCompile(`\n$`) + func (e *xmlEncoder) encodeComment(encoder *xml.Encoder, commentStr string) error { if commentStr != "" { - log.Debugf("encoding comment %v", commentStr) - if !strings.HasSuffix(commentStr, " ") { - commentStr = commentStr + " " + log.Debugf("got comment [%v]", commentStr) + // multi line string + if len(commentStr) > 2 && strings.Contains(commentStr[1:len(commentStr)-1], "\n") { + commentStr = chompRegexp.ReplaceAllString(commentStr, "") + log.Debugf("chompRegexp [%v]", commentStr) + commentStr = xmlEncodeMultilineCommentRegex.ReplaceAllString(commentStr, "$1$2") + log.Debugf("processed multine [%v]", commentStr) + // if the first line is non blank, add a space + if commentStr[0] != '\n' && commentStr[0] != ' ' { + commentStr = " " + commentStr + } + + } else { + commentStr = xmlEncodeSingleLineCommentRegex.ReplaceAllString(commentStr, "$1") } + if !strings.HasSuffix(commentStr, " ") && !strings.HasSuffix(commentStr, "\n") { + commentStr = commentStr + " " + log.Debugf("added suffix [%v]", commentStr) + } + log.Debugf("encoding comment [%v]", commentStr) + var comment xml.Comment = []byte(commentStr) err := encoder.EncodeToken(comment) if err != nil { diff --git a/pkg/yqlib/xml_test.go b/pkg/yqlib/xml_test.go index ec9f5b67..b505e6ec 100644 --- a/pkg/yqlib/xml_test.go +++ b/pkg/yqlib/xml_test.go @@ -10,6 +10,29 @@ import ( "github.com/mikefarah/yq/v4/test" ) +const yamlInputWithProcInstAndHeadComment = `# cats ++p_xml: version="1.0" +this: is some xml` + +const expectedXmlProcInstAndHeadComment = ` + +is some xml +` + +const xmlProcInstAndHeadCommentBlock = ` + +is some xml +` + +const expectedYamlProcInstAndHeadCommentBlock = `# +# cats +# ++p_xml: version="1.0" +this: is some xml +` + const inputXMLWithComments = ` @@ -128,7 +151,8 @@ cat: # after cat ` -const expectedRoundtripXMLWithComments = ` +const expectedRoundtripXMLWithComments = ` + 3 @@ -139,8 +163,10 @@ in d before --> ` -const yamlWithComments = `# header comment +const yamlWithComments = `# +# header comment # above_cat +# cat: # inline_cat # above_array array: # inline_array @@ -151,9 +177,11 @@ cat: # inline_cat ` const expectedXMLWithComments = ` +header comment +above_cat +--> + + val1 val2 @@ -266,6 +294,41 @@ var xmlScenarios = []formatScenario{ input: "quicksoftsquishy", expected: "root:\n cats:\n cat:\n - quick\n - soft\n # kitty_comment\n\n - squishy\n", }, + { + description: "ProcInst with head comment", + skipDoc: true, + input: yamlInputWithProcInstAndHeadComment, + expected: expectedXmlProcInstAndHeadComment, + scenarioType: "encode", + }, + { + description: "ProcInst with head comment round trip", + skipDoc: true, + input: expectedXmlProcInstAndHeadComment, + expected: expectedXmlProcInstAndHeadComment, + scenarioType: "roundtrip", + }, + { + description: "ProcInst with block head comment to yaml", + skipDoc: true, + input: xmlProcInstAndHeadCommentBlock, + expected: expectedYamlProcInstAndHeadCommentBlock, + scenarioType: "decode", + }, + { + description: "ProcInst with block head comment from yaml", + skipDoc: true, + input: expectedYamlProcInstAndHeadCommentBlock, + expected: xmlProcInstAndHeadCommentBlock, + scenarioType: "encode", + }, + { + description: "ProcInst with head comment round trip block", + skipDoc: true, + input: xmlProcInstAndHeadCommentBlock, + expected: xmlProcInstAndHeadCommentBlock, + scenarioType: "roundtrip", + }, { description: "Parse xml: simple", subdescription: "Notice how all the values are strings, see the next example on how you can fix that.", @@ -466,6 +529,41 @@ var xmlScenarios = []formatScenario{ expected: "cool\n", scenarioType: "encode", }, + { + description: "round trip multiline 1", + skipDoc: true, + input: "\n", + expected: "\n", + scenarioType: "roundtrip", + }, + { + description: "round trip multiline 2", + skipDoc: true, + input: "\n", + expected: "\n", + scenarioType: "roundtrip", + }, + { + description: "round trip multiline 3", + skipDoc: true, + input: "\n", + expected: "\n", + scenarioType: "roundtrip", + }, + { + description: "round trip multiline 4", + skipDoc: true, + input: "\n", + expected: "\n", + scenarioType: "roundtrip", + }, + { + description: "round trip multiline 5", + skipDoc: true, // pity spaces aren't kept atm. + input: "\n", + expected: "\n", + scenarioType: "roundtrip", + }, { description: "Encode xml: comments", subdescription: "A best attempt is made to copy comments to xml.",