This commit is contained in:
Mike Farah 2021-12-31 19:58:39 +11:00
parent cdd2a90a98
commit 8c512dcc40
4 changed files with 61 additions and 42 deletions

View File

@ -5,6 +5,8 @@ import (
"fmt" "fmt"
"io" "io"
"strings" "strings"
// "strings"
"unicode" "unicode"
"golang.org/x/net/html/charset" "golang.org/x/net/html/charset"
@ -61,9 +63,16 @@ func (dec *xmlDecoder) createSequence(nodes []*xmlNode) (*yaml.Node, error) {
return yamlNode, nil return yamlNode, nil
} }
// processComment turns raw comment text into its "#"-prefixed form.
//
// An empty comment is returned unchanged so that nodes without a comment
// do not gain a bare "#". Otherwise trailing space characters are stripped
// and a single "#" is placed in front; leading whitespace and any other
// trailing characters (tabs, newlines) are left as they are.
func (dec *xmlDecoder) processComment(c string) string {
	if len(c) == 0 {
		return c
	}
	// Only the space character is in the cutset, so nothing else is trimmed.
	trimmed := strings.TrimRight(c, " ")
	return "#" + trimmed
}
func (dec *xmlDecoder) createMap(n *xmlNode) (*yaml.Node, error) { func (dec *xmlDecoder) createMap(n *xmlNode) (*yaml.Node, error) {
log.Debug("createMap: headC: %v, footC: %v", n.HeadComment, n.FootComment) log.Debug("createMap: headC: %v, footC: %v", n.HeadComment, n.FootComment)
yamlNode := &yaml.Node{Kind: yaml.MappingNode, HeadComment: n.HeadComment, FootComment: n.FootComment} yamlNode := &yaml.Node{Kind: yaml.MappingNode, HeadComment: dec.processComment(n.HeadComment), FootComment: dec.processComment(n.FootComment)}
if len(n.Data) > 0 { if len(n.Data) > 0 {
label := dec.contentPrefix label := dec.contentPrefix
@ -83,7 +92,13 @@ func (dec *xmlDecoder) createMap(n *xmlNode) (*yaml.Node, error) {
return nil, err return nil, err
} }
} else { } else {
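// Comment hack for maps of scalars: when the value has no children of its own
// (i.e. it is a scalar), its head comment apparently needs to go on the key
// node instead, so it is merged into the key's head comment and cleared here.
// TODO: add tests for <z/> as well as multiple <ds> of inputXmlWithComments > yaml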
if len(children[0].Children) == 0 {
labelNode.HeadComment = joinFilter([]string{labelNode.HeadComment, children[0].HeadComment})
children[0].HeadComment = ""
}
valueNode, err = dec.convertToYamlNode(children[0]) valueNode, err = dec.convertToYamlNode(children[0])
if err != nil { if err != nil {
return nil, err return nil, err
@ -101,9 +116,9 @@ func (dec *xmlDecoder) convertToYamlNode(n *xmlNode) (*yaml.Node, error) {
} }
scalar := createScalarNode(n.Data, n.Data) scalar := createScalarNode(n.Data, n.Data)
log.Debug("scalar headC: %v, footC: %v", n.HeadComment, n.FootComment) log.Debug("scalar headC: %v, footC: %v", n.HeadComment, n.FootComment)
scalar.HeadComment = n.HeadComment scalar.HeadComment = dec.processComment(n.HeadComment)
scalar.LineComment = n.LineComment scalar.LineComment = dec.processComment(n.LineComment)
scalar.FootComment = n.FootComment scalar.FootComment = dec.processComment(n.FootComment)
return scalar, nil return scalar, nil
} }
@ -226,23 +241,24 @@ func (dec *xmlDecoder) decodeXml(root *xmlNode) error {
commentStr := string(xml.CharData(se)) commentStr := string(xml.CharData(se))
if elem.state == "started" { if elem.state == "started" {
log.Debug("got a foot comment for %v: %v", elem.label, commentStr) log.Debug("got a foot comment for %v: [%v]", elem.label, commentStr)
// elem.n.FootComment = elem.n.FootComment + commentStr // elem.n.FootComment = elem.n.FootComment + commentStr
// put the comment on the foot of the last child // put the comment on the foot of the last child
if len(elem.n.Children) > 0 { if len(elem.n.Children) > 0 {
child := elem.n.Children[len(elem.n.Children)-1] child := elem.n.Children[len(elem.n.Children)-1]
log.Debug("putting it here: %v", child.K) log.Debug("putting it here: %v", child.K)
child.V[0].FootComment = joinFilter([]string{child.V[0].FootComment, commentStr}) child.V[len(child.V)-1].FootComment = joinFilter([]string{child.V[len(child.V)-1].FootComment, commentStr})
} else { } else {
log.Debug("putting it on the element") log.Debug("putting it on the element")
elem.n.FootComment = joinFilter([]string{elem.n.FootComment, commentStr}) elem.n.FootComment = joinFilter([]string{elem.n.FootComment, commentStr})
} }
} else if elem.state == "chardata" { } else if elem.state == "chardata" {
log.Debug("got a line comment for (%v) %v: [%v]", elem.state, elem.label, commentStr)
elem.n.LineComment = joinFilter([]string{elem.n.LineComment, commentStr}) elem.n.LineComment = joinFilter([]string{elem.n.LineComment, commentStr})
} else { } else {
log.Debug("got a head comment for (%v) %v: %v", elem.state, elem.label, commentStr) log.Debug("got a head comment for (%v) %v: [%v]", elem.state, elem.label, commentStr)
elem.n.HeadComment = joinFilter([]string{elem.n.HeadComment, commentStr}) elem.n.HeadComment = joinFilter([]string{elem.n.HeadComment, commentStr})
} }

View File

@ -22,8 +22,8 @@ XML nodes that have attributes then plain content, e.g:
The content of the node will be set as a field in the map with the key "+content". Use the `--xml-content-name` flag to change this. The content of the node will be set as a field in the map with the key "+content". Use the `--xml-content-name` flag to change this.
## Round trip: with comments ## Parse xml: with comments
A best effort is made, but comment positions and white space are not preserved perfectly. A best attempt is made to preserve comments.
Given a sample.xml file of: Given a sample.xml file of:
```xml ```xml
@ -37,7 +37,8 @@ for x --></x>
<!-- before y --> <!-- before y -->
<y> <y>
<!-- in y before --> <!-- in y before -->
<d><!-- in d before -->4<!-- in d after --></d> <d><!-- in d before -->z<!-- in d after --></d>
<!-- in y after --> <!-- in y after -->
</y> </y>
<!-- in_cat_after --> <!-- in_cat_after -->
@ -51,21 +52,19 @@ yq e -p=xml '.' sample.xml
``` ```
will output will output
```yaml ```yaml
# before cat # before cat
cat: cat:
# in cat before # in cat before
x: "3" # multi x: "3" # multi
# line comment # line comment
# for x # for x
# before y # before y
y: y:
# in y before
d: "4" # in d after
# in y after
# in d before # in d before
d: z # in d after
# in y after
# after cat # after cat
``` ```

View File

@ -68,7 +68,8 @@ for x --></x>
<!-- before y --> <!-- before y -->
<y> <y>
<!-- in y before --> <!-- in y before -->
<d><!-- in d before -->4<!-- in d after --></d> <d><!-- in d before -->z<!-- in d after --></d>
<!-- in y after --> <!-- in y after -->
</y> </y>
<!-- in_cat_after --> <!-- in_cat_after -->
@ -82,6 +83,8 @@ cat:
x: "3" # multi x: "3" # multi
# line comment # line comment
# for x # for x
# before y
y: y:
# in y before # in y before
d: "4" # in d before in d after d: "4" # in d before in d after
@ -111,8 +114,8 @@ cat: # inline_cat
` `
var expectedXmlWithComments = `<!-- above_cat inline_cat--><cat><!-- above_array inline_array--> var expectedXmlWithComments = `<!-- above_cat inline_cat--><cat><!-- above_array inline_array-->
<array><!-- inline_val1-->val1</array> <array>val1<!-- inline_val1--></array>
<array><!-- above_val2 inline_val2-->val2</array> <array><!-- above_val2-->val2<!-- inline_val2--></array>
</cat><!-- below_cat--> </cat><!-- below_cat-->
` `
@ -140,13 +143,13 @@ var xmlScenarios = []xmlScenario{
// input: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<cat legs=\"4\">meow</cat>", // input: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<cat legs=\"4\">meow</cat>",
// expected: "D0, P[], (doc)::cat:\n +content: meow\n +legs: \"4\"\n", // expected: "D0, P[], (doc)::cat:\n +content: meow\n +legs: \"4\"\n",
// }, // },
// { {
// description: "Parse xml: with comments", description: "Parse xml: with comments",
// subdescription: "A best attempt is made to preserve comments.", subdescription: "A best attempt is made to preserve comments.",
// input: inputXmlWithComments, input: inputXmlWithComments,
// expected: expectedDecodeYamlWithComments, expected: expectedDecodeYamlWithComments,
// scenarioType: "decode", scenarioType: "decode",
// }, },
// { // {
// description: "Encode xml: simple", // description: "Encode xml: simple",
// input: "cat: purrs", // input: "cat: purrs",
@ -186,13 +189,13 @@ var xmlScenarios = []xmlScenario{
// expected: expectedXmlWithComments, // expected: expectedXmlWithComments,
// scenarioType: "encode", // scenarioType: "encode",
// }, // },
{ // {
description: "Round trip: with comments", // description: "Round trip: with comments",
subdescription: "A best effort is made, but comment positions and white space are not preserved perfectly.", // subdescription: "A best effort is made, but comment positions and white space are not preserved perfectly.",
input: inputXmlWithComments, // input: inputXmlWithComments,
expected: expectedRoundtripXmlWithComments, // expected: expectedRoundtripXmlWithComments,
scenarioType: "roundtrip", // scenarioType: "roundtrip",
}, // },
} }
func testXmlScenario(t *testing.T, s xmlScenario) { func testXmlScenario(t *testing.T, s xmlScenario) {

View File

@ -66,5 +66,6 @@ func AssertResultWithContext(t *testing.T, expectedValue interface{}, actualValu
if expectedValue != actualValue { if expectedValue != actualValue {
t.Error(context) t.Error(context)
t.Error(": expected <\n", strings.ReplaceAll(fmt.Sprintf("%v", expectedValue), " ", "@"), ">\n but got <\n", strings.ReplaceAll(fmt.Sprintf("%v", actualValue), " ", "@"), ">\n") t.Error(": expected <\n", strings.ReplaceAll(fmt.Sprintf("%v", expectedValue), " ", "@"), ">\n but got <\n", strings.ReplaceAll(fmt.Sprintf("%v", actualValue), " ", "@"), ">\n")
// t.Error(": expected <\n", expectedValue, ">\n but got <\n", actualValue, ">\n")
} }
} }