This commit is contained in:
Mike Farah 2021-12-31 19:58:39 +11:00
parent cdd2a90a98
commit 8c512dcc40
4 changed files with 61 additions and 42 deletions

View File

@ -5,6 +5,8 @@ import (
"fmt"
"io"
"strings"
// "strings"
"unicode"
"golang.org/x/net/html/charset"
@ -61,9 +63,16 @@ func (dec *xmlDecoder) createSequence(nodes []*xmlNode) (*yaml.Node, error) {
return yamlNode, nil
}
// processComment converts raw XML comment text into a YAML-style comment
// string. An empty input yields an empty string (no comment at all);
// otherwise trailing spaces are removed and the result is prefixed with "#".
// Leading whitespace and any interior line breaks are left untouched.
func (dec *xmlDecoder) processComment(c string) string {
	if len(c) > 0 {
		// Only the space character is stripped from the right-hand side;
		// tabs and newlines are preserved.
		trimmed := strings.TrimRight(c, " ")
		return "#" + trimmed
	}
	return ""
}
func (dec *xmlDecoder) createMap(n *xmlNode) (*yaml.Node, error) {
log.Debug("createMap: headC: %v, footC: %v", n.HeadComment, n.FootComment)
yamlNode := &yaml.Node{Kind: yaml.MappingNode, HeadComment: n.HeadComment, FootComment: n.FootComment}
yamlNode := &yaml.Node{Kind: yaml.MappingNode, HeadComment: dec.processComment(n.HeadComment), FootComment: dec.processComment(n.FootComment)}
if len(n.Data) > 0 {
label := dec.contentPrefix
@ -83,7 +92,13 @@ func (dec *xmlDecoder) createMap(n *xmlNode) (*yaml.Node, error) {
return nil, err
}
} else {
// comment hack for maps of scalars
// if the value is a scalar, the head comment of the scalar needs to go on the key?
// add tests for <z/> as well as multiple <ds> of inputXmlWithComments > yaml
if len(children[0].Children) == 0 {
labelNode.HeadComment = joinFilter([]string{labelNode.HeadComment, children[0].HeadComment})
children[0].HeadComment = ""
}
valueNode, err = dec.convertToYamlNode(children[0])
if err != nil {
return nil, err
@ -101,9 +116,9 @@ func (dec *xmlDecoder) convertToYamlNode(n *xmlNode) (*yaml.Node, error) {
}
scalar := createScalarNode(n.Data, n.Data)
log.Debug("scalar headC: %v, footC: %v", n.HeadComment, n.FootComment)
scalar.HeadComment = n.HeadComment
scalar.LineComment = n.LineComment
scalar.FootComment = n.FootComment
scalar.HeadComment = dec.processComment(n.HeadComment)
scalar.LineComment = dec.processComment(n.LineComment)
scalar.FootComment = dec.processComment(n.FootComment)
return scalar, nil
}
@ -226,23 +241,24 @@ func (dec *xmlDecoder) decodeXml(root *xmlNode) error {
commentStr := string(xml.CharData(se))
if elem.state == "started" {
log.Debug("got a foot comment for %v: %v", elem.label, commentStr)
log.Debug("got a foot comment for %v: [%v]", elem.label, commentStr)
// elem.n.FootComment = elem.n.FootComment + commentStr
// put the comment on the foot of the last child
if len(elem.n.Children) > 0 {
child := elem.n.Children[len(elem.n.Children)-1]
log.Debug("putting it here: %v", child.K)
child.V[0].FootComment = joinFilter([]string{child.V[0].FootComment, commentStr})
child.V[len(child.V)-1].FootComment = joinFilter([]string{child.V[len(child.V)-1].FootComment, commentStr})
} else {
log.Debug("putting it on the element")
elem.n.FootComment = joinFilter([]string{elem.n.FootComment, commentStr})
}
} else if elem.state == "chardata" {
log.Debug("got a line comment for (%v) %v: [%v]", elem.state, elem.label, commentStr)
elem.n.LineComment = joinFilter([]string{elem.n.LineComment, commentStr})
} else {
log.Debug("got a head comment for (%v) %v: %v", elem.state, elem.label, commentStr)
log.Debug("got a head comment for (%v) %v: [%v]", elem.state, elem.label, commentStr)
elem.n.HeadComment = joinFilter([]string{elem.n.HeadComment, commentStr})
}

View File

@ -22,8 +22,8 @@ XML nodes that have attributes then plain content, e.g:
The content of the node will be set as a field in the map with the key "+content". Use the `--xml-content-name` flag to change this.
## Round trip: with comments
A best effort is made, but comment positions and white space are not preserved perfectly.
## Parse xml: with comments
A best attempt is made to preserve comments.
Given a sample.xml file of:
```xml
@ -32,12 +32,13 @@ Given a sample.xml file of:
<cat>
<!-- in cat before -->
<x>3<!-- multi
line comment
line comment
for x --></x>
<!-- before y -->
<y>
<!-- in y before -->
<d><!-- in d before -->4<!-- in d after --></d>
<d><!-- in d before -->z<!-- in d after --></d>
<!-- in y after -->
</y>
<!-- in_cat_after -->
@ -51,21 +52,19 @@ yq e -p=xml '.' sample.xml
```
will output
```yaml
# before cat
# before cat
cat:
# in cat before
x: "3" # multi
# line comment
# for x
# before y
# in cat before
x: "3" # multi
# line comment
# for x
# before y
y:
# in y before
d: "4" # in d after
# in y after
# in d before
d: z # in d after
# in y after
# after cat
# after cat
```

View File

@ -63,12 +63,13 @@ var inputXmlWithComments = `
<cat>
<!-- in cat before -->
<x>3<!-- multi
line comment
line comment
for x --></x>
<!-- before y -->
<y>
<!-- in y before -->
<d><!-- in d before -->4<!-- in d after --></d>
<d><!-- in d before -->z<!-- in d after --></d>
<!-- in y after -->
</y>
<!-- in_cat_after -->
@ -80,8 +81,10 @@ var expectedDecodeYamlWithComments = `D0, P[], (doc)::# before cat
cat:
# in cat before
x: "3" # multi
# line comment
# line comment
# for x
# before y
y:
# in y before
d: "4" # in d before in d after
@ -111,8 +114,8 @@ cat: # inline_cat
`
var expectedXmlWithComments = `<!-- above_cat inline_cat--><cat><!-- above_array inline_array-->
<array><!-- inline_val1-->val1</array>
<array><!-- above_val2 inline_val2-->val2</array>
<array>val1<!-- inline_val1--></array>
<array><!-- above_val2-->val2<!-- inline_val2--></array>
</cat><!-- below_cat-->
`
@ -140,13 +143,13 @@ var xmlScenarios = []xmlScenario{
// input: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<cat legs=\"4\">meow</cat>",
// expected: "D0, P[], (doc)::cat:\n +content: meow\n +legs: \"4\"\n",
// },
// {
// description: "Parse xml: with comments",
// subdescription: "A best attempt is made to preserve comments.",
// input: inputXmlWithComments,
// expected: expectedDecodeYamlWithComments,
// scenarioType: "decode",
// },
{
description: "Parse xml: with comments",
subdescription: "A best attempt is made to preserve comments.",
input: inputXmlWithComments,
expected: expectedDecodeYamlWithComments,
scenarioType: "decode",
},
// {
// description: "Encode xml: simple",
// input: "cat: purrs",
@ -186,13 +189,13 @@ var xmlScenarios = []xmlScenario{
// expected: expectedXmlWithComments,
// scenarioType: "encode",
// },
{
description: "Round trip: with comments",
subdescription: "A best effort is made, but comment positions and white space are not preserved perfectly.",
input: inputXmlWithComments,
expected: expectedRoundtripXmlWithComments,
scenarioType: "roundtrip",
},
// {
// description: "Round trip: with comments",
// subdescription: "A best effort is made, but comment positions and white space are not preserved perfectly.",
// input: inputXmlWithComments,
// expected: expectedRoundtripXmlWithComments,
// scenarioType: "roundtrip",
// },
}
func testXmlScenario(t *testing.T, s xmlScenario) {

View File

@ -66,5 +66,6 @@ func AssertResultWithContext(t *testing.T, expectedValue interface{}, actualValu
if expectedValue != actualValue {
t.Error(context)
t.Error(": expected <\n", strings.ReplaceAll(fmt.Sprintf("%v", expectedValue), " ", "@"), ">\n but got <\n", strings.ReplaceAll(fmt.Sprintf("%v", actualValue), " ", "@"), ">\n")
// t.Error(": expected <\n", expectedValue, ">\n but got <\n", actualValue, ">\n")
}
}