// Source: yq/pkg/yqlib/xml_test.go
// Commit 23d3d962e0 by Mike Farah (2022-10-28 14:05:20 +11:00):
//   Refactored decoder responsibilities
//   - improved comment handling
//   - yaml decoder now responsible for leading content work around
// File listing metadata: 607 lines, 18 KiB, Go

package yqlib
import (
"bufio"
"fmt"
"testing"
"github.com/mikefarah/yq/v4/test"
)
// inputXMLWithComments is an XML document with comments placed before, inside
// and after its elements (including a multi-line comment). It is the input of
// the "Parse xml: with comments" and "Round trip: with comments" scenarios.
const inputXMLWithComments = `
<!-- before cat -->
<cat>
<!-- in cat before -->
<x>3<!-- multi
line comment
for x --></x>
<!-- before y -->
<y>
<!-- in y before -->
<d><!-- in d before -->z<!-- in d after --></d>
<!-- in y after -->
</y>
<!-- in_cat_after -->
</cat>
<!-- after cat -->
`
// inputXMLWithCommentsWithSubChild is a commented XML document in which the
// <d> element holds a child element (<z sweet="cool"/>) surrounded by
// comments. It is the input of the "Parse xml: with comments subchild" scenario.
const inputXMLWithCommentsWithSubChild = `
<!-- before cat -->
<cat>
<!-- in cat before -->
<x>3<!-- multi
line comment
for x --></x>
<!-- before y -->
<y>
<!-- in y before -->
<d><!-- in d before --><z sweet="cool"/><!-- in d after --></d>
<!-- in y after -->
</y>
<!-- in_cat_after -->
</cat>
<!-- after cat -->
`
// expectedDecodeYamlWithSubChild is the YAML expected from decoding
// inputXMLWithCommentsWithSubChild ("Parse xml: with comments subchild").
const expectedDecodeYamlWithSubChild = `# before cat
cat:
# in cat before
x: "3" # multi
# line comment
# for x
# before y
y:
# in y before
d:
# in d before
z:
+sweet: cool
# in d after
# in y after
# in_cat_after
# after cat
`
// inputXMLWithCommentsWithArray is a commented XML document containing two
// consecutive <d> elements, each with its own comments. It is the input of the
// "Parse xml: with comments array" scenario.
const inputXMLWithCommentsWithArray = `
<!-- before cat -->
<cat>
<!-- in cat before -->
<x>3<!-- multi
line comment
for x --></x>
<!-- before y -->
<y>
<!-- in y before -->
<d><!-- in d before --><z sweet="cool"/><!-- in d after --></d>
<d><!-- in d2 before --><z sweet="cool2"/><!-- in d2 after --></d>
<!-- in y after -->
</y>
<!-- in_cat_after -->
</cat>
<!-- after cat -->
`
// expectedDecodeYamlWithArray is the YAML expected from decoding
// inputXMLWithCommentsWithArray ("Parse xml: with comments array").
const expectedDecodeYamlWithArray = `# before cat
cat:
# in cat before
x: "3" # multi
# line comment
# for x
# before y
y:
# in y before
d:
- # in d before
z:
+sweet: cool
# in d after
- # in d2 before
z:
+sweet: cool2
# in d2 after
# in y after
# in_cat_after
# after cat
`
// expectedDecodeYamlWithComments is the YAML expected from decoding
// inputXMLWithComments ("Parse xml: with comments").
const expectedDecodeYamlWithComments = `# before cat
cat:
# in cat before
x: "3" # multi
# line comment
# for x
# before y
y:
# in y before
# in d before
d: z # in d after
# in y after
# in_cat_after
# after cat
`
// expectedRoundtripXMLWithComments is the XML expected after decoding
// inputXMLWithComments and re-encoding it as XML ("Round trip: with comments").
const expectedRoundtripXMLWithComments = `<!-- before cat --><cat><!-- in cat before -->
<x>3<!-- multi
line comment
for x --></x><!-- before y -->
<y><!-- in y before
in d before -->
<d>z<!-- in d after --></d><!-- in y after -->
</y><!-- in_cat_after -->
</cat><!-- after cat -->
`
// yamlWithComments is a YAML document with header, above-line and inline
// comments. It is the input of the "Encode xml: comments" scenario.
const yamlWithComments = `# header comment
# above_cat
cat: # inline_cat
# above_array
array: # inline_array
- val1 # inline_val1
# above_val2
- val2 # inline_val2
# below_cat
`
// expectedXMLWithComments is the XML expected from encoding yamlWithComments
// ("Encode xml: comments").
const expectedXMLWithComments = `<!--
header comment
above_cat
--><!-- inline_cat --><cat><!-- above_array inline_array -->
<array>val1<!-- inline_val1 --></array>
<array><!-- above_val2 -->val2<!-- inline_val2 --></array>
</cat><!-- below_cat -->
`
// inputXMLWithNamespacedAttr is an XML document whose root element declares
// namespaces and carries a namespaced attribute (xsi:schemaLocation). It is the
// input of the "keep attribute namespace" and "keep raw attribute namespace"
// scenarios.
const inputXMLWithNamespacedAttr = `
<?xml version="1.0"?>
<map xmlns="some-namespace" xmlns:xsi="some-instance" xsi:schemaLocation="some-url">
</map>
`
// expectedYAMLWithNamespacedAttr is the YAML expected for the
// "Parse xml: keep attribute namespace" scenario ("decode-keep-ns"); the
// attribute key uses the namespace value (some-instance) as its prefix.
const expectedYAMLWithNamespacedAttr = `+p_xml: version="1.0"
map:
+xmlns: some-namespace
+xmlns:xsi: some-instance
+some-instance:schemaLocation: some-url
`
// expectedYAMLWithRawNamespacedAttr is the YAML expected for the
// "Parse xml: keep raw attribute namespace" scenario ("decode-raw-token"); the
// attribute key keeps the prefix as written in the input (xsi).
const expectedYAMLWithRawNamespacedAttr = `+p_xml: version="1.0"
map:
+xmlns: some-namespace
+xmlns:xsi: some-instance
+xsi:schemaLocation: some-url
`
// xmlWithCustomDtd is an XML document with an inline DOCTYPE declaring two
// entities that are referenced in <item>. It is the input of the
// "Parse xml: custom dtd" and "Parse xml: skip custom dtd" scenarios.
const xmlWithCustomDtd = `
<?xml version="1.0"?>
<!DOCTYPE root [
<!ENTITY writer "Blah.">
<!ENTITY copyright "Blah">
]>
<root>
<item>&writer;&copyright;</item>
</root>`
// expectedDtd is the XML expected from round-tripping xmlWithCustomDtd
// ("Parse xml: custom dtd"); the DOCTYPE is kept and the entity references
// appear with an escaped ampersand.
const expectedDtd = `<?xml version="1.0"?>
<!DOCTYPE root [
<!ENTITY writer "Blah.">
<!ENTITY copyright "Blah">
]>
<root>
<item>&amp;writer;&amp;copyright;</item>
</root>
`
// expectedSkippedDtd is the XML expected from round-tripping xmlWithCustomDtd
// with SkipDirectives enabled ("Parse xml: skip custom dtd"); the DOCTYPE is
// absent from the output.
const expectedSkippedDtd = `<?xml version="1.0"?>
<root>
<item>&amp;writer;&amp;copyright;</item>
</root>
`
// xmlWithProcInstAndDirectives is an XML document containing processing
// instructions and directives both at the top level and inside <apple>. It is
// the input of the "Roundtrip: with doctype and declaration" scenario.
const xmlWithProcInstAndDirectives = `<?xml version="1.0"?>
<!DOCTYPE config SYSTEM "/etc/iwatch/iwatch.dtd" >
<apple>
<?coolioo version="1.0"?>
<!CATYPE meow purr puss >
<b>things</b>
</apple>
`
// yamlWithProcInstAndDirectives is a YAML document using the +p_ and
// +directive keys. It is the input of the
// "Encode: doctype and xml declaration" scenario.
const yamlWithProcInstAndDirectives = `+p_xml: version="1.0"
+directive: 'DOCTYPE config SYSTEM "/etc/iwatch/iwatch.dtd" '
apple:
+p_coolioo: version="1.0"
+directive: 'CATYPE meow purr puss '
b: things
`
// expectedXmlWithProcInstAndDirectives is the XML expected both from encoding
// yamlWithProcInstAndDirectives and from round-tripping
// xmlWithProcInstAndDirectives.
const expectedXmlWithProcInstAndDirectives = `<?xml version="1.0"?>
<!DOCTYPE config SYSTEM "/etc/iwatch/iwatch.dtd" >
<apple><?coolioo version="1.0"?><!CATYPE meow purr puss >
<b>things</b>
</apple>
`
// xmlScenarios is the table of XML format scenarios. Each entry is executed by
// testXMLScenario and, unless skipDoc is set, rendered into the usage
// documentation by documentXMLScenario.
//
// scenarioType selects the decoder/encoder pairing: "" or "decode" (XML to
// YAML), "encode" (YAML to XML), "roundtrip" (XML to XML), "decode-keep-ns",
// "decode-raw-token" and "roundtrip-skip-directives".
var xmlScenarios = []formatScenario{
	{
		description:    "Parse xml: simple",
		subdescription: "Notice how all the values are strings, see the next example on how you can fix that.",
		input:          "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<cat>\n <says>meow</says>\n <legs>4</legs>\n <cute>true</cute>\n</cat>",
		expected:       "+p_xml: version=\"1.0\" encoding=\"UTF-8\"\ncat:\n says: meow\n legs: \"4\"\n cute: \"true\"\n",
	},
	{
		description:    "Parse xml: number",
		subdescription: "All values are assumed to be strings when parsing XML, but you can use the `from_yaml` operator on all the strings values to autoparse into the correct type.",
		input:          "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<cat>\n <says>meow</says>\n <legs>4</legs>\n <cute>true</cute>\n</cat>",
		expression:     " (.. | select(tag == \"!!str\")) |= from_yaml",
		expected:       "+p_xml: version=\"1.0\" encoding=\"UTF-8\"\ncat:\n says: meow\n legs: 4\n cute: true\n",
	},
	{
		description:    "Parse xml: array",
		subdescription: "Consecutive nodes with identical xml names are assumed to be arrays.",
		input:          "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<animal>cat</animal>\n<animal>goat</animal>",
		expected:       "+p_xml: version=\"1.0\" encoding=\"UTF-8\"\nanimal:\n - cat\n - goat\n",
	},
	{
		description: "Parse xml: attributes",
		// Both sides of the flag name use backticks so the generated markdown
		// renders it as inline code.
		subdescription: "Attributes are converted to fields, with the default attribute prefix '+'. Use `--xml-attribute-prefix` to set your own.",
		input:          "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<cat legs=\"4\">\n <legs>7</legs>\n</cat>",
		expected:       "+p_xml: version=\"1.0\" encoding=\"UTF-8\"\ncat:\n +legs: \"4\"\n legs: \"7\"\n",
	},
	{
		description:    "Parse xml: attributes with content",
		subdescription: "Content is added as a field, using the default content name of `+content`. Use `--xml-content-name` to set your own.",
		input:          "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<cat legs=\"4\">meow</cat>",
		expected:       "+p_xml: version=\"1.0\" encoding=\"UTF-8\"\ncat:\n +content: meow\n +legs: \"4\"\n",
	},
	{
		description:    "Parse xml: custom dtd",
		subdescription: "DTD entities are processed as directives.",
		input:          xmlWithCustomDtd,
		expected:       expectedDtd,
		scenarioType:   "roundtrip",
	},
	{
		description:    "Parse xml: skip custom dtd",
		subdescription: "DTDs are directives, skip over directives to skip DTDs.",
		input:          xmlWithCustomDtd,
		expected:       expectedSkippedDtd,
		scenarioType:   "roundtrip-skip-directives",
	},
	{
		description:    "Parse xml: with comments",
		subdescription: "A best attempt is made to preserve comments.",
		input:          inputXMLWithComments,
		expected:       expectedDecodeYamlWithComments,
		scenarioType:   "decode",
	},
	{
		description:  "Empty doc",
		skipDoc:      true,
		input:        "",
		expected:     "\n",
		scenarioType: "decode",
	},
	{
		description:  "Empty single node",
		skipDoc:      true,
		input:        "<a/>",
		expected:     "a:\n",
		scenarioType: "decode",
	},
	{
		description:  "Empty close node",
		skipDoc:      true,
		input:        "<a></a>",
		expected:     "a:\n",
		scenarioType: "decode",
	},
	{
		description:  "Nested empty",
		skipDoc:      true,
		input:        "<a><b/></a>",
		expected:     "a:\n b:\n",
		scenarioType: "decode",
	},
	{
		description:  "Parse xml: with comments subchild",
		skipDoc:      true,
		input:        inputXMLWithCommentsWithSubChild,
		expected:     expectedDecodeYamlWithSubChild,
		scenarioType: "decode",
	},
	{
		description:  "Parse xml: with comments array",
		skipDoc:      true,
		input:        inputXMLWithCommentsWithArray,
		expected:     expectedDecodeYamlWithArray,
		scenarioType: "decode",
	},
	{
		description:  "Parse xml: keep attribute namespace",
		skipDoc:      false,
		input:        inputXMLWithNamespacedAttr,
		expected:     expectedYAMLWithNamespacedAttr,
		scenarioType: "decode-keep-ns",
	},
	{
		description:  "Parse xml: keep raw attribute namespace",
		skipDoc:      false,
		input:        inputXMLWithNamespacedAttr,
		expected:     expectedYAMLWithRawNamespacedAttr,
		scenarioType: "decode-raw-token",
	},
	{
		description:  "Encode xml: simple",
		input:        "cat: purrs",
		expected:     "<cat>purrs</cat>\n",
		scenarioType: "encode",
	},
	{
		description:  "includes map tags",
		skipDoc:      true,
		input:        "<cat>purrs</cat>\n",
		expression:   `tag`,
		expected:     "!!map\n",
		scenarioType: "decode",
	},
	{
		description:  "includes array tags",
		skipDoc:      true,
		input:        "<cat>purrs</cat><cat>purrs</cat>\n",
		expression:   `.cat | tag`,
		expected:     "!!seq\n",
		scenarioType: "decode",
	},
	{
		description:  "Encode xml: array",
		input:        "pets:\n cat:\n - purrs\n - meows",
		expected:     "<pets>\n <cat>purrs</cat>\n <cat>meows</cat>\n</pets>\n",
		scenarioType: "encode",
	},
	{
		description:    "Encode xml: attributes",
		subdescription: "Fields with the matching xml-attribute-prefix are assumed to be attributes.",
		input:          "cat:\n +name: tiger\n meows: true\n",
		expected:       "<cat name=\"tiger\">\n <meows>true</meows>\n</cat>\n",
		scenarioType:   "encode",
	},
	{
		description:  "double prefix",
		skipDoc:      true,
		input:        "cat:\n ++@name: tiger\n meows: true\n",
		expected:     "<cat +@name=\"tiger\">\n <meows>true</meows>\n</cat>\n",
		scenarioType: "encode",
	},
	{
		description:    "Encode xml: attributes with content",
		subdescription: "Fields with the matching xml-content-name is assumed to be content.",
		input:          "cat:\n +name: tiger\n +content: cool\n",
		expected:       "<cat name=\"tiger\">cool</cat>\n",
		scenarioType:   "encode",
	},
	{
		description:    "Encode xml: comments",
		subdescription: "A best attempt is made to copy comments to xml.",
		input:          yamlWithComments,
		expected:       expectedXMLWithComments,
		scenarioType:   "encode",
	},
	{
		description:    "Encode: doctype and xml declaration",
		subdescription: "Use the special xml names to add/modify proc instructions and directives.",
		input:          yamlWithProcInstAndDirectives,
		expected:       expectedXmlWithProcInstAndDirectives,
		scenarioType:   "encode",
	},
	{
		description:    "Round trip: with comments",
		subdescription: "A best effort is made, but comment positions and white space are not preserved perfectly.",
		input:          inputXMLWithComments,
		expected:       expectedRoundtripXMLWithComments,
		scenarioType:   "roundtrip",
	},
	{
		description:    "Roundtrip: with doctype and declaration",
		subdescription: "yq parses XML proc instructions and directives into nodes.\nUnfortunately the underlying XML parser loses whitespace information.",
		input:          xmlWithProcInstAndDirectives,
		expected:       expectedXmlWithProcInstAndDirectives,
		scenarioType:   "roundtrip",
	},
}
// testXMLScenario runs one scenario through the decoder/encoder pair selected
// by its scenarioType and asserts that the output equals s.expected.
// It panics on a scenarioType it does not know.
func testXMLScenario(t *testing.T, s formatScenario) {
	switch s.scenarioType {
	case "", "decode":
		// The plain decode check uses a YAML indent of 4; the other YAML
		// encoders in this function use 2.
		decoder := NewXMLDecoder(ConfiguredXMLPreferences)
		encoder := NewYamlEncoder(4, false, ConfiguredYamlPreferences)
		actual := processFormatScenario(s, decoder, encoder)
		test.AssertResultWithContext(t, s.expected, actual, s.description)

	case "encode":
		decoder := NewYamlDecoder(ConfiguredYamlPreferences)
		encoder := NewXMLEncoder(2, ConfiguredXMLPreferences)
		actual := processFormatScenario(s, decoder, encoder)
		test.AssertResultWithContext(t, s.expected, actual, s.description)

	case "roundtrip":
		decoder := NewXMLDecoder(ConfiguredXMLPreferences)
		encoder := NewXMLEncoder(2, ConfiguredXMLPreferences)
		actual := processFormatScenario(s, decoder, encoder)
		test.AssertResultWithContext(t, s.expected, actual, s.description)

	case "decode-keep-ns":
		xmlPrefs := NewDefaultXmlPreferences()
		xmlPrefs.KeepNamespace = true
		decoder := NewXMLDecoder(xmlPrefs)
		encoder := NewYamlEncoder(2, false, ConfiguredYamlPreferences)
		actual := processFormatScenario(s, decoder, encoder)
		test.AssertResultWithContext(t, s.expected, actual, s.description)

	case "decode-raw-token":
		xmlPrefs := NewDefaultXmlPreferences()
		xmlPrefs.UseRawToken = true
		decoder := NewXMLDecoder(xmlPrefs)
		encoder := NewYamlEncoder(2, false, ConfiguredYamlPreferences)
		actual := processFormatScenario(s, decoder, encoder)
		test.AssertResultWithContext(t, s.expected, actual, s.description)

	case "roundtrip-skip-directives":
		xmlPrefs := NewDefaultXmlPreferences()
		xmlPrefs.SkipDirectives = true
		decoder := NewXMLDecoder(xmlPrefs)
		encoder := NewXMLEncoder(2, xmlPrefs)
		actual := processFormatScenario(s, decoder, encoder)
		test.AssertResultWithContext(t, s.expected, actual, s.description)

	default:
		panic(fmt.Sprintf("unhandled scenario type %q", s.scenarioType))
	}
}
// documentXMLScenario renders one scenario into the documentation writer.
// i must hold a formatScenario. Scenarios flagged skipDoc are ignored;
// otherwise the scenarioType selects the documenting function, and an
// unknown type panics.
func documentXMLScenario(t *testing.T, w *bufio.Writer, i interface{}) {
	scenario := i.(formatScenario)
	if scenario.skipDoc {
		return
	}

	// Dispatch table from scenario type to its documentation writer.
	documenters := map[string]func(*bufio.Writer, formatScenario){
		"":                          documentXMLDecodeScenario,
		"decode":                    documentXMLDecodeScenario,
		"encode":                    documentXMLEncodeScenario,
		"roundtrip":                 documentXMLRoundTripScenario,
		"decode-keep-ns":            documentXMLDecodeKeepNsScenario,
		"decode-raw-token":          documentXMLDecodeKeepNsRawTokenScenario,
		"roundtrip-skip-directives": documentXMLSkipDirectrivesScenario,
	}

	document, known := documenters[scenario.scenarioType]
	if !known {
		panic(fmt.Sprintf("unhandled scenario type %q", scenario.scenarioType))
	}
	document(w, scenario)
}
// documentXMLDecodeScenario writes the markdown section for a decode
// scenario: heading, optional subdescription, the sample XML, the yq command
// (using "." when the scenario has no expression) and the YAML produced by
// decoding the input with the configured XML preferences.
func documentXMLDecodeScenario(w *bufio.Writer, s formatScenario) {
	// An empty expression is documented as the identity expression.
	expr := s.expression
	if expr == "" {
		expr = "."
	}

	writeOrPanic(w, fmt.Sprintf("## %v\n", s.description))
	if s.subdescription != "" {
		writeOrPanic(w, s.subdescription)
		writeOrPanic(w, "\n\n")
	}

	preamble := []string{
		"Given a sample.xml file of:\n",
		fmt.Sprintf("```xml\n%v\n```\n", s.input),
		"then\n",
		fmt.Sprintf("```bash\nyq -p=xml '%v' sample.xml\n```\n", expr),
		"will output\n",
	}
	for _, chunk := range preamble {
		writeOrPanic(w, chunk)
	}

	decoder := NewXMLDecoder(ConfiguredXMLPreferences)
	encoder := NewYamlEncoder(2, false, ConfiguredYamlPreferences)
	writeOrPanic(w, fmt.Sprintf("```yaml\n%v```\n\n", processFormatScenario(s, decoder, encoder)))
}
// documentXMLDecodeKeepNsScenario writes the markdown section for the
// keep-namespace scenario. It shows the XML round-trip output with
// KeepNamespace enabled, followed by an "instead of" block showing the output
// with KeepNamespace disabled.
func documentXMLDecodeKeepNsScenario(w *bufio.Writer, s formatScenario) {
	writeOrPanic(w, fmt.Sprintf("## %v\n", s.description))

	if s.subdescription != "" {
		writeOrPanic(w, s.subdescription)
		writeOrPanic(w, "\n\n")
	}

	writeOrPanic(w, "Given a sample.xml file of:\n")
	writeOrPanic(w, fmt.Sprintf("```xml\n%v\n```\n", s.input))

	writeOrPanic(w, "then\n")
	writeOrPanic(w, "```bash\nyq -p=xml -o=xml --xml-keep-namespace '.' sample.xml\n```\n")
	writeOrPanic(w, "will output\n")
	prefs := NewDefaultXmlPreferences()
	prefs.KeepNamespace = true
	writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder(prefs), NewXMLEncoder(2, prefs))))

	// The comparison output must come from its own preferences with the
	// option off; previously the flag was cleared on prefs by mistake,
	// leaving prefsWithout at whatever the defaults are.
	prefsWithout := NewDefaultXmlPreferences()
	prefsWithout.KeepNamespace = false
	writeOrPanic(w, "instead of\n")
	writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder(prefsWithout), NewXMLEncoder(2, prefsWithout))))
}
// documentXMLDecodeKeepNsRawTokenScenario writes the markdown section for the
// raw-token scenario. It shows the XML round-trip output with both
// KeepNamespace and UseRawToken enabled (matching the documented command),
// followed by an "instead of" block showing the output with both disabled.
func documentXMLDecodeKeepNsRawTokenScenario(w *bufio.Writer, s formatScenario) {
	writeOrPanic(w, fmt.Sprintf("## %v\n", s.description))

	if s.subdescription != "" {
		writeOrPanic(w, s.subdescription)
		writeOrPanic(w, "\n\n")
	}

	writeOrPanic(w, "Given a sample.xml file of:\n")
	writeOrPanic(w, fmt.Sprintf("```xml\n%v\n```\n", s.input))

	writeOrPanic(w, "then\n")
	writeOrPanic(w, "```bash\nyq -p=xml -o=xml --xml-keep-namespace --xml-raw-token '.' sample.xml\n```\n")
	writeOrPanic(w, "will output\n")

	// The command above passes both flags, so both preferences are enabled;
	// previously UseRawToken was never set and the rendered output did not
	// correspond to the documented command.
	prefs := NewDefaultXmlPreferences()
	prefs.KeepNamespace = true
	prefs.UseRawToken = true
	writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder(prefs), NewXMLEncoder(2, prefs))))

	// Comparison output with both options explicitly off.
	prefsWithout := NewDefaultXmlPreferences()
	prefsWithout.KeepNamespace = false
	prefsWithout.UseRawToken = false
	writeOrPanic(w, "instead of\n")
	writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder(prefsWithout), NewXMLEncoder(2, prefsWithout))))
}
// documentXMLEncodeScenario writes the markdown section for an encode
// scenario: heading, optional subdescription, the sample YAML, the yq command
// and the XML produced by encoding the input with the configured preferences.
func documentXMLEncodeScenario(w *bufio.Writer, s formatScenario) {
	writeOrPanic(w, fmt.Sprintf("## %v\n", s.description))
	if s.subdescription != "" {
		writeOrPanic(w, s.subdescription)
		writeOrPanic(w, "\n\n")
	}

	preamble := []string{
		"Given a sample.yml file of:\n",
		fmt.Sprintf("```yaml\n%v\n```\n", s.input),
		"then\n",
		"```bash\nyq -o=xml '.' sample.yml\n```\n",
		"will output\n",
	}
	for _, chunk := range preamble {
		writeOrPanic(w, chunk)
	}

	decoder := NewYamlDecoder(ConfiguredYamlPreferences)
	encoder := NewXMLEncoder(2, ConfiguredXMLPreferences)
	writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, decoder, encoder)))
}
// documentXMLRoundTripScenario writes the markdown section for a round-trip
// scenario: heading, optional subdescription, the sample XML, the yq command
// and the XML produced by decoding and re-encoding the input with the
// configured XML preferences.
func documentXMLRoundTripScenario(w *bufio.Writer, s formatScenario) {
	writeOrPanic(w, fmt.Sprintf("## %v\n", s.description))
	if s.subdescription != "" {
		writeOrPanic(w, s.subdescription)
		writeOrPanic(w, "\n\n")
	}

	preamble := []string{
		"Given a sample.xml file of:\n",
		fmt.Sprintf("```xml\n%v\n```\n", s.input),
		"then\n",
		"```bash\nyq -p=xml -o=xml '.' sample.xml\n```\n",
		"will output\n",
	}
	for _, chunk := range preamble {
		writeOrPanic(w, chunk)
	}

	decoder := NewXMLDecoder(ConfiguredXMLPreferences)
	encoder := NewXMLEncoder(2, ConfiguredXMLPreferences)
	writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, decoder, encoder)))
}
// documentXMLSkipDirectrivesScenario writes the markdown section for the
// skip-directives scenario: heading, optional subdescription, the sample XML,
// the yq command and the XML round-trip output produced with SkipDirectives
// enabled on both decoder and encoder.
func documentXMLSkipDirectrivesScenario(w *bufio.Writer, s formatScenario) {
	writeOrPanic(w, fmt.Sprintf("## %v\n", s.description))
	if s.subdescription != "" {
		writeOrPanic(w, s.subdescription)
		writeOrPanic(w, "\n\n")
	}

	preamble := []string{
		"Given a sample.xml file of:\n",
		fmt.Sprintf("```xml\n%v\n```\n", s.input),
		"then\n",
		"```bash\nyq -p=xml -o=xml --xml-skip-directives '.' sample.xml\n```\n",
		"will output\n",
	}
	for _, chunk := range preamble {
		writeOrPanic(w, chunk)
	}

	xmlPrefs := NewDefaultXmlPreferences()
	xmlPrefs.SkipDirectives = true
	decoder := NewXMLDecoder(xmlPrefs)
	encoder := NewXMLEncoder(2, xmlPrefs)
	writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, decoder, encoder)))
}
// TestXMLScenarios runs every entry of xmlScenarios through testXMLScenario
// and then hands the same entries to documentScenarios to generate the "xml"
// usage documentation.
func TestXMLScenarios(t *testing.T) {
	// documentScenarios takes a slice of empty interfaces, so the scenarios
	// are collected into one while they are being tested.
	asInterfaces := make([]interface{}, 0, len(xmlScenarios))
	for _, scenario := range xmlScenarios {
		testXMLScenario(t, scenario)
		asInterfaces = append(asInterfaces, scenario)
	}
	documentScenarios(t, "usage", "xml", asInterfaces, documentXMLScenario)
}