//go:build !yq_noxml package xml import ( "bufio" "fmt" "testing" "github.com/mikefarah/yq/v4/pkg/yqlib" "github.com/mikefarah/yq/v4/test" ) const yamlInputWithProcInstAndHeadComment = `# cats +p_xml: version="1.0" this: is some xml` const expectedXmlProcInstAndHeadComment = ` is some xml ` const xmlProcInstAndHeadCommentBlock = ` is some xml ` const expectedYamlProcInstAndHeadCommentBlock = `# # cats # +p_xml: version="1.0" this: is some xml ` const inputXMLWithComments = ` 3 z ` const inputXMLWithCommentsWithSubChild = ` 3 ` const expectedDecodeYamlWithSubChild = `# before cat cat: # in cat before x: "3" # multi # line comment # for x # before y y: # in y before d: # in d before z: +@sweet: cool # in d after # in y after # in_cat_after # after cat ` const inputXMLWithCommentsWithArray = ` 3 ` const expectedDecodeYamlWithArray = `# before cat cat: # in cat before x: "3" # multi # line comment # for x # before y y: # in y before d: - # in d before z: +@sweet: cool # in d after - # in d2 before z: +@sweet: cool2 # in d2 after # in y after # in_cat_after # after cat ` const expectedDecodeYamlWithComments = `# before cat cat: # in cat before x: "3" # multi # line comment # for x # before y y: # in y before # in d before d: z # in d after # in y after # in_cat_after # after cat ` const expectedRoundtripXMLWithComments = ` 3 z ` const yamlWithComments = `# # header comment # above_cat # cat: # inline_cat # above_array array: # inline_array - val1 # inline_val1 # above_val2 - val2 # inline_val2 # below_cat ` const expectedXMLWithComments = ` val1 val2 ` const inputXMLWithNamespacedAttr = ` ` const expectedYAMLWithNamespacedAttr = `+p_xml: version="1.0" map: +@xmlns: some-namespace +@xmlns:xsi: some-instance +@xsi:schemaLocation: some-url ` const expectedYAMLWithRawNamespacedAttr = `+p_xml: version="1.0" map: +@xmlns: some-namespace +@xmlns:xsi: some-instance +@xsi:schemaLocation: some-url ` const expectedYAMLWithoutRawNamespacedAttr = `+p_xml: version="1.0" map: +@xmlns: some-namespace +@xmlns:xsi: some-instance +@some-instance:schemaLocation: some-url ` const xmlWithCustomDtd = ` ]> &writer;©right; ` const expectedDtd = ` ]> &writer;&copyright; ` const expectedSkippedDtd = ` &writer;&copyright; ` const xmlWithProcInstAndDirectives = ` things ` const yamlWithProcInstAndDirectives = `+p_xml: version="1.0" +directive: 'DOCTYPE config SYSTEM "/etc/iwatch/iwatch.dtd" ' apple: +p_coolioo: version="1.0" +directive: 'CATYPE meow purr puss ' b: things ` const expectedXmlWithProcInstAndDirectives = ` things ` var xmlScenarios = []yqlib.FormatScenario{ { skipDoc: true, description: "bad xml", input: ``, expected: "+p_xml: version=\"1.0\" encoding=\"UTF-8\"\n", }, { skipDoc: true, input: " value ", expected: "root: value # comment\n", }, { skipDoc: true, input: "valuevalue", expectedError: "bad file 'sample.yml': invalid XML: Encountered chardata [value] outside of XML node", scenarioType: "decode-error", }, { skipDoc: true, input: "value", expected: "# comment\nroot: value\n", }, { skipDoc: true, input: " ", expected: "root: # comment\n", }, { skipDoc: true, input: "valueanotherValue ", expected: "root:\n # comment\n - value\n - anotherValue\n", }, { skipDoc: true, input: "quicksoftsquishy", expected: "root:\n cats:\n cat:\n - quick\n - soft\n # kitty_comment\n\n - squishy\n", }, { description: "ProcInst with head comment", skipDoc: true, input: yamlInputWithProcInstAndHeadComment, expected: expectedXmlProcInstAndHeadComment, scenarioType: "encode", }, { description: "Scalar roundtrip", skipDoc: true, input: "cat", expression: ".mike", expected: "cat", scenarioType: "roundtrip", }, { description: "ProcInst with head comment round trip", skipDoc: true, input: expectedXmlProcInstAndHeadComment, expected: expectedXmlProcInstAndHeadComment, scenarioType: "roundtrip", }, { description: "ProcInst with block head comment to yaml", skipDoc: true, input: xmlProcInstAndHeadCommentBlock, expected: expectedYamlProcInstAndHeadCommentBlock, scenarioType: "decode", }, { description: "ProcInst with block head comment from yaml", skipDoc: true, input: expectedYamlProcInstAndHeadCommentBlock, expected: xmlProcInstAndHeadCommentBlock, scenarioType: "encode", }, { description: "ProcInst with head comment round trip block", skipDoc: true, input: xmlProcInstAndHeadCommentBlock, expected: xmlProcInstAndHeadCommentBlock, scenarioType: "roundtrip", }, { description: "Parse xml: simple", subdescription: "Notice how all the values are strings, see the next example on how you can fix that.", input: "\n\n meow\n 4\n true\n", expected: "+p_xml: version=\"1.0\" encoding=\"UTF-8\"\ncat:\n says: meow\n legs: \"4\"\n cute: \"true\"\n", }, { description: "Parse xml: number", subdescription: "All values are assumed to be strings when parsing XML, but you can use the `from_yaml` operator on all the strings values to autoparse into the correct type.", input: "\n\n meow\n 4\n true\n", expression: " (.. | select(tag == \"!!str\")) |= from_yaml", expected: "+p_xml: version=\"1.0\" encoding=\"UTF-8\"\ncat:\n says: meow\n legs: 4\n cute: true\n", }, { description: "Parse xml: array", subdescription: "Consecutive nodes with identical xml names are assumed to be arrays.", input: "\ncat\ngoat", expected: "+p_xml: version=\"1.0\" encoding=\"UTF-8\"\nanimal:\n - cat\n - goat\n", }, { description: "Parse xml: force as an array", subdescription: "In XML, if your array has a single item, then yq doesn't know its an array. This is how you can consistently force it to be an array. This handles the 3 scenarios of having nothing in the array, having a single item and having multiple.", input: "cat", expression: ".zoo.animal |= ([] + .)", expected: "zoo:\n animal:\n - cat\n", }, { description: "Parse xml: force all as an array", input: "boing", expression: ".. |= [] + .", expected: "- zoo:\n - thing:\n - frog:\n - boing\n", }, { description: "Parse xml: attributes", subdescription: "Attributes are converted to fields, with the default attribute prefix '+'. Use '--xml-attribute-prefix` to set your own.", input: "\n\n 7\n", expected: "+p_xml: version=\"1.0\" encoding=\"UTF-8\"\ncat:\n +@legs: \"4\"\n legs: \"7\"\n", }, { description: "Parse xml: attributes with content", subdescription: "Content is added as a field, using the default content name of `+content`. Use `--xml-content-name` to set your own.", input: "\nmeow", expected: "+p_xml: version=\"1.0\" encoding=\"UTF-8\"\ncat:\n +content: meow\n +@legs: \"4\"\n", }, { description: "Parse xml: content split between comments/children", subdescription: "Multiple content texts are collected into a sequence.", input: " value anotherValue frog cool!", expected: "root:\n +content: # comment\n - value\n - anotherValue\n - cool!\n a: frog\n", }, { description: "Parse xml: custom dtd", subdescription: "DTD entities are processed as directives.", input: xmlWithCustomDtd, expected: expectedDtd, scenarioType: "roundtrip", }, { description: "Roundtrip with name spaced attributes", skipDoc: true, input: inputXMLWithNamespacedAttr, expected: inputXMLWithNamespacedAttr, scenarioType: "roundtrip", }, { description: "Parse xml: skip custom dtd", subdescription: "DTDs are directives, skip over directives to skip DTDs.", input: xmlWithCustomDtd, expected: expectedSkippedDtd, scenarioType: "roundtrip-skip-directives", }, { description: "Parse xml: with comments", subdescription: "A best attempt is made to preserve comments.", input: inputXMLWithComments, expected: expectedDecodeYamlWithComments, scenarioType: "decode", }, { description: "Empty doc", skipDoc: true, input: "", expected: "\n", scenarioType: "decode", }, { description: "Empty single node", skipDoc: true, input: "", expected: "a:\n", scenarioType: "decode", }, { description: "Empty close node", skipDoc: true, input: "", expected: "a:\n", scenarioType: "decode", }, { description: "Nested empty", skipDoc: true, input: "", expected: "a:\n b:\n", scenarioType: "decode", }, { description: "Parse xml: with comments subchild", skipDoc: true, input: inputXMLWithCommentsWithSubChild, expected: expectedDecodeYamlWithSubChild, scenarioType: "decode", }, { description: "Parse xml: with comments array", skipDoc: true, input: inputXMLWithCommentsWithArray, expected: expectedDecodeYamlWithArray, scenarioType: "decode", }, { description: "Parse xml: keep attribute namespace", subdescription: fmt.Sprintf(`Defaults to %v`, ConfiguredXMLPreferences.KeepNamespace), skipDoc: false, input: inputXMLWithNamespacedAttr, expected: expectedYAMLWithNamespacedAttr, scenarioType: "decode-keep-ns", }, { description: "Parse xml: keep raw attribute namespace", skipDoc: true, input: inputXMLWithNamespacedAttr, expected: expectedYAMLWithRawNamespacedAttr, scenarioType: "decode-raw-token", }, { description: "Parse xml: keep raw attribute namespace", subdescription: fmt.Sprintf(`Defaults to %v`, ConfiguredXMLPreferences.UseRawToken), skipDoc: false, input: inputXMLWithNamespacedAttr, expected: expectedYAMLWithoutRawNamespacedAttr, scenarioType: "decode-raw-token-off", }, { description: "Encode xml: simple", input: "cat: purrs", expected: "purrs\n", scenarioType: "encode", }, { description: "includes map tags", skipDoc: true, input: "purrs\n", expression: `tag`, expected: "!!map\n", scenarioType: "decode", }, { description: "includes array tags", skipDoc: true, input: "purrspurrs\n", expression: `.cat | tag`, expected: "!!seq\n", scenarioType: "decode", }, { description: "Encode xml: array", input: "pets:\n cat:\n - purrs\n - meows", expected: "\n purrs\n meows\n\n", scenarioType: "encode", }, { description: "Encode xml: attributes", subdescription: "Fields with the matching xml-attribute-prefix are assumed to be attributes.", input: "cat:\n +@name: tiger\n meows: true\n", expected: "\n true\n\n", scenarioType: "encode", }, { description: "double prefix", skipDoc: true, input: "cat:\n +@+@name: tiger\n meows: true\n", expected: "\n true\n\n", scenarioType: "encode", }, { description: "arrays cannot be encoded", skipDoc: true, input: "[cat, dog, fish]", expectedError: "cannot encode !!seq to XML - only maps can be encoded", scenarioType: "encode-error", }, { description: "arrays cannot be encoded - 2", skipDoc: true, input: "[cat, dog]", expectedError: "cannot encode !!seq to XML - only maps can be encoded", scenarioType: "encode-error", }, { description: "Encode xml: attributes with content", subdescription: "Fields with the matching xml-content-name is assumed to be content.", input: "cat:\n +@name: tiger\n +content: cool\n", expected: "cool\n", scenarioType: "encode", }, { description: "round trip multiline 1", skipDoc: true, input: "\n", expected: "\n", scenarioType: "roundtrip", }, { description: "round trip multiline 2", skipDoc: true, input: "\n", expected: "\n", scenarioType: "roundtrip", }, { description: "round trip multiline 3", skipDoc: true, input: "\n", expected: "\n", scenarioType: "roundtrip", }, { description: "round trip multiline 4", skipDoc: true, input: "\n", expected: "\n", scenarioType: "roundtrip", }, { description: "round trip multiline 5", skipDoc: true, // pity spaces aren't kept atm. input: "\n", expected: "\n", scenarioType: "roundtrip", }, { description: "Encode xml: comments", subdescription: "A best attempt is made to copy comments to xml.", input: yamlWithComments, expected: expectedXMLWithComments, scenarioType: "encode", }, { description: "Encode: doctype and xml declaration", subdescription: "Use the special xml names to add/modify proc instructions and directives.", input: yamlWithProcInstAndDirectives, expected: expectedXmlWithProcInstAndDirectives, scenarioType: "encode", }, { description: "Round trip: with comments", subdescription: "A best effort is made, but comment positions and white space are not preserved perfectly.", input: inputXMLWithComments, expected: expectedRoundtripXMLWithComments, scenarioType: "roundtrip", }, { description: "Roundtrip: with doctype and declaration", subdescription: "yq parses XML proc instructions and directives into nodes.\nUnfortunately the underlying XML parser loses whitespace information.", input: xmlWithProcInstAndDirectives, expected: expectedXmlWithProcInstAndDirectives, scenarioType: "roundtrip", }, } func testXMLScenario(t *testing.T, s yqlib.FormatScenario) { switch s.scenarioType { case "", "decode": yamlPrefs := yqlib.ConfiguredYamlPreferences.Copy() yamlPrefs.Indent = 4 test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewXMLDecoder(ConfiguredXMLPreferences), NewYamlEncoder(yamlPrefs)), s.description) case "encode": test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewYamlDecoder(ConfiguredYamlPreferences), NewXMLEncoder(ConfiguredXMLPreferences)), s.description) case "roundtrip": test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewXMLDecoder(ConfiguredXMLPreferences), NewXMLEncoder(ConfiguredXMLPreferences)), s.description) case "decode-keep-ns": prefs := NewDefaultXmlPreferences() prefs.KeepNamespace = true test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewXMLDecoder(prefs), NewYamlEncoder(ConfiguredYamlPreferences)), s.description) case "decode-raw-token": prefs := NewDefaultXmlPreferences() prefs.UseRawToken = true test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewXMLDecoder(prefs), NewYamlEncoder(ConfiguredYamlPreferences)), s.description) case "decode-raw-token-off": prefs := NewDefaultXmlPreferences() prefs.UseRawToken = false test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewXMLDecoder(prefs), NewYamlEncoder(ConfiguredYamlPreferences)), s.description) case "roundtrip-skip-directives": prefs := NewDefaultXmlPreferences() prefs.SkipDirectives = true test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewXMLDecoder(prefs), NewXMLEncoder(prefs)), s.description) case "decode-error": result, err := processFormatScenario(s, NewXMLDecoder(NewDefaultXmlPreferences()), NewYamlEncoder(ConfiguredYamlPreferences)) if err == nil { t.Errorf("Expected error '%v' but it worked: %v", s.expectedError, result) } else { test.AssertResultComplexWithContext(t, s.expectedError, err.Error(), s.description) } case "encode-error": result, err := processFormatScenario(s, NewYamlDecoder(ConfiguredYamlPreferences), NewXMLEncoder(NewDefaultXmlPreferences())) if err == nil { t.Errorf("Expected error '%v' but it worked: %v", s.expectedError, result) } else { test.AssertResultComplexWithContext(t, s.expectedError, err.Error(), s.description) } default: panic(fmt.Sprintf("unhandled scenario type %q", s.scenarioType)) } } func documentXMLScenario(_ *testing.T, w *bufio.Writer, i interface{}) { s := i.(FormatScenario) if s.skipDoc { return } switch s.scenarioType { case "", "decode": documentXMLDecodeScenario(w, s) case "encode": documentXMLEncodeScenario(w, s) case "roundtrip": documentXMLRoundTripScenario(w, s) case "decode-keep-ns": documentXMLDecodeKeepNsScenario(w, s) case "decode-raw-token-off": documentXMLDecodeKeepNsRawTokenScenario(w, s) case "roundtrip-skip-directives": documentXMLSkipDirectivesScenario(w, s) default: panic(fmt.Sprintf("unhandled scenario type %q", s.scenarioType)) } } func documentXMLDecodeScenario(w *bufio.Writer, s FormatScenario) { writeOrPanic(w, fmt.Sprintf("## %v\n", s.description)) if s.subdescription != "" { writeOrPanic(w, s.subdescription) writeOrPanic(w, "\n\n") } writeOrPanic(w, "Given a sample.xml file of:\n") writeOrPanic(w, fmt.Sprintf("```xml\n%v\n```\n", s.input)) writeOrPanic(w, "then\n") expression := s.expression if expression == "" { expression = "." } writeOrPanic(w, fmt.Sprintf("```bash\nyq -oy '%v' sample.xml\n```\n", expression)) writeOrPanic(w, "will output\n") writeOrPanic(w, fmt.Sprintf("```yaml\n%v```\n\n", mustProcessFormatScenario(s, NewXMLDecoder(ConfiguredXMLPreferences), NewYamlEncoder(ConfiguredYamlPreferences)))) } func documentXMLDecodeKeepNsScenario(w *bufio.Writer, s FormatScenario) { writeOrPanic(w, fmt.Sprintf("## %v\n", s.description)) if s.subdescription != "" { writeOrPanic(w, s.subdescription) writeOrPanic(w, "\n\n") } writeOrPanic(w, "Given a sample.xml file of:\n") writeOrPanic(w, fmt.Sprintf("```xml\n%v\n```\n", s.input)) writeOrPanic(w, "then\n") writeOrPanic(w, "```bash\nyq --xml-keep-namespace=false '.' sample.xml\n```\n") writeOrPanic(w, "will output\n") prefs := NewDefaultXmlPreferences() prefs.KeepNamespace = false writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", mustProcessFormatScenario(s, NewXMLDecoder(prefs), NewXMLEncoder(prefs)))) prefsWithout := NewDefaultXmlPreferences() prefs.KeepNamespace = true writeOrPanic(w, "instead of\n") writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", mustProcessFormatScenario(s, NewXMLDecoder(prefsWithout), NewXMLEncoder(prefsWithout)))) } func documentXMLDecodeKeepNsRawTokenScenario(w *bufio.Writer, s FormatScenario) { writeOrPanic(w, fmt.Sprintf("## %v\n", s.description)) if s.subdescription != "" { writeOrPanic(w, s.subdescription) writeOrPanic(w, "\n\n") } writeOrPanic(w, "Given a sample.xml file of:\n") writeOrPanic(w, fmt.Sprintf("```xml\n%v\n```\n", s.input)) writeOrPanic(w, "then\n") writeOrPanic(w, "```bash\nyq --xml-raw-token=false '.' sample.xml\n```\n") writeOrPanic(w, "will output\n") prefs := NewDefaultXmlPreferences() prefs.UseRawToken = false writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", mustProcessFormatScenario(s, NewXMLDecoder(prefs), NewXMLEncoder(prefs)))) prefsWithout := NewDefaultXmlPreferences() prefsWithout.UseRawToken = true writeOrPanic(w, "instead of\n") writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", mustProcessFormatScenario(s, NewXMLDecoder(prefsWithout), NewXMLEncoder(prefsWithout)))) } func documentXMLEncodeScenario(w *bufio.Writer, s FormatScenario) { writeOrPanic(w, fmt.Sprintf("## %v\n", s.description)) if s.subdescription != "" { writeOrPanic(w, s.subdescription) writeOrPanic(w, "\n\n") } writeOrPanic(w, "Given a sample.yml file of:\n") writeOrPanic(w, fmt.Sprintf("```yaml\n%v\n```\n", s.input)) writeOrPanic(w, "then\n") writeOrPanic(w, "```bash\nyq -o=xml sample.yml\n```\n") writeOrPanic(w, "will output\n") writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", mustProcessFormatScenario(s, NewYamlDecoder(ConfiguredYamlPreferences), NewXMLEncoder(ConfiguredXMLPreferences)))) } func documentXMLRoundTripScenario(w *bufio.Writer, s FormatScenario) { writeOrPanic(w, fmt.Sprintf("## %v\n", s.description)) if s.subdescription != "" { writeOrPanic(w, s.subdescription) writeOrPanic(w, "\n\n") } writeOrPanic(w, "Given a sample.xml file of:\n") writeOrPanic(w, fmt.Sprintf("```xml\n%v\n```\n", s.input)) writeOrPanic(w, "then\n") writeOrPanic(w, "```bash\nyq '.' sample.xml\n```\n") writeOrPanic(w, "will output\n") writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", mustProcessFormatScenario(s, NewXMLDecoder(ConfiguredXMLPreferences), NewXMLEncoder(ConfiguredXMLPreferences)))) } func documentXMLSkipDirectivesScenario(w *bufio.Writer, s FormatScenario) { writeOrPanic(w, fmt.Sprintf("## %v\n", s.description)) if s.subdescription != "" { writeOrPanic(w, s.subdescription) writeOrPanic(w, "\n\n") } writeOrPanic(w, "Given a sample.xml file of:\n") writeOrPanic(w, fmt.Sprintf("```xml\n%v\n```\n", s.input)) writeOrPanic(w, "then\n") writeOrPanic(w, "```bash\nyq --xml-skip-directives '.' sample.xml\n```\n") writeOrPanic(w, "will output\n") prefs := NewDefaultXmlPreferences() prefs.SkipDirectives = true writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", mustProcessFormatScenario(s, NewXMLDecoder(prefs), NewXMLEncoder(prefs)))) } func TestXMLScenarios(t *testing.T) { for _, tt := range xmlScenarios { testXMLScenario(t, tt) } genericScenarios := make([]interface{}, len(xmlScenarios)) for i, s := range xmlScenarios { genericScenarios[i] = s } documentScenarios(t, "usage", "xml", genericScenarios, documentXMLScenario) }