diff --git a/pkg/yqlib/decoder_xml.go b/pkg/yqlib/decoder_xml.go index 4a2a0c82..a3dffeb7 100644 --- a/pkg/yqlib/decoder_xml.go +++ b/pkg/yqlib/decoder_xml.go @@ -22,9 +22,11 @@ type xmlDecoder struct { keepNamespace bool useRawToken bool finished bool + skipDirectives bool + skipProcInst bool } -func NewXMLDecoder(attributePrefix string, contentName string, strictMode bool, keepNamespace bool, useRawToken bool) Decoder { +func NewXMLDecoder(attributePrefix string, contentName string, strictMode bool, keepNamespace bool, useRawToken bool, skipDirectives bool, skipProcInst bool) Decoder { if contentName == "" { contentName = "content" } @@ -37,6 +39,8 @@ func NewXMLDecoder(attributePrefix string, contentName string, strictMode bool, useRawToken: useRawToken, directiveName: "_directive_", procInstPrefix: "_procInst_", + skipDirectives: skipDirectives, + skipProcInst: skipProcInst, } } @@ -285,9 +289,13 @@ func (dec *xmlDecoder) decodeXML(root *xmlNode) error { } case xml.ProcInst: - elem.n.AddChild(dec.procInstPrefix+se.Target, &xmlNode{Data: string(se.Inst)}) + if !dec.skipProcInst { + elem.n.AddChild(dec.procInstPrefix+se.Target, &xmlNode{Data: string(se.Inst)}) + } case xml.Directive: - elem.n.AddChild(dec.directiveName, &xmlNode{Data: string(se)}) + if !dec.skipDirectives { + elem.n.AddChild(dec.directiveName, &xmlNode{Data: string(se)}) + } } } diff --git a/pkg/yqlib/doc/usage/xml.md b/pkg/yqlib/doc/usage/xml.md index 729c8dc1..3d668506 100644 --- a/pkg/yqlib/doc/usage/xml.md +++ b/pkg/yqlib/doc/usage/xml.md @@ -30,6 +30,7 @@ yq -p=xml '.' sample.xml ``` will output ```yaml +_procInst_xml: version="1.0" encoding="UTF-8" cat: says: meow legs: "4" @@ -54,6 +55,7 @@ yq -p=xml ' (.. | select(tag == "!!str")) |= from_yaml' sample.xml ``` will output ```yaml +_procInst_xml: version="1.0" encoding="UTF-8" cat: says: meow legs: 4 @@ -75,6 +77,7 @@ yq -p=xml '.' sample.xml ``` will output ```yaml +_procInst_xml: version="1.0" encoding="UTF-8" animal: - cat - goat @@ -96,6 +99,7 @@ yq -p=xml '.' sample.xml ``` will output ```yaml +_procInst_xml: version="1.0" encoding="UTF-8" cat: +legs: "4" legs: "7" @@ -115,6 +119,7 @@ yq -p=xml '.' sample.xml ``` will output ```yaml +_procInst_xml: version="1.0" encoding="UTF-8" cat: +content: meow +legs: "4" @@ -141,6 +146,12 @@ yq -p=xml '.' sample.xml ``` will output ```yaml +_procInst_xml: version="1.0" +_directive_: |- + DOCTYPE root [ + + + ] root: item: '&writer;©right;' ``` @@ -207,11 +218,13 @@ yq -p=xml -o=xml --xml-keep-namespace '.' sample.xml ``` will output ```xml + ``` instead of ```xml + ``` @@ -230,11 +243,13 @@ yq -p=xml -o=xml --xml-keep-namespace --xml-raw-token '.' sample.xml ``` will output ```xml + ``` instead of ```xml + ``` @@ -339,6 +354,32 @@ will output ``` +## Encode: doctype and xml declaration +Use the special xml names to add/modify proc instructions and directives. + +Given a sample.yml file of: +```yaml +_procInst_xml: version="1.0" +_directive_: 'DOCTYPE config SYSTEM "/etc/iwatch/iwatch.dtd" ' +apple: + _procInst_coolioo: version="1.0" + _directive_: 'CATYPE meow purr puss ' + b: things + +``` +then +```bash +yq -o=xml '.' sample.yml +``` +will output +```xml + + + + things + +``` + ## Round trip: with comments A best effort is made, but comment positions and white space are not preserved perfectly. @@ -380,3 +421,31 @@ in d before --> ``` +## Roundtrip: with doctype and declaration +yq parses XML proc instructions and directives into nodes. +Unfortunately the underlying XML parser loses whitespace information. + +Given a sample.xml file of: +```xml + + + + + + things + + +``` +then +```bash +yq -p=xml -o=xml '.' sample.xml +``` +will output +```xml + + + + things + +``` + diff --git a/pkg/yqlib/xml_test.go b/pkg/yqlib/xml_test.go index caaf52de..34b85b15 100644 --- a/pkg/yqlib/xml_test.go +++ b/pkg/yqlib/xml_test.go @@ -159,13 +159,15 @@ const inputXMLWithNamespacedAttr = ` ` -const expectedYAMLWithNamespacedAttr = `map: +const expectedYAMLWithNamespacedAttr = `_procInst_xml: version="1.0" +map: +xmlns: some-namespace +xmlns:xsi: some-instance +some-instance:schemaLocation: some-url ` -const expectedYAMLWithRawNamespacedAttr = `map: +const expectedYAMLWithRawNamespacedAttr = `_procInst_xml: version="1.0" +map: +xmlns: some-namespace +xmlns:xsi: some-instance +xsi:schemaLocation: some-url @@ -181,10 +183,44 @@ const xmlWithCustomDtd = ` &writer;©right; ` -const expectedDtd = `root: +const expectedDtd = `_procInst_xml: version="1.0" +_directive_: |- + DOCTYPE root [ + + + ] +root: item: '&writer;©right;' ` +const expectedSkippedDtd = `root: + item: '&writer;©right;' +` + +const xmlWithProcInstAndDirectives = ` + + + + + things + +` + +const yamlWithProcInstAndDirectives = `_procInst_xml: version="1.0" +_directive_: 'DOCTYPE config SYSTEM "/etc/iwatch/iwatch.dtd" ' +apple: + _procInst_coolioo: version="1.0" + _directive_: 'CATYPE meow purr puss ' + b: things +` + +const expectedXmlWithProcInstAndDirectives = ` + + + things + +` + var xmlScenarios = []formatScenario{ { description: "Parse xml: simple", @@ -219,10 +255,17 @@ var xmlScenarios = []formatScenario{ }, { description: "Parse xml: custom dtd", - subdescription: "DTD entities are ignored.", + subdescription: "DTD entities are processed as directives.", input: xmlWithCustomDtd, expected: expectedDtd, }, + { + description: "Parse xml: custom dtd", + subdescription: "DTD entities are processed as directives.", + input: xmlWithCustomDtd, + expected: expectedSkippedDtd, + scenarioType: "c", + }, { description: "Parse xml: with comments", subdescription: "A best attempt is made to preserve comments.", @@ -341,6 +384,13 @@ var xmlScenarios = []formatScenario{ expected: expectedXMLWithComments, scenarioType: "encode", }, + { + description: "Encode: doctype and xml declaration", + subdescription: "Use the special xml names to add/modify proc instructions and directives.", + input: yamlWithProcInstAndDirectives, + expected: expectedXmlWithProcInstAndDirectives, + scenarioType: "encode", + }, { description: "Round trip: with comments", subdescription: "A best effort is made, but comment positions and white space are not preserved perfectly.", @@ -348,21 +398,29 @@ var xmlScenarios = []formatScenario{ expected: expectedRoundtripXMLWithComments, scenarioType: "roundtrip", }, + { + description: "Roundtrip: with doctype and declaration", + subdescription: "yq parses XML proc instructions and directives into nodes.\nUnfortunately the underlying XML parser loses whitespace information.", + input: xmlWithProcInstAndDirectives, + expected: expectedXmlWithProcInstAndDirectives, + scenarioType: "roundtrip", + }, } func testXMLScenario(t *testing.T, s formatScenario) { switch s.scenarioType { case "", "decode": - test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false), NewYamlEncoder(4, false, true, true)), s.description) + test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false, false, false), NewYamlEncoder(4, false, true, true)), s.description) case "encode": test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewYamlDecoder(), NewXMLEncoder(2, "+", "+content")), s.description) case "roundtrip": - test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false), NewXMLEncoder(2, "+", "+content")), s.description) + test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false, false, false), NewXMLEncoder(2, "+", "+content")), s.description) case "decode-keep-ns": - test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content", false, true, false), NewYamlEncoder(2, false, true, true)), s.description) + test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content", false, true, false, false, false), NewYamlEncoder(2, false, true, true)), s.description) case "decode-raw-token": - test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content", false, true, true), NewYamlEncoder(2, false, true, true)), s.description) - + test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content", false, true, true, false, false), NewYamlEncoder(2, false, true, true)), s.description) + case "encode-": + test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content", false, true, true, true, true), NewYamlEncoder(2, false, true, true)), s.description) default: panic(fmt.Sprintf("unhandled scenario type %q", s.scenarioType)) } @@ -428,10 +486,10 @@ func documentXMLDecodeKeepNsScenario(w *bufio.Writer, s formatScenario) { writeOrPanic(w, "```bash\nyq -p=xml -o=xml --xml-keep-namespace '.' sample.xml\n```\n") writeOrPanic(w, "will output\n") - writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, true, false), NewXMLEncoder(2, "+", "+content")))) + writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, true, false, false, false), NewXMLEncoder(2, "+", "+content")))) writeOrPanic(w, "instead of\n") - writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false), NewXMLEncoder(2, "+", "+content")))) + writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false, false, false), NewXMLEncoder(2, "+", "+content")))) } func documentXMLDecodeKeepNsRawTokenScenario(w *bufio.Writer, s formatScenario) { @@ -449,10 +507,10 @@ func documentXMLDecodeKeepNsRawTokenScenario(w *bufio.Writer, s formatScenario) writeOrPanic(w, "```bash\nyq -p=xml -o=xml --xml-keep-namespace --xml-raw-token '.' sample.xml\n```\n") writeOrPanic(w, "will output\n") - writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, true, true), NewXMLEncoder(2, "+", "+content")))) + writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, true, true, false, false), NewXMLEncoder(2, "+", "+content")))) writeOrPanic(w, "instead of\n") - writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false), NewXMLEncoder(2, "+", "+content")))) + writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false, false, false), NewXMLEncoder(2, "+", "+content")))) } func documentXMLEncodeScenario(w *bufio.Writer, s formatScenario) { @@ -488,7 +546,7 @@ func documentXMLRoundTripScenario(w *bufio.Writer, s formatScenario) { writeOrPanic(w, "```bash\nyq -p=xml -o=xml '.' sample.xml\n```\n") writeOrPanic(w, "will output\n") - writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false), NewXMLEncoder(2, "+", "+content")))) + writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false, false, false), NewXMLEncoder(2, "+", "+content")))) } func TestXMLScenarios(t *testing.T) {