diff --git a/pkg/yqlib/decoder_xml.go b/pkg/yqlib/decoder_xml.go
index 4a2a0c82..a3dffeb7 100644
--- a/pkg/yqlib/decoder_xml.go
+++ b/pkg/yqlib/decoder_xml.go
@@ -22,9 +22,11 @@ type xmlDecoder struct {
keepNamespace bool
useRawToken bool
finished bool
+ skipDirectives bool
+ skipProcInst bool
}
-func NewXMLDecoder(attributePrefix string, contentName string, strictMode bool, keepNamespace bool, useRawToken bool) Decoder {
+func NewXMLDecoder(attributePrefix string, contentName string, strictMode bool, keepNamespace bool, useRawToken bool, skipDirectives bool, skipProcInst bool) Decoder {
if contentName == "" {
contentName = "content"
}
@@ -37,6 +39,8 @@ func NewXMLDecoder(attributePrefix string, contentName string, strictMode bool,
useRawToken: useRawToken,
directiveName: "_directive_",
procInstPrefix: "_procInst_",
+ skipDirectives: skipDirectives,
+ skipProcInst: skipProcInst,
}
}
@@ -285,9 +289,13 @@ func (dec *xmlDecoder) decodeXML(root *xmlNode) error {
}
case xml.ProcInst:
- elem.n.AddChild(dec.procInstPrefix+se.Target, &xmlNode{Data: string(se.Inst)})
+ if !dec.skipProcInst {
+ elem.n.AddChild(dec.procInstPrefix+se.Target, &xmlNode{Data: string(se.Inst)})
+ }
case xml.Directive:
- elem.n.AddChild(dec.directiveName, &xmlNode{Data: string(se)})
+ if !dec.skipDirectives {
+ elem.n.AddChild(dec.directiveName, &xmlNode{Data: string(se)})
+ }
}
}
diff --git a/pkg/yqlib/doc/usage/xml.md b/pkg/yqlib/doc/usage/xml.md
index 729c8dc1..3d668506 100644
--- a/pkg/yqlib/doc/usage/xml.md
+++ b/pkg/yqlib/doc/usage/xml.md
@@ -30,6 +30,7 @@ yq -p=xml '.' sample.xml
```
will output
```yaml
+_procInst_xml: version="1.0" encoding="UTF-8"
cat:
says: meow
legs: "4"
@@ -54,6 +55,7 @@ yq -p=xml ' (.. | select(tag == "!!str")) |= from_yaml' sample.xml
```
will output
```yaml
+_procInst_xml: version="1.0" encoding="UTF-8"
cat:
says: meow
legs: 4
@@ -75,6 +77,7 @@ yq -p=xml '.' sample.xml
```
will output
```yaml
+_procInst_xml: version="1.0" encoding="UTF-8"
animal:
- cat
- goat
@@ -96,6 +99,7 @@ yq -p=xml '.' sample.xml
```
will output
```yaml
+_procInst_xml: version="1.0" encoding="UTF-8"
cat:
+legs: "4"
legs: "7"
@@ -115,6 +119,7 @@ yq -p=xml '.' sample.xml
```
will output
```yaml
+_procInst_xml: version="1.0" encoding="UTF-8"
cat:
+content: meow
+legs: "4"
@@ -141,6 +146,12 @@ yq -p=xml '.' sample.xml
```
will output
```yaml
+_procInst_xml: version="1.0"
+_directive_: |-
+ DOCTYPE root [
+
+
+ ]
root:
item: '&writer;&copyright;'
```
@@ -207,11 +218,13 @@ yq -p=xml -o=xml --xml-keep-namespace '.' sample.xml
```
will output
```xml
+
```
instead of
```xml
+
```
@@ -230,11 +243,13 @@ yq -p=xml -o=xml --xml-keep-namespace --xml-raw-token '.' sample.xml
```
will output
```xml
+
```
instead of
```xml
+
```
@@ -339,6 +354,32 @@ will output
```
+## Encode: doctype and xml declaration
+Use the special xml names to add/modify proc instructions and directives.
+
+Given a sample.yml file of:
+```yaml
+_procInst_xml: version="1.0"
+_directive_: 'DOCTYPE config SYSTEM "/etc/iwatch/iwatch.dtd" '
+apple:
+ _procInst_coolioo: version="1.0"
+ _directive_: 'CATYPE meow purr puss '
+ b: things
+
+```
+then
+```bash
+yq -o=xml '.' sample.yml
+```
+will output
+```xml
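+<?xml version="1.0"?>
+<!DOCTYPE config SYSTEM "/etc/iwatch/iwatch.dtd" >
+<apple><?coolioo version="1.0"?><!CATYPE meow purr puss >
+  <b>things</b>
+</apple>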
+```
+
## Round trip: with comments
A best effort is made, but comment positions and white space are not preserved perfectly.
@@ -380,3 +421,31 @@ in d before -->
```
+## Roundtrip: with doctype and declaration
+yq parses XML proc instructions and directives into nodes.
+Unfortunately the underlying XML parser loses whitespace information.
+
+Given a sample.xml file of:
+```xml
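+<?xml version="1.0"?>
+<!DOCTYPE config SYSTEM "/etc/iwatch/iwatch.dtd" >
+<apple>
+  <?coolioo version="1.0"?>
+  <!CATYPE meow purr puss >
+  <b>things</b>
+</apple>
+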
+```
+then
+```bash
+yq -p=xml -o=xml '.' sample.xml
+```
+will output
+```xml
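+<?xml version="1.0"?>
+<!DOCTYPE config SYSTEM "/etc/iwatch/iwatch.dtd" >
+<apple><?coolioo version="1.0"?><!CATYPE meow purr puss >
+  <b>things</b>
+</apple>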
+```
+
diff --git a/pkg/yqlib/xml_test.go b/pkg/yqlib/xml_test.go
index caaf52de..34b85b15 100644
--- a/pkg/yqlib/xml_test.go
+++ b/pkg/yqlib/xml_test.go
@@ -159,13 +159,15 @@ const inputXMLWithNamespacedAttr = `
`
-const expectedYAMLWithNamespacedAttr = `map:
+const expectedYAMLWithNamespacedAttr = `_procInst_xml: version="1.0"
+map:
+xmlns: some-namespace
+xmlns:xsi: some-instance
+some-instance:schemaLocation: some-url
`
-const expectedYAMLWithRawNamespacedAttr = `map:
+const expectedYAMLWithRawNamespacedAttr = `_procInst_xml: version="1.0"
+map:
+xmlns: some-namespace
+xmlns:xsi: some-instance
+xsi:schemaLocation: some-url
@@ -181,10 +183,44 @@ const xmlWithCustomDtd = `
- &writer;©right;
`
-const expectedDtd = `root:
+const expectedDtd = `_procInst_xml: version="1.0"
+_directive_: |-
+ DOCTYPE root [
+
+
+ ]
+root:
item: '&writer;&copyright;'
`
+const expectedSkippedDtd = `root:
+ item: '&writer;&copyright;'
+`
+
+const xmlWithProcInstAndDirectives = `<?xml version="1.0"?>
+<!DOCTYPE config SYSTEM "/etc/iwatch/iwatch.dtd" >
+<apple>
+  <?coolioo version="1.0"?>
+  <!CATYPE meow purr puss >
+  <b>things</b>
+</apple>
+`
+
+const yamlWithProcInstAndDirectives = `_procInst_xml: version="1.0"
+_directive_: 'DOCTYPE config SYSTEM "/etc/iwatch/iwatch.dtd" '
+apple:
+ _procInst_coolioo: version="1.0"
+ _directive_: 'CATYPE meow purr puss '
+ b: things
+`
+
+const expectedXmlWithProcInstAndDirectives = `<?xml version="1.0"?>
+<!DOCTYPE config SYSTEM "/etc/iwatch/iwatch.dtd" >
+<apple><?coolioo version="1.0"?><!CATYPE meow purr puss >
+  <b>things</b>
+</apple>
+`
+
var xmlScenarios = []formatScenario{
{
description: "Parse xml: simple",
@@ -219,10 +255,17 @@ var xmlScenarios = []formatScenario{
},
{
description: "Parse xml: custom dtd",
- subdescription: "DTD entities are ignored.",
+ subdescription: "DTD entities are processed as directives.",
input: xmlWithCustomDtd,
expected: expectedDtd,
},
+ {
+ description: "Parse xml: custom dtd",
+ subdescription: "DTD entities are processed as directives.",
+ input: xmlWithCustomDtd,
+ expected: expectedSkippedDtd,
+ scenarioType: "c",
+ },
{
description: "Parse xml: with comments",
subdescription: "A best attempt is made to preserve comments.",
@@ -341,6 +384,13 @@ var xmlScenarios = []formatScenario{
expected: expectedXMLWithComments,
scenarioType: "encode",
},
+ {
+ description: "Encode: doctype and xml declaration",
+ subdescription: "Use the special xml names to add/modify proc instructions and directives.",
+ input: yamlWithProcInstAndDirectives,
+ expected: expectedXmlWithProcInstAndDirectives,
+ scenarioType: "encode",
+ },
{
description: "Round trip: with comments",
subdescription: "A best effort is made, but comment positions and white space are not preserved perfectly.",
@@ -348,21 +398,29 @@ var xmlScenarios = []formatScenario{
expected: expectedRoundtripXMLWithComments,
scenarioType: "roundtrip",
},
+ {
+ description: "Roundtrip: with doctype and declaration",
+ subdescription: "yq parses XML proc instructions and directives into nodes.\nUnfortunately the underlying XML parser loses whitespace information.",
+ input: xmlWithProcInstAndDirectives,
+ expected: expectedXmlWithProcInstAndDirectives,
+ scenarioType: "roundtrip",
+ },
}
func testXMLScenario(t *testing.T, s formatScenario) {
switch s.scenarioType {
case "", "decode":
- test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false), NewYamlEncoder(4, false, true, true)), s.description)
+ test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false, false, false), NewYamlEncoder(4, false, true, true)), s.description)
case "encode":
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewYamlDecoder(), NewXMLEncoder(2, "+", "+content")), s.description)
case "roundtrip":
- test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false), NewXMLEncoder(2, "+", "+content")), s.description)
+ test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false, false, false), NewXMLEncoder(2, "+", "+content")), s.description)
case "decode-keep-ns":
- test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content", false, true, false), NewYamlEncoder(2, false, true, true)), s.description)
+ test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content", false, true, false, false, false), NewYamlEncoder(2, false, true, true)), s.description)
case "decode-raw-token":
- test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content", false, true, true), NewYamlEncoder(2, false, true, true)), s.description)
-
+ test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content", false, true, true, false, false), NewYamlEncoder(2, false, true, true)), s.description)
+ case "c", "encode-": // decode with directives and proc insts skipped; "c" is the type the skipped-DTD scenario declares and previously fell through to the panic below
+ test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content", false, true, true, true, true), NewYamlEncoder(2, false, true, true)), s.description)
default:
panic(fmt.Sprintf("unhandled scenario type %q", s.scenarioType))
}
}
@@ -428,10 +486,10 @@ func documentXMLDecodeKeepNsScenario(w *bufio.Writer, s formatScenario) {
writeOrPanic(w, "```bash\nyq -p=xml -o=xml --xml-keep-namespace '.' sample.xml\n```\n")
writeOrPanic(w, "will output\n")
- writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, true, false), NewXMLEncoder(2, "+", "+content"))))
+ writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, true, false, false, false), NewXMLEncoder(2, "+", "+content"))))
writeOrPanic(w, "instead of\n")
- writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false), NewXMLEncoder(2, "+", "+content"))))
+ writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false, false, false), NewXMLEncoder(2, "+", "+content"))))
}
func documentXMLDecodeKeepNsRawTokenScenario(w *bufio.Writer, s formatScenario) {
@@ -449,10 +507,10 @@ func documentXMLDecodeKeepNsRawTokenScenario(w *bufio.Writer, s formatScenario)
writeOrPanic(w, "```bash\nyq -p=xml -o=xml --xml-keep-namespace --xml-raw-token '.' sample.xml\n```\n")
writeOrPanic(w, "will output\n")
- writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, true, true), NewXMLEncoder(2, "+", "+content"))))
+ writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, true, true, false, false), NewXMLEncoder(2, "+", "+content"))))
writeOrPanic(w, "instead of\n")
- writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false), NewXMLEncoder(2, "+", "+content"))))
+ writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false, false, false), NewXMLEncoder(2, "+", "+content"))))
}
func documentXMLEncodeScenario(w *bufio.Writer, s formatScenario) {
@@ -488,7 +546,7 @@ func documentXMLRoundTripScenario(w *bufio.Writer, s formatScenario) {
writeOrPanic(w, "```bash\nyq -p=xml -o=xml '.' sample.xml\n```\n")
writeOrPanic(w, "will output\n")
- writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false), NewXMLEncoder(2, "+", "+content"))))
+ writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false, false, false), NewXMLEncoder(2, "+", "+content"))))
}
func TestXMLScenarios(t *testing.T) {