This commit is contained in:
Mike Farah 2022-10-18 14:56:50 +11:00
parent 6bbab2a664
commit 2e9c91f8a1
3 changed files with 152 additions and 17 deletions

View File

@ -22,9 +22,11 @@ type xmlDecoder struct {
keepNamespace bool
useRawToken bool
finished bool
skipDirectives bool
skipProcInst bool
}
func NewXMLDecoder(attributePrefix string, contentName string, strictMode bool, keepNamespace bool, useRawToken bool) Decoder {
func NewXMLDecoder(attributePrefix string, contentName string, strictMode bool, keepNamespace bool, useRawToken bool, skipDirectives bool, skipProcInst bool) Decoder {
if contentName == "" {
contentName = "content"
}
@ -37,6 +39,8 @@ func NewXMLDecoder(attributePrefix string, contentName string, strictMode bool,
useRawToken: useRawToken,
directiveName: "_directive_",
procInstPrefix: "_procInst_",
skipDirectives: skipDirectives,
skipProcInst: skipProcInst,
}
}
@ -285,9 +289,13 @@ func (dec *xmlDecoder) decodeXML(root *xmlNode) error {
}
case xml.ProcInst:
elem.n.AddChild(dec.procInstPrefix+se.Target, &xmlNode{Data: string(se.Inst)})
if !dec.skipProcInst {
elem.n.AddChild(dec.procInstPrefix+se.Target, &xmlNode{Data: string(se.Inst)})
}
case xml.Directive:
elem.n.AddChild(dec.directiveName, &xmlNode{Data: string(se)})
if !dec.skipDirectives {
elem.n.AddChild(dec.directiveName, &xmlNode{Data: string(se)})
}
}
}

View File

@ -30,6 +30,7 @@ yq -p=xml '.' sample.xml
```
will output
```yaml
_procInst_xml: version="1.0" encoding="UTF-8"
cat:
says: meow
legs: "4"
@ -54,6 +55,7 @@ yq -p=xml ' (.. | select(tag == "!!str")) |= from_yaml' sample.xml
```
will output
```yaml
_procInst_xml: version="1.0" encoding="UTF-8"
cat:
says: meow
legs: 4
@ -75,6 +77,7 @@ yq -p=xml '.' sample.xml
```
will output
```yaml
_procInst_xml: version="1.0" encoding="UTF-8"
animal:
- cat
- goat
@ -96,6 +99,7 @@ yq -p=xml '.' sample.xml
```
will output
```yaml
_procInst_xml: version="1.0" encoding="UTF-8"
cat:
+legs: "4"
legs: "7"
@ -115,6 +119,7 @@ yq -p=xml '.' sample.xml
```
will output
```yaml
_procInst_xml: version="1.0" encoding="UTF-8"
cat:
+content: meow
+legs: "4"
@ -141,6 +146,12 @@ yq -p=xml '.' sample.xml
```
will output
```yaml
_procInst_xml: version="1.0"
_directive_: |-
DOCTYPE root [
<!ENTITY writer "Blah.">
<!ENTITY copyright "Blah">
]
root:
item: '&writer;&copyright;'
```
@ -207,11 +218,13 @@ yq -p=xml -o=xml --xml-keep-namespace '.' sample.xml
```
will output
```xml
<?xml version="1.0"?>
<map xmlns="some-namespace" xmlns:xsi="some-instance" some-instance:schemaLocation="some-url"></map>
```
instead of
```xml
<?xml version="1.0"?>
<map xmlns="some-namespace" xsi="some-instance" schemaLocation="some-url"></map>
```
@ -230,11 +243,13 @@ yq -p=xml -o=xml --xml-keep-namespace --xml-raw-token '.' sample.xml
```
will output
```xml
<?xml version="1.0"?>
<map xmlns="some-namespace" xmlns:xsi="some-instance" xsi:schemaLocation="some-url"></map>
```
instead of
```xml
<?xml version="1.0"?>
<map xmlns="some-namespace" xsi="some-instance" schemaLocation="some-url"></map>
```
@ -339,6 +354,32 @@ will output
</cat><!-- below_cat -->
```
## Encode: doctype and xml declaration
Use the special XML names to add/modify processing instructions and directives.
Given a sample.yml file of:
```yaml
_procInst_xml: version="1.0"
_directive_: 'DOCTYPE config SYSTEM "/etc/iwatch/iwatch.dtd" '
apple:
_procInst_coolioo: version="1.0"
_directive_: 'CATYPE meow purr puss '
b: things
```
then
```bash
yq -o=xml '.' sample.yml
```
will output
```xml
<?xml version="1.0"?>
<!DOCTYPE config SYSTEM "/etc/iwatch/iwatch.dtd" >
<apple><?coolioo version="1.0"?><!CATYPE meow purr puss >
<b>things</b>
</apple>
```
## Round trip: with comments
A best effort is made, but comment positions and white space are not preserved perfectly.
@ -380,3 +421,31 @@ in d before -->
</cat><!-- after cat -->
```
## Round trip: with doctype and declaration
yq parses XML processing instructions and directives into nodes.
Unfortunately, the underlying XML parser loses whitespace information.
Given a sample.xml file of:
```xml
<?xml version="1.0"?>
<!DOCTYPE config SYSTEM "/etc/iwatch/iwatch.dtd" >
<apple>
<?coolioo version="1.0"?>
<!CATYPE meow purr puss >
<b>things</b>
</apple>
```
then
```bash
yq -p=xml -o=xml '.' sample.xml
```
will output
```xml
<?xml version="1.0"?>
<!DOCTYPE config SYSTEM "/etc/iwatch/iwatch.dtd" >
<apple><?coolioo version="1.0"?><!CATYPE meow purr puss >
<b>things</b>
</apple>
```

View File

@ -159,13 +159,15 @@ const inputXMLWithNamespacedAttr = `
</map>
`
const expectedYAMLWithNamespacedAttr = `map:
const expectedYAMLWithNamespacedAttr = `_procInst_xml: version="1.0"
map:
+xmlns: some-namespace
+xmlns:xsi: some-instance
+some-instance:schemaLocation: some-url
`
const expectedYAMLWithRawNamespacedAttr = `map:
const expectedYAMLWithRawNamespacedAttr = `_procInst_xml: version="1.0"
map:
+xmlns: some-namespace
+xmlns:xsi: some-instance
+xsi:schemaLocation: some-url
@ -181,10 +183,44 @@ const xmlWithCustomDtd = `
<item>&writer;&copyright;</item>
</root>`
const expectedDtd = `root:
const expectedDtd = `_procInst_xml: version="1.0"
_directive_: |-
DOCTYPE root [
<!ENTITY writer "Blah.">
<!ENTITY copyright "Blah">
]
root:
item: '&writer;&copyright;'
`
const expectedSkippedDtd = `root:
item: '&writer;&copyright;'
`
const xmlWithProcInstAndDirectives = `<?xml version="1.0"?>
<!DOCTYPE config SYSTEM "/etc/iwatch/iwatch.dtd" >
<apple>
<?coolioo version="1.0"?>
<!CATYPE meow purr puss >
<b>things</b>
</apple>
`
const yamlWithProcInstAndDirectives = `_procInst_xml: version="1.0"
_directive_: 'DOCTYPE config SYSTEM "/etc/iwatch/iwatch.dtd" '
apple:
_procInst_coolioo: version="1.0"
_directive_: 'CATYPE meow purr puss '
b: things
`
const expectedXmlWithProcInstAndDirectives = `<?xml version="1.0"?>
<!DOCTYPE config SYSTEM "/etc/iwatch/iwatch.dtd" >
<apple><?coolioo version="1.0"?><!CATYPE meow purr puss >
<b>things</b>
</apple>
`
var xmlScenarios = []formatScenario{
{
description: "Parse xml: simple",
@ -219,10 +255,17 @@ var xmlScenarios = []formatScenario{
},
{
description: "Parse xml: custom dtd",
subdescription: "DTD entities are ignored.",
subdescription: "DTD entities are processed as directives.",
input: xmlWithCustomDtd,
expected: expectedDtd,
},
{
description: "Parse xml: custom dtd",
subdescription: "DTD entities are processed as directives.",
input: xmlWithCustomDtd,
expected: expectedSkippedDtd,
scenarioType: "c",
},
{
description: "Parse xml: with comments",
subdescription: "A best attempt is made to preserve comments.",
@ -341,6 +384,13 @@ var xmlScenarios = []formatScenario{
expected: expectedXMLWithComments,
scenarioType: "encode",
},
{
description: "Encode: doctype and xml declaration",
subdescription: "Use the special xml names to add/modify proc instructions and directives.",
input: yamlWithProcInstAndDirectives,
expected: expectedXmlWithProcInstAndDirectives,
scenarioType: "encode",
},
{
description: "Round trip: with comments",
subdescription: "A best effort is made, but comment positions and white space are not preserved perfectly.",
@ -348,21 +398,29 @@ var xmlScenarios = []formatScenario{
expected: expectedRoundtripXMLWithComments,
scenarioType: "roundtrip",
},
{
description: "Roundtrip: with doctype and declaration",
subdescription: "yq parses XML proc instructions and directives into nodes.\nUnfortunately the underlying XML parser loses whitespace information.",
input: xmlWithProcInstAndDirectives,
expected: expectedXmlWithProcInstAndDirectives,
scenarioType: "roundtrip",
},
}
func testXMLScenario(t *testing.T, s formatScenario) {
switch s.scenarioType {
case "", "decode":
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false), NewYamlEncoder(4, false, true, true)), s.description)
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false, false, false), NewYamlEncoder(4, false, true, true)), s.description)
case "encode":
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewYamlDecoder(), NewXMLEncoder(2, "+", "+content")), s.description)
case "roundtrip":
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false), NewXMLEncoder(2, "+", "+content")), s.description)
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false, false, false), NewXMLEncoder(2, "+", "+content")), s.description)
case "decode-keep-ns":
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content", false, true, false), NewYamlEncoder(2, false, true, true)), s.description)
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content", false, true, false, false, false), NewYamlEncoder(2, false, true, true)), s.description)
case "decode-raw-token":
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content", false, true, true), NewYamlEncoder(2, false, true, true)), s.description)
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content", false, true, true, false, false), NewYamlEncoder(2, false, true, true)), s.description)
case "encode-":
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content", false, true, true, true, true), NewYamlEncoder(2, false, true, true)), s.description)
default:
panic(fmt.Sprintf("unhandled scenario type %q", s.scenarioType))
}
@ -428,10 +486,10 @@ func documentXMLDecodeKeepNsScenario(w *bufio.Writer, s formatScenario) {
writeOrPanic(w, "```bash\nyq -p=xml -o=xml --xml-keep-namespace '.' sample.xml\n```\n")
writeOrPanic(w, "will output\n")
writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, true, false), NewXMLEncoder(2, "+", "+content"))))
writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, true, false, false, false), NewXMLEncoder(2, "+", "+content"))))
writeOrPanic(w, "instead of\n")
writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false), NewXMLEncoder(2, "+", "+content"))))
writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false, false, false), NewXMLEncoder(2, "+", "+content"))))
}
func documentXMLDecodeKeepNsRawTokenScenario(w *bufio.Writer, s formatScenario) {
@ -449,10 +507,10 @@ func documentXMLDecodeKeepNsRawTokenScenario(w *bufio.Writer, s formatScenario)
writeOrPanic(w, "```bash\nyq -p=xml -o=xml --xml-keep-namespace --xml-raw-token '.' sample.xml\n```\n")
writeOrPanic(w, "will output\n")
writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, true, true), NewXMLEncoder(2, "+", "+content"))))
writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, true, true, false, false), NewXMLEncoder(2, "+", "+content"))))
writeOrPanic(w, "instead of\n")
writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false), NewXMLEncoder(2, "+", "+content"))))
writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false, false, false), NewXMLEncoder(2, "+", "+content"))))
}
func documentXMLEncodeScenario(w *bufio.Writer, s formatScenario) {
@ -488,7 +546,7 @@ func documentXMLRoundTripScenario(w *bufio.Writer, s formatScenario) {
writeOrPanic(w, "```bash\nyq -p=xml -o=xml '.' sample.xml\n```\n")
writeOrPanic(w, "will output\n")
writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false), NewXMLEncoder(2, "+", "+content"))))
writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false, false, false), NewXMLEncoder(2, "+", "+content"))))
}
func TestXMLScenarios(t *testing.T) {