diff --git a/pkg/yqlib/doc/usage/headers/xml.md b/pkg/yqlib/doc/usage/headers/xml.md index fd8e79d6..ce29869b 100644 --- a/pkg/yqlib/doc/usage/headers/xml.md +++ b/pkg/yqlib/doc/usage/headers/xml.md @@ -4,4 +4,29 @@ Encode and decode to and from XML. Whitespace is not conserved for round trips - Consecutive xml nodes with the same name are assumed to be arrays. -XML content data and attributes are created as fields. This can be controlled by the `'--xml-attribute-prefix` and `--xml-content-name` flags - see below for examples. +XML content data, attributes, processing instructions and directives are all created as plain fields. + +This can be controlled by: + +| Flag | Default |Sample XML | +| -- | -- | -- | + | `--xml-attribute-prefix` | `+` (changing to `+@` soon) | Legs in `````` | + | `--xml-content-name` | `+content` | Meow in ```Meow true``` | + | `--xml-directive-name` | `+directive` | `````` | + | `--xml-proc-inst-prefix` | `+p_` | `````` | + + +## Encoder / Decoder flag options + +In addition to the above flags, there are the following xml encoder/decoder options controlled by flags: + +| Flag | Default | Description | +| -- | -- | -- | +| `--xml-strict-mode` | false | Strict mode enforces the requirements of the XML specification. When switched off the parser allows input containing common mistakes. See [the Golang xml decoder](https://pkg.go.dev/encoding/xml#Decoder) for more details.| +| `--xml-keep-namespace` | true | Keeps the namespace of attributes | +| `--xml-raw-token` | true | Does not verify that start and end elements match and does not translate name space prefixes to their corresponding URLs. | +| `--xml-skip-proc-inst` | false | Skips over processing instructions, e.g. `` | +| `--xml-skip-directives` | false | Skips over directives, e.g. 
`````` | + + +See below for examples diff --git a/pkg/yqlib/doc/usage/xml.md b/pkg/yqlib/doc/usage/xml.md index f0d80bf4..5856f90a 100644 --- a/pkg/yqlib/doc/usage/xml.md +++ b/pkg/yqlib/doc/usage/xml.md @@ -4,7 +4,32 @@ Encode and decode to and from XML. Whitespace is not conserved for round trips - Consecutive xml nodes with the same name are assumed to be arrays. -XML content data and attributes are created as fields. This can be controlled by the `'--xml-attribute-prefix` and `--xml-content-name` flags - see below for examples. +XML content data, attributes, processing instructions and directives are all created as plain fields. + +This can be controlled by: + +| Flag | Default |Sample XML | +| -- | -- | -- | + | `--xml-attribute-prefix` | `+` (changing to `+@` soon) | Legs in `````` | + | `--xml-content-name` | `+content` | Meow in ```Meow true``` | + | `--xml-directive-name` | `+directive` | `````` | + | `--xml-proc-inst-prefix` | `+p_` | `````` | + + +## Encoder / Decoder flag options + +In addition to the above flags, there are the following xml encoder/decoder options controlled by flags: + +| Flag | Default | Description | +| -- | -- | -- | +| `--xml-strict-mode` | false | Strict mode enforces the requirements of the XML specification. When switched off the parser allows input containing common mistakes. See [the Golang xml decoder](https://pkg.go.dev/encoding/xml#Decoder) for more details.| +| `--xml-keep-namespace` | true | Keeps the namespace of attributes | +| `--xml-raw-token` | true | Does not verify that start and end elements match and does not translate name space prefixes to their corresponding URLs. | +| `--xml-skip-proc-inst` | false | Skips over processing instructions, e.g. `` | +| `--xml-skip-directives` | false | Skips over directives, e.g. `````` | + + +See below for examples {% hint style="warning" %} Note that versions prior to 4.18 require the 'eval/e' command to be specified. 
@@ -101,7 +126,7 @@ will output ```yaml +p_xml: version="1.0" encoding="UTF-8" cat: - +@legs: "4" + +legs: "4" legs: "7" ``` @@ -122,7 +147,7 @@ will output +p_xml: version="1.0" encoding="UTF-8" cat: +content: meow - +@legs: "4" + +legs: "4" ``` ## Parse xml: custom dtd @@ -320,7 +345,7 @@ Fields with the matching xml-attribute-prefix are assumed to be attributes. Given a sample.yml file of: ```yaml cat: - +@name: tiger + +name: tiger meows: true ``` @@ -341,7 +366,7 @@ Fields with the matching xml-content-name is assumed to be content. Given a sample.yml file of: ```yaml cat: - +@name: tiger + +name: tiger +content: cool ``` diff --git a/pkg/yqlib/xml_test.go b/pkg/yqlib/xml_test.go index 0c69a24e..af78d76e 100644 --- a/pkg/yqlib/xml_test.go +++ b/pkg/yqlib/xml_test.go @@ -58,7 +58,7 @@ cat: d: # in d before z: - +@sweet: cool + +sweet: cool # in d after # in y after # in_cat_after @@ -98,11 +98,11 @@ cat: d: - # in d before z: - +@sweet: cool + +sweet: cool # in d after - # in d2 before z: - +@sweet: cool2 + +sweet: cool2 # in d2 after # in y after # in_cat_after @@ -161,16 +161,16 @@ const inputXMLWithNamespacedAttr = ` const expectedYAMLWithNamespacedAttr = `+p_xml: version="1.0" map: - +@xmlns: some-namespace - +@xmlns:xsi: some-instance - +@some-instance:schemaLocation: some-url + +xmlns: some-namespace + +xmlns:xsi: some-instance + +some-instance:schemaLocation: some-url ` const expectedYAMLWithRawNamespacedAttr = `+p_xml: version="1.0" map: - +@xmlns: some-namespace - +@xmlns:xsi: some-instance - +@xsi:schemaLocation: some-url + +xmlns: some-namespace + +xmlns:xsi: some-instance + +xsi:schemaLocation: some-url ` const xmlWithCustomDtd = ` @@ -193,8 +193,9 @@ const expectedDtd = ` ` -const expectedSkippedDtd = ` - &writer;&copyright; +const expectedSkippedDtd = ` + + &writer;&copyright; ` @@ -227,32 +228,32 @@ var xmlScenarios = []formatScenario{ description: "Parse xml: simple", subdescription: "Notice how all the values are strings, see the next example on 
how you can fix that.", input: "\n\n meow\n 4\n true\n", - expected: "cat:\n says: meow\n legs: \"4\"\n cute: \"true\"\n", + expected: "+p_xml: version=\"1.0\" encoding=\"UTF-8\"\ncat:\n says: meow\n legs: \"4\"\n cute: \"true\"\n", }, { description: "Parse xml: number", subdescription: "All values are assumed to be strings when parsing XML, but you can use the `from_yaml` operator on all the strings values to autoparse into the correct type.", input: "\n\n meow\n 4\n true\n", expression: " (.. | select(tag == \"!!str\")) |= from_yaml", - expected: "cat:\n says: meow\n legs: 4\n cute: true\n", + expected: "+p_xml: version=\"1.0\" encoding=\"UTF-8\"\ncat:\n says: meow\n legs: 4\n cute: true\n", }, { description: "Parse xml: array", subdescription: "Consecutive nodes with identical xml names are assumed to be arrays.", input: "\ncat\ngoat", - expected: "animal:\n - cat\n - goat\n", + expected: "+p_xml: version=\"1.0\" encoding=\"UTF-8\"\nanimal:\n - cat\n - goat\n", }, { description: "Parse xml: attributes", subdescription: "Attributes are converted to fields, with the default attribute prefix '+'. Use '--xml-attribute-prefix` to set your own.", input: "\n\n 7\n", - expected: "cat:\n +legs: \"4\"\n legs: \"7\"\n", + expected: "+p_xml: version=\"1.0\" encoding=\"UTF-8\"\ncat:\n +legs: \"4\"\n legs: \"7\"\n", }, { description: "Parse xml: attributes with content", subdescription: "Content is added as a field, using the default content name of `+content`. 
Use `--xml-content-name` to set your own.", input: "\nmeow", - expected: "cat:\n +content: meow\n +legs: \"4\"\n", + expected: "+p_xml: version=\"1.0\" encoding=\"UTF-8\"\ncat:\n +content: meow\n +legs: \"4\"\n", }, { description: "Parse xml: custom dtd", @@ -362,21 +363,21 @@ var xmlScenarios = []formatScenario{ { description: "Encode xml: attributes", subdescription: "Fields with the matching xml-attribute-prefix are assumed to be attributes.", - input: "cat:\n +@name: tiger\n meows: true\n", + input: "cat:\n +name: tiger\n meows: true\n", expected: "\n true\n\n", scenarioType: "encode", }, { description: "double prefix", skipDoc: true, - input: "cat:\n +@+@name: tiger\n meows: true\n", + input: "cat:\n ++@name: tiger\n meows: true\n", expected: "\n true\n\n", scenarioType: "encode", }, { description: "Encode xml: attributes with content", subdescription: "Fields with the matching xml-content-name is assumed to be content.", - input: "cat:\n +@name: tiger\n +content: cool\n", + input: "cat:\n +name: tiger\n +content: cool\n", expected: "cool\n", scenarioType: "encode", },