wip better comment parsing

This commit is contained in:
Mike Farah 2021-12-22 11:39:10 +11:00
parent a72743f9c9
commit e869f4b81f
2 changed files with 308 additions and 72 deletions

View File

@ -22,3 +22,221 @@ XML nodes that have attributes then plain content, e.g:
The content of the node will be set as a field in the map with the key "+content". Use the `--xml-content-name` flag to change this.
## Parse xml: simple
Given a sample.xml file of:
```xml
<?xml version="1.0" encoding="UTF-8"?>
<cat>meow</cat>
```
then
```bash
yq e -p=xml '.' sample.xml
```
will output
```yaml
cat: meow
```
## Parse xml: array
Consecutive nodes with identical xml names are assumed to be arrays.
Given a sample.xml file of:
```xml
<?xml version="1.0" encoding="UTF-8"?>
<animal>1</animal>
<animal>2</animal>
```
then
```bash
yq e -p=xml '.' sample.xml
```
will output
```yaml
animal:
- "1"
- "2"
```
## Parse xml: attributes
Attributes are converted to fields, with the attribute prefix.
Given a sample.xml file of:
```xml
<?xml version="1.0" encoding="UTF-8"?>
<cat legs="4">
<legs>7</legs>
</cat>
```
then
```bash
yq e -p=xml '.' sample.xml
```
will output
```yaml
cat:
  +legs: "4"
  legs: "7"
```
## Parse xml: attributes with content
Content is added as a field, using the content name
Given a sample.xml file of:
```xml
<?xml version="1.0" encoding="UTF-8"?>
<cat legs="4">meow</cat>
```
then
```bash
yq e -p=xml '.' sample.xml
```
will output
```yaml
cat:
  +content: meow
  +legs: "4"
```
## Parse xml: with comments
A best attempt is made to preserve comments.
Given a sample.xml file of:
```xml
<!-- before cat -->
<cat>
<!-- in cat before -->
<x>3<!-- multi
line comment
for x --></x>
<y>
<!-- in y before -->
<d><!-- in d before -->4<!-- in d after --></d>
<!-- in y after -->
</y>
<!-- in_cat_after -->
</cat>
<!-- after cat -->
```
then
```bash
yq e -p=xml '.' sample.xml
```
will output
```yaml
# before cat
cat:
  # in cat before
  x: "3" # multi
  # line comment
  # for x
  y:
    # in y before
    d: "4" # in d before in d after
    # in y after
# after cat
```
## Encode xml: simple
Given a sample.yml file of:
```yaml
cat: purrs
```
then
```bash
yq e -o=xml '.' sample.yml
```
will output
```xml
<cat>purrs</cat>
```
## Encode xml: array
Given a sample.yml file of:
```yaml
pets:
  cat:
    - purrs
    - meows
```
then
```bash
yq e -o=xml '.' sample.yml
```
will output
```xml
<pets>
<cat>purrs</cat>
<cat>meows</cat>
</pets>
```
## Encode xml: attributes
Fields with the matching xml-attribute-prefix are assumed to be attributes.
Given a sample.yml file of:
```yaml
cat:
  +name: tiger
  meows: true
```
then
```bash
yq e -o=xml '.' sample.yml
```
will output
```xml
<cat name="tiger">
<meows>true</meows>
</cat>
```
## Encode xml: attributes with content
Fields with the matching xml-content-name are assumed to be content.
Given a sample.yml file of:
```yaml
cat:
  +name: tiger
  +content: cool
```
then
```bash
yq e -o=xml '.' sample.yml
```
will output
```xml
<cat name="tiger">cool</cat>
```
## Encode xml: comments
A best attempt is made to copy comments to xml.
Given a sample.yml file of:
```yaml
# above_cat
cat: # inline_cat
  # above_array
  array: # inline_array
    - val1 # inline_val1
    # above_val2
    - val2 # inline_val2
# below_cat
```
then
```bash
yq e -o=xml '.' sample.yml
```
will output
```xml
<!-- above_cat inline_cat--><cat><!-- above_array inline_array-->
<array><!-- inline_val1-->val1</array>
<array><!-- above_val2 inline_val2-->val2</array>
</cat><!-- below_cat-->
```

View File

@ -58,16 +58,33 @@ type xmlScenario struct {
scenarioType string scenarioType string
} }
// inputXmlWithComments is an XML fixture with comments placed before, inside
// and after elements, including a multi-line comment and comments on both
// sides of a text value. It is the input of the "Parse xml: with comments"
// scenario.
var inputXmlWithComments = `
<!-- before cat -->
<cat>
<!-- in cat before -->
<x>3<!-- multi
line comment
for x --></x>
<y>
<!-- in y before -->
<d><!-- in d before -->4<!-- in d after --></d>
<!-- in y after -->
</y>
<!-- in_cat_after -->
</cat>
<!-- after cat -->
`
var expectedDecodeYamlWithComments = `D0, P[], (doc)::# before cat var expectedDecodeYamlWithComments = `D0, P[], (doc)::# before cat
cat: cat:
# in cat # in cat before
x: "3" # xca x: "3" # multi
# cool # line comment
# smart # for x
y: y:
# befored # in y before
d: "4" # ind ind2 d: "4" # in d before in d after
# afterd # in y after
# after cat # after cat
` `
@ -89,74 +106,75 @@ var expectedXmlWithComments = `<!-- above_cat inline_cat--><cat><!-- above_array
` `
var xmlScenarios = []xmlScenario{ var xmlScenarios = []xmlScenario{
// {
// description: "Parse xml: simple",
// input: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<cat>meow</cat>",
// expected: "D0, P[], (doc)::cat: meow\n",
// },
// {
// description: "Parse xml: array",
// subdescription: "Consecutive nodes with identical xml names are assumed to be arrays.",
// input: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<animal>1</animal>\n<animal>2</animal>",
// expected: "D0, P[], (doc)::animal:\n - \"1\"\n - \"2\"\n",
// },
// {
// description: "Parse xml: attributes",
// subdescription: "Attributes are converted to fields, with the attribute prefix.",
// input: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<cat legs=\"4\">\n <legs>7</legs>\n</cat>",
// expected: "D0, P[], (doc)::cat:\n +legs: \"4\"\n legs: \"7\"\n",
// },
// {
// description: "Parse xml: attributes with content",
// subdescription: "Content is added as a field, using the content name",
// input: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<cat legs=\"4\">meow</cat>",
// expected: "D0, P[], (doc)::cat:\n +content: meow\n +legs: \"4\"\n",
// },
{ {
skipDoc: true, description: "Parse xml: simple",
input: "<!-- before cat --><cat><!-- in cat --><x>3<!--xca\ncool\nsmart --></x><y><!-- befored --><d><!-- ind -->4<!-- ind2 --></d><!-- afterd --></y><!-- after --></cat><!-- after cat -->", input: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<cat>meow</cat>",
expected: "D0, P[], (doc)::cat: meow\n",
},
{
description: "Parse xml: array",
subdescription: "Consecutive nodes with identical xml names are assumed to be arrays.",
input: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<animal>1</animal>\n<animal>2</animal>",
expected: "D0, P[], (doc)::animal:\n - \"1\"\n - \"2\"\n",
},
{
description: "Parse xml: attributes",
subdescription: "Attributes are converted to fields, with the attribute prefix.",
input: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<cat legs=\"4\">\n <legs>7</legs>\n</cat>",
expected: "D0, P[], (doc)::cat:\n +legs: \"4\"\n legs: \"7\"\n",
},
{
description: "Parse xml: attributes with content",
subdescription: "Content is added as a field, using the content name",
input: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<cat legs=\"4\">meow</cat>",
expected: "D0, P[], (doc)::cat:\n +content: meow\n +legs: \"4\"\n",
},
{
description: "Parse xml: with comments",
subdescription: "A best attempt is made to preserve comments.",
input: inputXmlWithComments,
expected: expectedDecodeYamlWithComments, expected: expectedDecodeYamlWithComments,
scenarioType: "decode", scenarioType: "decode",
}, },
// { {
// description: "Encode xml: simple", description: "Encode xml: simple",
// input: "cat: purrs", input: "cat: purrs",
// expected: "<cat>purrs</cat>\n", expected: "<cat>purrs</cat>\n",
// scenarioType: "encode", scenarioType: "encode",
// }, },
// { {
// description: "Encode xml: array", description: "Encode xml: array",
// input: "pets:\n cat:\n - purrs\n - meows", input: "pets:\n cat:\n - purrs\n - meows",
// expected: "<pets>\n <cat>purrs</cat>\n <cat>meows</cat>\n</pets>\n", expected: "<pets>\n <cat>purrs</cat>\n <cat>meows</cat>\n</pets>\n",
// scenarioType: "encode", scenarioType: "encode",
// }, },
// { {
// description: "Encode xml: attributes", description: "Encode xml: attributes",
// subdescription: "Fields with the matching xml-attribute-prefix are assumed to be attributes.", subdescription: "Fields with the matching xml-attribute-prefix are assumed to be attributes.",
// input: "cat:\n +name: tiger\n meows: true\n", input: "cat:\n +name: tiger\n meows: true\n",
// expected: "<cat name=\"tiger\">\n <meows>true</meows>\n</cat>\n", expected: "<cat name=\"tiger\">\n <meows>true</meows>\n</cat>\n",
// scenarioType: "encode", scenarioType: "encode",
// }, },
// { {
// skipDoc: true, skipDoc: true,
// input: "cat:\n ++name: tiger\n meows: true\n", input: "cat:\n ++name: tiger\n meows: true\n",
// expected: "<cat +name=\"tiger\">\n <meows>true</meows>\n</cat>\n", expected: "<cat +name=\"tiger\">\n <meows>true</meows>\n</cat>\n",
// scenarioType: "encode", scenarioType: "encode",
// }, },
// { {
// description: "Encode xml: attributes with content", description: "Encode xml: attributes with content",
// subdescription: "Fields with the matching xml-content-name is assumed to be content.", subdescription: "Fields with the matching xml-content-name is assumed to be content.",
// input: "cat:\n +name: tiger\n +content: cool\n", input: "cat:\n +name: tiger\n +content: cool\n",
// expected: "<cat name=\"tiger\">cool</cat>\n", expected: "<cat name=\"tiger\">cool</cat>\n",
// scenarioType: "encode", scenarioType: "encode",
// }, },
// { {
// description: "Encode xml: comments", description: "Encode xml: comments",
// subdescription: "A best attempt is made to copy comments to xml.", subdescription: "A best attempt is made to copy comments to xml.",
// input: yamlWithComments, input: yamlWithComments,
// expected: expectedXmlWithComments, expected: expectedXmlWithComments,
// scenarioType: "encode", scenarioType: "encode",
// }, },
// { // {
// skipDoc: true, // skipDoc: true,
// input: "<!-- beforeCat --><cat><!-- in cat -->value<!-- after --></cat><!-- after cat -->", // input: "<!-- beforeCat --><cat><!-- in cat -->value<!-- after --></cat><!-- after cat -->",