mirror of
https://github.com/mikefarah/yq.git
synced 2024-12-19 20:19:04 +00:00
wip better comment parsing
This commit is contained in:
parent
0881ce2476
commit
a72743f9c9
@ -4,6 +4,7 @@ import (
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"unicode"
|
||||
|
||||
"golang.org/x/net/html/charset"
|
||||
@ -61,17 +62,19 @@ func (dec *xmlDecoder) createSequence(nodes []*xmlNode) (*yaml.Node, error) {
|
||||
}
|
||||
|
||||
func (dec *xmlDecoder) createMap(n *xmlNode) (*yaml.Node, error) {
|
||||
yamlNode := &yaml.Node{Kind: yaml.MappingNode, HeadComment: n.Comment}
|
||||
log.Debug("createMap: headC: %v, footC: %v", n.HeadComment, n.FootComment)
|
||||
yamlNode := &yaml.Node{Kind: yaml.MappingNode, HeadComment: n.HeadComment}
|
||||
|
||||
if len(n.Data) > 0 {
|
||||
label := dec.contentPrefix
|
||||
yamlNode.Content = append(yamlNode.Content, createScalarNode(label, label), createScalarNode(n.Data, n.Data))
|
||||
}
|
||||
|
||||
for _, keyValuePair := range n.Children {
|
||||
for i, keyValuePair := range n.Children {
|
||||
label := keyValuePair.K
|
||||
children := keyValuePair.V
|
||||
labelNode := createScalarNode(label, label)
|
||||
// labelNode.HeadComment = n.HeadComment
|
||||
var valueNode *yaml.Node
|
||||
var err error
|
||||
log.Debug("len of children in %v is %v", label, len(children))
|
||||
@ -81,10 +84,15 @@ func (dec *xmlDecoder) createMap(n *xmlNode) (*yaml.Node, error) {
|
||||
return nil, err
|
||||
}
|
||||
} else {
|
||||
|
||||
valueNode, err = dec.convertToYamlNode(children[0])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if i == len(n.Children)-1 {
|
||||
valueNode.FootComment = n.FootComment
|
||||
}
|
||||
}
|
||||
yamlNode.Content = append(yamlNode.Content, labelNode, valueNode)
|
||||
}
|
||||
@ -97,7 +105,9 @@ func (dec *xmlDecoder) convertToYamlNode(n *xmlNode) (*yaml.Node, error) {
|
||||
return dec.createMap(n)
|
||||
}
|
||||
scalar := createScalarNode(n.Data, n.Data)
|
||||
scalar.HeadComment = n.Comment
|
||||
log.Debug("scalar headC: %v, footC: %v", n.HeadComment, n.FootComment)
|
||||
scalar.LineComment = n.HeadComment
|
||||
|
||||
return scalar, nil
|
||||
}
|
||||
|
||||
@ -124,9 +134,10 @@ func (dec *xmlDecoder) Decode(rootYamlNode *yaml.Node) error {
|
||||
}
|
||||
|
||||
type xmlNode struct {
|
||||
Children []*xmlChildrenKv
|
||||
Comment string
|
||||
Data string
|
||||
Children []*xmlChildrenKv
|
||||
HeadComment string
|
||||
FootComment string
|
||||
Data string
|
||||
}
|
||||
|
||||
type xmlChildrenKv struct {
|
||||
@ -158,6 +169,7 @@ type element struct {
|
||||
parent *element
|
||||
n *xmlNode
|
||||
label string
|
||||
state string
|
||||
}
|
||||
|
||||
// this code is heavily based on https://github.com/basgys/goxml2json
|
||||
@ -183,6 +195,8 @@ func (dec *xmlDecoder) decodeXml(root *xmlNode) error {
|
||||
|
||||
switch se := t.(type) {
|
||||
case xml.StartElement:
|
||||
log.Debug("start element %v", se.Name.Local)
|
||||
elem.state = "started"
|
||||
// Build a new current element and link it to its parent
|
||||
elem = &element{
|
||||
parent: elem,
|
||||
@ -198,6 +212,8 @@ func (dec *xmlDecoder) decodeXml(root *xmlNode) error {
|
||||
// Extract XML data (if any)
|
||||
elem.n.Data = trimNonGraphic(string(se))
|
||||
case xml.EndElement:
|
||||
log.Debug("end element %v", elem.label)
|
||||
elem.state = "finished"
|
||||
// And add it to its parent list
|
||||
if elem.parent != nil {
|
||||
elem.parent.n.AddChild(elem.label, elem.n)
|
||||
@ -206,13 +222,32 @@ func (dec *xmlDecoder) decodeXml(root *xmlNode) error {
|
||||
// Then change the current element to its parent
|
||||
elem = elem.parent
|
||||
case xml.Comment:
|
||||
elem.n.Comment = trimNonGraphic(string(xml.CharData(se)))
|
||||
|
||||
commentStr := trimNonGraphic(string(xml.CharData(se)))
|
||||
if elem.state == "started" {
|
||||
log.Debug("got a foot comment for %v: %v", elem.label, commentStr)
|
||||
elem.n.FootComment = commentStr
|
||||
} else {
|
||||
log.Debug("got a head comment for %v: %v", elem.label, commentStr)
|
||||
elem.n.HeadComment = joinFilter([]string{elem.n.HeadComment, commentStr})
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// joinFilter joins the non-empty entries of rawStrings with a single space.
// Empty entries are dropped before joining, so they contribute no separator.
// It returns "" when rawStrings is nil, empty, or contains only empty strings.
func joinFilter(rawStrings []string) string {
	// At most len(rawStrings) entries survive the filter, so allocate once
	// instead of growing the slice on every append.
	stringsToJoin := make([]string, 0, len(rawStrings))
	for _, str := range rawStrings {
		if str != "" {
			stringsToJoin = append(stringsToJoin, str)
		}
	}
	return strings.Join(stringsToJoin, " ")
}
|
||||
|
||||
// trimNonGraphic returns a slice of the string s, with all leading and trailing
|
||||
// non-graphic characters and spaces removed.
|
||||
//
|
||||
|
@ -22,179 +22,3 @@ XML nodes that have attributes then plain content, e.g:
|
||||
|
||||
The content of the node will be set as a field in the map with the key "+content". Use the `--xml-content-name` flag to change this.
|
||||
|
||||
## Parse xml: simple
|
||||
Given a sample.xml file of:
|
||||
```xml
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<cat>meow</cat>
|
||||
```
|
||||
then
|
||||
```bash
|
||||
yq e -p=xml '.' sample.xml
|
||||
```
|
||||
will output
|
||||
```yaml
|
||||
cat: meow
|
||||
```
|
||||
|
||||
## Parse xml: array
|
||||
Consecutive nodes with identical xml names are assumed to be arrays.
|
||||
|
||||
Given a sample.xml file of:
|
||||
```xml
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<animal>1</animal>
|
||||
<animal>2</animal>
|
||||
```
|
||||
then
|
||||
```bash
|
||||
yq e -p=xml '.' sample.xml
|
||||
```
|
||||
will output
|
||||
```yaml
|
||||
animal:
|
||||
- "1"
|
||||
- "2"
|
||||
```
|
||||
|
||||
## Parse xml: attributes
|
||||
Attributes are converted to fields, with the attribute prefix.
|
||||
|
||||
Given a sample.xml file of:
|
||||
```xml
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<cat legs="4">
|
||||
<legs>7</legs>
|
||||
</cat>
|
||||
```
|
||||
then
|
||||
```bash
|
||||
yq e -p=xml '.' sample.xml
|
||||
```
|
||||
will output
|
||||
```yaml
|
||||
cat:
|
||||
+legs: "4"
|
||||
legs: "7"
|
||||
```
|
||||
|
||||
## Parse xml: attributes with content
|
||||
Content is added as a field, using the content name
|
||||
|
||||
Given a sample.xml file of:
|
||||
```xml
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<cat legs="4">meow</cat>
|
||||
```
|
||||
then
|
||||
```bash
|
||||
yq e -p=xml '.' sample.xml
|
||||
```
|
||||
will output
|
||||
```yaml
|
||||
cat:
|
||||
+content: meow
|
||||
+legs: "4"
|
||||
```
|
||||
|
||||
## Encode xml: simple
|
||||
Given a sample.yml file of:
|
||||
```yaml
|
||||
cat: purrs
|
||||
```
|
||||
then
|
||||
```bash
|
||||
yq e -o=xml '.' sample.yml
|
||||
```
|
||||
will output
|
||||
```xml
|
||||
<cat>purrs</cat>
|
||||
```
|
||||
|
||||
## Encode xml: array
|
||||
Given a sample.yml file of:
|
||||
```yaml
|
||||
pets:
|
||||
cat:
|
||||
- purrs
|
||||
- meows
|
||||
```
|
||||
then
|
||||
```bash
|
||||
yq e -o=xml '.' sample.yml
|
||||
```
|
||||
will output
|
||||
```xml
|
||||
<pets>
|
||||
<cat>purrs</cat>
|
||||
<cat>meows</cat>
|
||||
</pets>
|
||||
```
|
||||
|
||||
## Encode xml: attributes
|
||||
Fields with the matching xml-attribute-prefix are assumed to be attributes.
|
||||
|
||||
Given a sample.yml file of:
|
||||
```yaml
|
||||
cat:
|
||||
+name: tiger
|
||||
meows: true
|
||||
|
||||
```
|
||||
then
|
||||
```bash
|
||||
yq e -o=xml '.' sample.yml
|
||||
```
|
||||
will output
|
||||
```xml
|
||||
<cat name="tiger">
|
||||
<meows>true</meows>
|
||||
</cat>
|
||||
```
|
||||
|
||||
## Encode xml: attributes with content
|
||||
Fields with the matching xml-content-name are assumed to be content.
|
||||
|
||||
Given a sample.yml file of:
|
||||
```yaml
|
||||
cat:
|
||||
+name: tiger
|
||||
+content: cool
|
||||
|
||||
```
|
||||
then
|
||||
```bash
|
||||
yq e -o=xml '.' sample.yml
|
||||
```
|
||||
will output
|
||||
```xml
|
||||
<cat name="tiger">cool</cat>
|
||||
```
|
||||
|
||||
## Encode xml: comments
|
||||
A best attempt is made to copy comments to xml.
|
||||
|
||||
Given a sample.yml file of:
|
||||
```yaml
|
||||
# above_cat
|
||||
cat: # inline_cat
|
||||
# above_array
|
||||
array: # inline_array
|
||||
- val1 # inline_val1
|
||||
# above_val2
|
||||
- val2 # inline_val2
|
||||
# below_cat
|
||||
|
||||
```
|
||||
then
|
||||
```bash
|
||||
yq e -o=xml '.' sample.yml
|
||||
```
|
||||
will output
|
||||
```xml
|
||||
<!-- above_cat inline_cat--><cat><!-- above_array inline_array-->
|
||||
<array><!-- inline_val1-->val1</array>
|
||||
<array><!-- above_val2 inline_val2-->val2</array>
|
||||
</cat><!-- below_cat-->
|
||||
```
|
||||
|
||||
|
@ -24,12 +24,18 @@ func decodeXml(t *testing.T, xml string) *CandidateNode {
|
||||
return &CandidateNode{Node: node}
|
||||
}
|
||||
|
||||
func yamlToXml(sampleYaml string, indent int) string {
|
||||
func processScenario(s xmlScenario) string {
|
||||
var output bytes.Buffer
|
||||
writer := bufio.NewWriter(&output)
|
||||
|
||||
var encoder = NewXmlEncoder(writer, indent, "+", "+content")
|
||||
inputs, err := readDocuments(strings.NewReader(sampleYaml), "sample.yml", 0, NewYamlDecoder())
|
||||
var encoder = NewXmlEncoder(writer, 2, "+", "+content")
|
||||
|
||||
var decoder = NewYamlDecoder()
|
||||
if s.scenarioType == "roundtrip" {
|
||||
decoder = NewXmlDecoder("+", "+content")
|
||||
}
|
||||
|
||||
inputs, err := readDocuments(strings.NewReader(s.input), "sample.yml", 0, decoder)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
@ -49,10 +55,24 @@ type xmlScenario struct {
|
||||
description string
|
||||
subdescription string
|
||||
skipDoc bool
|
||||
encodeScenario bool
|
||||
scenarioType string
|
||||
}
|
||||
|
||||
var yamlWithComments = `need to fix leadingContent thing. This should fail.# above_cat
|
||||
var expectedDecodeYamlWithComments = `D0, P[], (doc)::# before cat
|
||||
cat:
|
||||
# in cat
|
||||
x: "3" # xca
|
||||
# cool
|
||||
# smart
|
||||
y:
|
||||
# befored
|
||||
d: "4" # ind ind2
|
||||
# afterd
|
||||
|
||||
# after cat
|
||||
`
|
||||
|
||||
var yamlWithComments = `# above_cat
|
||||
cat: # inline_cat
|
||||
# above_array
|
||||
array: # inline_array
|
||||
@ -69,73 +89,85 @@ var expectedXmlWithComments = `<!-- above_cat inline_cat--><cat><!-- above_array
|
||||
`
|
||||
|
||||
var xmlScenarios = []xmlScenario{
|
||||
// {
|
||||
// description: "Parse xml: simple",
|
||||
// input: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<cat>meow</cat>",
|
||||
// expected: "D0, P[], (doc)::cat: meow\n",
|
||||
// },
|
||||
// {
|
||||
// description: "Parse xml: array",
|
||||
// subdescription: "Consecutive nodes with identical xml names are assumed to be arrays.",
|
||||
// input: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<animal>1</animal>\n<animal>2</animal>",
|
||||
// expected: "D0, P[], (doc)::animal:\n - \"1\"\n - \"2\"\n",
|
||||
// },
|
||||
// {
|
||||
// description: "Parse xml: attributes",
|
||||
// subdescription: "Attributes are converted to fields, with the attribute prefix.",
|
||||
// input: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<cat legs=\"4\">\n <legs>7</legs>\n</cat>",
|
||||
// expected: "D0, P[], (doc)::cat:\n +legs: \"4\"\n legs: \"7\"\n",
|
||||
// },
|
||||
// {
|
||||
// description: "Parse xml: attributes with content",
|
||||
// subdescription: "Content is added as a field, using the content name",
|
||||
// input: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<cat legs=\"4\">meow</cat>",
|
||||
// expected: "D0, P[], (doc)::cat:\n +content: meow\n +legs: \"4\"\n",
|
||||
// },
|
||||
{
|
||||
description: "Parse xml: simple",
|
||||
input: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<cat>meow</cat>",
|
||||
expected: "D0, P[], (doc)::cat: meow\n",
|
||||
},
|
||||
{
|
||||
description: "Parse xml: array",
|
||||
subdescription: "Consecutive nodes with identical xml names are assumed to be arrays.",
|
||||
input: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<animal>1</animal>\n<animal>2</animal>",
|
||||
expected: "D0, P[], (doc)::animal:\n - \"1\"\n - \"2\"\n",
|
||||
},
|
||||
{
|
||||
description: "Parse xml: attributes",
|
||||
subdescription: "Attributes are converted to fields, with the attribute prefix.",
|
||||
input: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<cat legs=\"4\">\n <legs>7</legs>\n</cat>",
|
||||
expected: "D0, P[], (doc)::cat:\n +legs: \"4\"\n legs: \"7\"\n",
|
||||
},
|
||||
{
|
||||
description: "Parse xml: attributes with content",
|
||||
subdescription: "Content is added as a field, using the content name",
|
||||
input: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<cat legs=\"4\">meow</cat>",
|
||||
expected: "D0, P[], (doc)::cat:\n +content: meow\n +legs: \"4\"\n",
|
||||
},
|
||||
{
|
||||
description: "Encode xml: simple",
|
||||
input: "cat: purrs",
|
||||
expected: "<cat>purrs</cat>\n",
|
||||
encodeScenario: true,
|
||||
},
|
||||
{
|
||||
description: "Encode xml: array",
|
||||
input: "pets:\n cat:\n - purrs\n - meows",
|
||||
expected: "<pets>\n <cat>purrs</cat>\n <cat>meows</cat>\n</pets>\n",
|
||||
encodeScenario: true,
|
||||
},
|
||||
{
|
||||
description: "Encode xml: attributes",
|
||||
subdescription: "Fields with the matching xml-attribute-prefix are assumed to be attributes.",
|
||||
input: "cat:\n +name: tiger\n meows: true\n",
|
||||
expected: "<cat name=\"tiger\">\n <meows>true</meows>\n</cat>\n",
|
||||
encodeScenario: true,
|
||||
},
|
||||
{
|
||||
skipDoc: true,
|
||||
input: "cat:\n ++name: tiger\n meows: true\n",
|
||||
expected: "<cat +name=\"tiger\">\n <meows>true</meows>\n</cat>\n",
|
||||
encodeScenario: true,
|
||||
},
|
||||
{
|
||||
description: "Encode xml: attributes with content",
|
||||
subdescription: "Fields with the matching xml-content-name is assumed to be content.",
|
||||
input: "cat:\n +name: tiger\n +content: cool\n",
|
||||
expected: "<cat name=\"tiger\">cool</cat>\n",
|
||||
encodeScenario: true,
|
||||
},
|
||||
{
|
||||
description: "Encode xml: comments",
|
||||
subdescription: "A best attempt is made to copy comments to xml.",
|
||||
input: yamlWithComments,
|
||||
expected: expectedXmlWithComments,
|
||||
encodeScenario: true,
|
||||
skipDoc: true,
|
||||
input: "<!-- before cat --><cat><!-- in cat --><x>3<!--xca\ncool\nsmart --></x><y><!-- befored --><d><!-- ind -->4<!-- ind2 --></d><!-- afterd --></y><!-- after --></cat><!-- after cat -->",
|
||||
expected: expectedDecodeYamlWithComments,
|
||||
scenarioType: "decode",
|
||||
},
|
||||
// {
|
||||
// description: "Encode xml: simple",
|
||||
// input: "cat: purrs",
|
||||
// expected: "<cat>purrs</cat>\n",
|
||||
// scenarioType: "encode",
|
||||
// },
|
||||
// {
|
||||
// description: "Encode xml: array",
|
||||
// input: "pets:\n cat:\n - purrs\n - meows",
|
||||
// expected: "<pets>\n <cat>purrs</cat>\n <cat>meows</cat>\n</pets>\n",
|
||||
// scenarioType: "encode",
|
||||
// },
|
||||
// {
|
||||
// description: "Encode xml: attributes",
|
||||
// subdescription: "Fields with the matching xml-attribute-prefix are assumed to be attributes.",
|
||||
// input: "cat:\n +name: tiger\n meows: true\n",
|
||||
// expected: "<cat name=\"tiger\">\n <meows>true</meows>\n</cat>\n",
|
||||
// scenarioType: "encode",
|
||||
// },
|
||||
// {
|
||||
// skipDoc: true,
|
||||
// input: "cat:\n ++name: tiger\n meows: true\n",
|
||||
// expected: "<cat +name=\"tiger\">\n <meows>true</meows>\n</cat>\n",
|
||||
// scenarioType: "encode",
|
||||
// },
|
||||
// {
|
||||
// description: "Encode xml: attributes with content",
|
||||
// subdescription: "Fields with the matching xml-content-name is assumed to be content.",
|
||||
// input: "cat:\n +name: tiger\n +content: cool\n",
|
||||
// expected: "<cat name=\"tiger\">cool</cat>\n",
|
||||
// scenarioType: "encode",
|
||||
// },
|
||||
// {
|
||||
// description: "Encode xml: comments",
|
||||
// subdescription: "A best attempt is made to copy comments to xml.",
|
||||
// input: yamlWithComments,
|
||||
// expected: expectedXmlWithComments,
|
||||
// scenarioType: "encode",
|
||||
// },
|
||||
// {
|
||||
// skipDoc: true,
|
||||
// input: "<!-- beforeCat --><cat><!-- in cat -->value<!-- after --></cat><!-- after cat -->",
|
||||
// expected: "<!-- beforeCat --><cat><!-- in cat -->value</cat><!-- after cat -->",
|
||||
// scenarioType: "roundtrip",
|
||||
// },
|
||||
}
|
||||
|
||||
func testXmlScenario(t *testing.T, s *xmlScenario) {
|
||||
if s.encodeScenario {
|
||||
test.AssertResultWithContext(t, s.expected, yamlToXml(s.input, 2), s.description)
|
||||
func testXmlScenario(t *testing.T, s xmlScenario) {
|
||||
if s.scenarioType == "encode" || s.scenarioType == "roundtrip" {
|
||||
test.AssertResultWithContext(t, s.expected, processScenario(s), s.description)
|
||||
} else {
|
||||
var actual = resultToString(t, decodeXml(t, s.input))
|
||||
test.AssertResultWithContext(t, s.expected, actual, s.description)
|
||||
@ -148,7 +180,7 @@ func documentXmlScenario(t *testing.T, w *bufio.Writer, i interface{}) {
|
||||
if s.skipDoc {
|
||||
return
|
||||
}
|
||||
if s.encodeScenario {
|
||||
if s.scenarioType == "encode" {
|
||||
documentXmlEncodeScenario(w, s)
|
||||
} else {
|
||||
documentXmlDecodeScenario(t, w, s)
|
||||
@ -200,12 +232,12 @@ func documentXmlEncodeScenario(w *bufio.Writer, s xmlScenario) {
|
||||
writeOrPanic(w, "```bash\nyq e -o=xml '.' sample.yml\n```\n")
|
||||
writeOrPanic(w, "will output\n")
|
||||
|
||||
writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", yamlToXml(s.input, 2)))
|
||||
writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processScenario(s)))
|
||||
}
|
||||
|
||||
func TestXmlScenarios(t *testing.T) {
|
||||
for _, tt := range xmlScenarios {
|
||||
testXmlScenario(t, &tt)
|
||||
testXmlScenario(t, tt)
|
||||
}
|
||||
genericScenarios := make([]interface{}, len(xmlScenarios))
|
||||
for i, s := range xmlScenarios {
|
||||
|
Loading…
Reference in New Issue
Block a user