From a72743f9c9dc0f884a6227fb6afa37758dc249ad Mon Sep 17 00:00:00 2001 From: Mike Farah Date: Wed, 22 Dec 2021 11:31:28 +1100 Subject: [PATCH] wip better comment parsing --- pkg/yqlib/decoder_xml.go | 49 +++++++++-- pkg/yqlib/doc/usage/xml.md | 176 ------------------------------------- pkg/yqlib/xml_test.go | 174 +++++++++++++++++++++--------------- 3 files changed, 145 insertions(+), 254 deletions(-) diff --git a/pkg/yqlib/decoder_xml.go b/pkg/yqlib/decoder_xml.go index ada214ca..39e2b3ac 100644 --- a/pkg/yqlib/decoder_xml.go +++ b/pkg/yqlib/decoder_xml.go @@ -4,6 +4,7 @@ import ( "encoding/xml" "fmt" "io" + "strings" "unicode" "golang.org/x/net/html/charset" @@ -61,17 +62,19 @@ func (dec *xmlDecoder) createSequence(nodes []*xmlNode) (*yaml.Node, error) { } func (dec *xmlDecoder) createMap(n *xmlNode) (*yaml.Node, error) { - yamlNode := &yaml.Node{Kind: yaml.MappingNode, HeadComment: n.Comment} + log.Debug("createMap: headC: %v, footC: %v", n.HeadComment, n.FootComment) + yamlNode := &yaml.Node{Kind: yaml.MappingNode, HeadComment: n.HeadComment} if len(n.Data) > 0 { label := dec.contentPrefix yamlNode.Content = append(yamlNode.Content, createScalarNode(label, label), createScalarNode(n.Data, n.Data)) } - for _, keyValuePair := range n.Children { + for i, keyValuePair := range n.Children { label := keyValuePair.K children := keyValuePair.V labelNode := createScalarNode(label, label) + // labelNode.HeadComment = n.HeadComment var valueNode *yaml.Node var err error log.Debug("len of children in %v is %v", label, len(children)) @@ -81,10 +84,15 @@ func (dec *xmlDecoder) createMap(n *xmlNode) (*yaml.Node, error) { return nil, err } } else { + valueNode, err = dec.convertToYamlNode(children[0]) if err != nil { return nil, err } + + if i == len(n.Children)-1 { + valueNode.FootComment = n.FootComment + } } yamlNode.Content = append(yamlNode.Content, labelNode, valueNode) } @@ -97,7 +105,9 @@ func (dec *xmlDecoder) convertToYamlNode(n *xmlNode) (*yaml.Node, error) { return dec.createMap(n) } scalar := createScalarNode(n.Data, n.Data) - scalar.HeadComment = n.Comment + log.Debug("scalar headC: %v, footC: %v", n.HeadComment, n.FootComment) + scalar.LineComment = n.HeadComment + return scalar, nil } @@ -124,9 +134,10 @@ func (dec *xmlDecoder) Decode(rootYamlNode *yaml.Node) error { } type xmlNode struct { - Children []*xmlChildrenKv - Comment string - Data string + Children []*xmlChildrenKv + HeadComment string + FootComment string + Data string } type xmlChildrenKv struct { @@ -158,6 +169,7 @@ type element struct { parent *element n *xmlNode label string + state string } // this code is heavily based on https://github.com/basgys/goxml2json @@ -183,6 +195,8 @@ func (dec *xmlDecoder) decodeXml(root *xmlNode) error { switch se := t.(type) { case xml.StartElement: + log.Debug("start element %v", se.Name.Local) + elem.state = "started" // Build new a new current element and link it to its parent elem = &element{ parent: elem, @@ -198,6 +212,8 @@ func (dec *xmlDecoder) decodeXml(root *xmlNode) error { // Extract XML data (if any) elem.n.Data = trimNonGraphic(string(se)) case xml.EndElement: + log.Debug("end element %v", elem.label) + elem.state = "finished" // And add it to its parent list if elem.parent != nil { elem.parent.n.AddChild(elem.label, elem.n) @@ -206,13 +222,32 @@ func (dec *xmlDecoder) decodeXml(root *xmlNode) error { // Then change the current element to its parent elem = elem.parent case xml.Comment: - elem.n.Comment = trimNonGraphic(string(xml.CharData(se))) + + commentStr := trimNonGraphic(string(xml.CharData(se))) + if elem.state == "started" { + log.Debug("got a foot comment for %v: %v", elem.label, commentStr) + elem.n.FootComment = commentStr + } else { + log.Debug("got a head comment for %v: %v", elem.label, commentStr) + elem.n.HeadComment = joinFilter([]string{elem.n.HeadComment, commentStr}) + } + } } return nil } +func joinFilter(rawStrings []string) string { + stringsToJoin := make([]string, 0) + for _, str := range rawStrings { + if str != "" { + stringsToJoin = append(stringsToJoin, str) + } + } + return strings.Join(stringsToJoin, " ") +} + // trimNonGraphic returns a slice of the string s, with all leading and trailing // non graphic characters and spaces removed. // diff --git a/pkg/yqlib/doc/usage/xml.md b/pkg/yqlib/doc/usage/xml.md index 3edc6758..eee366c4 100644 --- a/pkg/yqlib/doc/usage/xml.md +++ b/pkg/yqlib/doc/usage/xml.md @@ -22,179 +22,3 @@ XML nodes that have attributes then plain content, e.g: The content of the node will be set as a field in the map with the key "+content". Use the `--xml-content-name` flag to change this. -## Parse xml: simple -Given a sample.xml file of: -```xml - -meow -``` -then -```bash -yq e -p=xml '.' sample.xml -``` -will output -```yaml -cat: meow -``` - -## Parse xml: array -Consecutive nodes with identical xml names are assumed to be arrays. - -Given a sample.xml file of: -```xml - -1 -2 -``` -then -```bash -yq e -p=xml '.' sample.xml -``` -will output -```yaml -animal: - - "1" - - "2" -``` - -## Parse xml: attributes -Attributes are converted to fields, with the attribute prefix. - -Given a sample.xml file of: -```xml - - - 7 - -``` -then -```bash -yq e -p=xml '.' sample.xml -``` -will output -```yaml -cat: - +legs: "4" - legs: "7" -``` - -## Parse xml: attributes with content -Content is added as a field, using the content name - -Given a sample.xml file of: -```xml - -meow -``` -then -```bash -yq e -p=xml '.' sample.xml -``` -will output -```yaml -cat: - +content: meow - +legs: "4" -``` - -## Encode xml: simple -Given a sample.yml file of: -```yaml -cat: purrs -``` -then -```bash -yq e -o=xml '.' sample.yml -``` -will output -```xml -purrs -``` - -## Encode xml: array -Given a sample.yml file of: -```yaml -pets: - cat: - - purrs - - meows -``` -then -```bash -yq e -o=xml '.' sample.yml -``` -will output -```xml - - purrs - meows - -``` - -## Encode xml: attributes -Fields with the matching xml-attribute-prefix are assumed to be attributes. - -Given a sample.yml file of: -```yaml -cat: - +name: tiger - meows: true - -``` -then -```bash -yq e -o=xml '.' sample.yml -``` -will output -```xml - - true - -``` - -## Encode xml: attributes with content -Fields with the matching xml-content-name is assumed to be content. - -Given a sample.yml file of: -```yaml -cat: - +name: tiger - +content: cool - -``` -then -```bash -yq e -o=xml '.' sample.yml -``` -will output -```xml -cool -``` - -## Encode xml: comments -A best attempt is made to copy comments to xml. - -Given a sample.yml file of: -```yaml -# above_cat -cat: # inline_cat - # above_array - array: # inline_array - - val1 # inline_val1 - # above_val2 - - val2 # inline_val2 -# below_cat - -``` -then -```bash -yq e -o=xml '.' sample.yml -``` -will output -```xml - - val1 - val2 - -``` - diff --git a/pkg/yqlib/xml_test.go b/pkg/yqlib/xml_test.go index d9c0aaa3..e888a506 100644 --- a/pkg/yqlib/xml_test.go +++ b/pkg/yqlib/xml_test.go @@ -24,12 +24,18 @@ func decodeXml(t *testing.T, xml string) *CandidateNode { return &CandidateNode{Node: node} } -func yamlToXml(sampleYaml string, indent int) string { +func processScenario(s xmlScenario) string { var output bytes.Buffer writer := bufio.NewWriter(&output) - var encoder = NewXmlEncoder(writer, indent, "+", "+content") - inputs, err := readDocuments(strings.NewReader(sampleYaml), "sample.yml", 0, NewYamlDecoder()) + var encoder = NewXmlEncoder(writer, 2, "+", "+content") + + var decoder = NewYamlDecoder() + if s.scenarioType == "roundtrip" { + decoder = NewXmlDecoder("+", "+content") + } + + inputs, err := readDocuments(strings.NewReader(s.input), "sample.yml", 0, decoder) if err != nil { panic(err) } @@ -49,10 +55,24 @@ type xmlScenario struct { description string subdescription string skipDoc bool - encodeScenario bool + scenarioType string } -var yamlWithComments = `need to fix leadingContent thing. This should fail.# above_cat +var expectedDecodeYamlWithComments = `D0, P[], (doc)::# before cat +cat: + # in cat + x: "3" # xca + # cool + # smart + y: + # befored + d: "4" # ind ind2 + # afterd + +# after cat +` + +var yamlWithComments = `# above_cat cat: # inline_cat # above_array array: # inline_array @@ -69,73 +89,85 @@ var expectedXmlWithComments = `34", + expected: expectedDecodeYamlWithComments, + scenarioType: "decode", }, + // { + // description: "Encode xml: simple", + // input: "cat: purrs", + // expected: "purrs\n", + // scenarioType: "encode", + // }, + // { + // description: "Encode xml: array", + // input: "pets:\n cat:\n - purrs\n - meows", + // expected: "\n purrs\n meows\n\n", + // scenarioType: "encode", + // }, + // { + // description: "Encode xml: attributes", + // subdescription: "Fields with the matching xml-attribute-prefix are assumed to be attributes.", + // input: "cat:\n +name: tiger\n meows: true\n", + // expected: "\n true\n\n", + // scenarioType: "encode", + // }, + // { + // skipDoc: true, + // input: "cat:\n ++name: tiger\n meows: true\n", + // expected: "\n true\n\n", + // scenarioType: "encode", + // }, + // { + // description: "Encode xml: attributes with content", + // subdescription: "Fields with the matching xml-content-name is assumed to be content.", + // input: "cat:\n +name: tiger\n +content: cool\n", + // expected: "cool\n", + // scenarioType: "encode", + // }, + // { + // description: "Encode xml: comments", + // subdescription: "A best attempt is made to copy comments to xml.", + // input: yamlWithComments, + // expected: expectedXmlWithComments, + // scenarioType: "encode", + // }, + // { + // skipDoc: true, + // input: "value", + // expected: "value", + // scenarioType: "roundtrip", + // }, } -func testXmlScenario(t *testing.T, s *xmlScenario) { - if s.encodeScenario { - test.AssertResultWithContext(t, s.expected, yamlToXml(s.input, 2), s.description) +func testXmlScenario(t *testing.T, s xmlScenario) { + if s.scenarioType == "encode" || s.scenarioType == "roundtrip" { + test.AssertResultWithContext(t, s.expected, processScenario(s), s.description) } else { var actual = resultToString(t, decodeXml(t, s.input)) test.AssertResultWithContext(t, s.expected, actual, s.description) @@ -148,7 +180,7 @@ func documentXmlScenario(t *testing.T, w *bufio.Writer, i interface{}) { if s.skipDoc { return } - if s.encodeScenario { + if s.scenarioType == "encode" { documentXmlEncodeScenario(w, s) } else { documentXmlDecodeScenario(t, w, s) @@ -200,12 +232,12 @@ func documentXmlEncodeScenario(w *bufio.Writer, s xmlScenario) { writeOrPanic(w, "```bash\nyq e -o=xml '.' sample.yml\n```\n") writeOrPanic(w, "will output\n") - writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", yamlToXml(s.input, 2))) + writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processScenario(s))) } func TestXmlScenarios(t *testing.T) { for _, tt := range xmlScenarios { - testXmlScenario(t, &tt) + testXmlScenario(t, tt) } genericScenarios := make([]interface{}, len(xmlScenarios)) for i, s := range xmlScenarios {