From deee3c1f604b1fef774cf6a7ca91026ba77e357c Mon Sep 17 00:00:00 2001 From: Mike Farah Date: Sat, 15 Jan 2022 11:25:56 +1100 Subject: [PATCH] xml wip --- cmd/utils.go | 4 +- go.mod | 1 + go.sum | 2 + pkg/yqlib/decoder_xml.go | 41 ++++--- pkg/yqlib/doc/usage/xml.md | 227 ++++++++++++++++++++++++++++++++++++- pkg/yqlib/encoder_xml.go | 12 +- pkg/yqlib/xml_test.go | 185 +++++++++++++++++------------- test/utils.go | 12 +- 8 files changed, 385 insertions(+), 99 deletions(-) diff --git a/cmd/utils.go b/cmd/utils.go index 7688cad0..1fce4a6c 100644 --- a/cmd/utils.go +++ b/cmd/utils.go @@ -52,7 +52,7 @@ func configureDecoder() (yqlib.Decoder, error) { } switch yqlibInputFormat { case yqlib.XmlInputFormat: - return yqlib.NewXmlDecoder(), nil + return yqlib.NewXmlDecoder(xmlAttributePrefix, xmlContentName), nil } return yqlib.NewYamlDecoder(), nil } @@ -87,7 +87,7 @@ func configureEncoder(format yqlib.PrinterOutputFormat) yqlib.Encoder { case yqlib.YamlOutputFormat: return yqlib.NewYamlEncoder(indent, colorsEnabled, !noDocSeparators, unwrapScalar) case yqlib.XmlOutputFormat: - return yqlib.NewXmlEncoder(indent) + return yqlib.NewXmlEncoder(indent, xmlAttributePrefix, xmlContentName) } panic("invalid encoder") } diff --git a/go.mod b/go.mod index 0549f8e3..89fb3c5e 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,7 @@ require ( github.com/goccy/go-yaml v1.9.5 github.com/jinzhu/copier v0.3.4 github.com/magiconair/properties v1.8.5 + github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e github.com/spf13/cobra v1.3.0 github.com/timtadh/lexmachine v0.2.2 golang.org/x/net v0.0.0-20210813160813-60bc85c4be6d diff --git a/go.sum b/go.sum index 6bbb4d7c..415b8504 100644 --- a/go.sum +++ b/go.sum @@ -292,6 +292,8 @@ github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRW github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pascaldekloe/goe v0.1.0/go.mod 
h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pelletier/go-toml v1.9.4/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= +github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e h1:aoZm08cpOy4WuID//EZDgcC4zIxODThtZNPirFr42+A= +github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/sftp v1.10.1/go.mod h1:lYOWFsE0bwd1+KfKJaKeuokY15vzFx25BLbzYYoAxZI= diff --git a/pkg/yqlib/decoder_xml.go b/pkg/yqlib/decoder_xml.go index a6da9a71..f847a474 100644 --- a/pkg/yqlib/decoder_xml.go +++ b/pkg/yqlib/decoder_xml.go @@ -34,15 +34,15 @@ func InputFormatFromString(format string) (InputFormat, error) { type xmlDecoder struct { reader io.Reader attributePrefix string - contentPrefix string + contentName string finished bool } -func NewXmlDecoder(attributePrefix string, contentPrefix string) Decoder { - if contentPrefix == "" { - contentPrefix = "content" +func NewXmlDecoder(attributePrefix string, contentName string) Decoder { + if contentName == "" { + contentName = "content" } - return &xmlDecoder{attributePrefix: attributePrefix, contentPrefix: contentPrefix, finished: false} + return &xmlDecoder{attributePrefix: attributePrefix, contentName: contentName, finished: false} } func (dec *xmlDecoder) Init(reader io.Reader) { @@ -75,7 +75,7 @@ func (dec *xmlDecoder) createMap(n *xmlNode) (*yaml.Node, error) { yamlNode := &yaml.Node{Kind: yaml.MappingNode} if len(n.Data) > 0 { - label := dec.contentPrefix + label := dec.contentName labelNode := createScalarNode(label, label) labelNode.HeadComment = dec.processComment(n.HeadComment) labelNode.FootComment = dec.processComment(n.FootComment) @@ -94,9 +94,9 @@ func (dec *xmlDecoder) createMap(n *xmlNode) (*yaml.Node, error) { } - if i == len(n.Children)-1 { - labelNode.FootComment = 
dec.processComment(n.FootComment) - } + // if i == len(n.Children)-1 { + labelNode.FootComment = dec.processComment(keyValuePair.FootComment) + // } log.Debug("len of children in %v is %v", label, len(children)) if len(children) > 1 { @@ -167,8 +167,9 @@ type xmlNode struct { } type xmlChildrenKv struct { - K string - V []*xmlNode + K string + V []*xmlNode + FootComment string } // AddChild appends a node to the list of children @@ -255,8 +256,8 @@ func (dec *xmlDecoder) decodeXml(root *xmlNode) error { commentStr := string(xml.CharData(se)) if elem.state == "started" { - log.Debug("got a foot comment for %v: [%v]", elem.label, commentStr) - elem.n.FootComment = joinFilter([]string{elem.n.FootComment, commentStr}) + applyFootComment(elem, commentStr) + } else if elem.state == "chardata" { log.Debug("got a line comment for (%v) %v: [%v]", elem.state, elem.label, commentStr) elem.n.LineComment = joinFilter([]string{elem.n.LineComment, commentStr}) @@ -271,6 +272,20 @@ func (dec *xmlDecoder) decodeXml(root *xmlNode) error { return nil } +func applyFootComment(elem *element, commentStr string) { + + // first lets try to put the comment on the last child + if len(elem.n.Children) > 0 { + lastChildIndex := len(elem.n.Children) - 1 + childKv := elem.n.Children[lastChildIndex] + log.Debug("got a foot comment for %v: [%v]", childKv.K, commentStr) + childKv.FootComment = joinFilter([]string{elem.n.FootComment, commentStr}) + } else { + log.Debug("got a foot comment for %v: [%v]", elem.label, commentStr) + elem.n.FootComment = joinFilter([]string{elem.n.FootComment, commentStr}) + } +} + func joinFilter(rawStrings []string) string { stringsToJoin := make([]string, 0) for _, str := range rawStrings { diff --git a/pkg/yqlib/doc/usage/xml.md b/pkg/yqlib/doc/usage/xml.md index 64535f3f..5b08ad9a 100644 --- a/pkg/yqlib/doc/usage/xml.md +++ b/pkg/yqlib/doc/usage/xml.md @@ -22,6 +22,229 @@ XML nodes that have attributes then plain content, e.g: The content of the node will be set as 
a field in the map with the key "+content". Use the `--xml-content-name` flag to change this. +## Parse xml: simple +Given a sample.xml file of: +```xml + +meow +``` +then +```bash +yq e -p=xml '.' sample.xml +``` +will output +```yaml +cat: meow +``` + +## Parse xml: array +Consecutive nodes with identical xml names are assumed to be arrays. + +Given a sample.xml file of: +```xml + +1 +2 +``` +then +```bash +yq e -p=xml '.' sample.xml +``` +will output +```yaml +animal: + - "1" + - "2" +``` + +## Parse xml: attributes +Attributes are converted to fields, with the attribute prefix. + +Given a sample.xml file of: +```xml + + + 7 + +``` +then +```bash +yq e -p=xml '.' sample.xml +``` +will output +```yaml +cat: + +legs: "4" + legs: "7" +``` + +## Parse xml: attributes with content +Content is added as a field, using the content name + +Given a sample.xml file of: +```xml + +meow +``` +then +```bash +yq e -p=xml '.' sample.xml +``` +will output +```yaml +cat: + +content: meow + +legs: "4" +``` + +## Parse xml: with comments +A best attempt is made to preserve comments. + +Given a sample.xml file of: +```xml + + + + + 3 + + + + z + + + + + + + +``` +then +```bash +yq e -p=xml '.' sample.xml +``` +will output +```yaml +# before cat +cat: + # in cat before + x: "3" # multi + # line comment + # for x + # before y + + y: + # in y before + # in d before + d: z # in d after + # in y after + # in_cat_after +# after cat +``` + +## Encode xml: simple +Given a sample.yml file of: +```yaml +cat: purrs +``` +then +```bash +yq e -o=xml '.' sample.yml +``` +will output +```xml +purrs +``` + +## Encode xml: array +Given a sample.yml file of: +```yaml +pets: + cat: + - purrs + - meows +``` +then +```bash +yq e -o=xml '.' sample.yml +``` +will output +```xml + + purrs + meows + +``` + +## Encode xml: attributes +Fields with the matching xml-attribute-prefix are assumed to be attributes. 
+ +Given a sample.yml file of: +```yaml +cat: + +name: tiger + meows: true + +``` +then +```bash +yq e -o=xml '.' sample.yml +``` +will output +```xml + + true + +``` + +## Encode xml: attributes with content +Fields with the matching xml-content-name are assumed to be content. + +Given a sample.yml file of: +```yaml +cat: + +name: tiger + +content: cool + +``` +then +```bash +yq e -o=xml '.' sample.yml +``` +will output +```xml +cool +``` + +## Encode xml: comments +A best attempt is made to copy comments to xml. + +Given a sample.yml file of: +```yaml +# above_cat +cat: # inline_cat + # above_array + array: # inline_array + - val1 # inline_val1 + # above_val2 + - val2 # inline_val2 +# below_cat + +``` +then +```bash +yq e -o=xml '.' sample.yml +``` +will output +```xml + + val1 + val2 + +``` + ## Round trip: with comments A best effort is made, but comment positions and white space are not preserved perfectly. @@ -58,12 +281,14 @@ cat: x: "3" # multi # line comment # for x + # before y + y: # in y before # in d before d: z # in d after # in y after - # before y in_cat_after + # in_cat_after # after cat ``` diff --git a/pkg/yqlib/encoder_xml.go b/pkg/yqlib/encoder_xml.go index c97c3abb..b5933718 100644 --- a/pkg/yqlib/encoder_xml.go +++ b/pkg/yqlib/encoder_xml.go @@ -168,16 +168,18 @@ func (e *xmlEncoder) encodeComment(encoder *xml.Encoder, commentStr string) erro } func (e *xmlEncoder) encodeArray(encoder *xml.Encoder, node *yaml.Node, start xml.StartElement) error { - e.encodeComment(encoder, headAndLineComment(node)) + + if err := e.encodeComment(encoder, headAndLineComment(node)); err != nil { + return err + } + for i := 0; i < len(node.Content); i++ { value := node.Content[i] - err := e.doEncode(encoder, value, start.Copy()) - if err != nil { + if err := e.doEncode(encoder, value, start.Copy()); err != nil { return err } } - e.encodeComment(encoder, footComment(node)) - return nil + return e.encodeComment(encoder, footComment(node)) } func (e *xmlEncoder)
encodeMap(encoder *xml.Encoder, node *yaml.Node, start xml.StartElement) error { diff --git a/pkg/yqlib/xml_test.go b/pkg/yqlib/xml_test.go index d8b944cb..e375c49d 100644 --- a/pkg/yqlib/xml_test.go +++ b/pkg/yqlib/xml_test.go @@ -105,13 +105,14 @@ cat: y: # in y before - # in d before d: + # in d before z: - sweet: cool - # in d after - # in y after - # after cat + +sweet: cool + # in d after + # in y after + # in_cat_after +# after cat ` var inputXmlWithCommentsWithArray = ` @@ -124,7 +125,8 @@ for x --> - + + @@ -133,6 +135,30 @@ for x --> ` +var expectedDecodeYamlWithArray = `D0, P[], (doc)::# before cat +cat: + # in cat before + x: "3" # multi + # line comment + # for x + # before y + + y: + # in y before + d: + - # in d before + z: + +sweet: cool + # in d after + - # in d2 before + z: + +sweet: cool2 + # in d2 after + # in y after + # in_cat_after +# after cat +` + var expectedDecodeYamlWithComments = `D0, P[], (doc)::# before cat cat: # in cat before @@ -146,7 +172,7 @@ cat: # in d before d: z # in d after # in y after - + # in_cat_after # after cat ` @@ -178,36 +204,36 @@ var expectedXmlWithComments = `