From ec8ef312efae98ebbde8f8edff68328f7d3202a4 Mon Sep 17 00:00:00 2001 From: Mike Farah Date: Sat, 15 Jan 2022 11:57:59 +1100 Subject: [PATCH] Added XML encoding/decoding (#1067) * Added XML encoding/decoding * Minor fixes * Improve yq doc * Lint --- acceptance_tests/output-format.sh | 21 ++ cmd/evaluate_all_command.go | 3 +- cmd/evalute_sequence_command.go | 3 +- cmd/root.go | 2 +- cmd/utils.go | 18 ++ examples/data1.yaml | 16 +- examples/mike.xml | 16 +- go.mod | 1 + go.sum | 2 + pkg/yqlib/decoder_xml.go | 113 +++++++++-- pkg/yqlib/doc/usage/headers/xml.md | 4 - pkg/yqlib/doc/usage/xml.md | 135 ++++++++++++- pkg/yqlib/encoder.go | 90 +-------- pkg/yqlib/encoder_csv.go | 33 +++- pkg/yqlib/encoder_csv_test.go | 4 +- pkg/yqlib/encoder_json.go | 64 ++++++ pkg/yqlib/encoder_properties.go | 56 +++++- pkg/yqlib/encoder_properties_test.go | 4 +- pkg/yqlib/encoder_test.go | 4 +- pkg/yqlib/encoder_xml.go | 197 ++++++++++++++----- pkg/yqlib/encoder_yaml.go | 101 ++++++++++ pkg/yqlib/lib.go | 18 +- pkg/yqlib/operator_comments.go | 3 +- pkg/yqlib/operator_encoder_decoder.go | 26 ++- pkg/yqlib/operators_test.go | 10 +- pkg/yqlib/printer.go | 76 +++----- pkg/yqlib/printer_test.go | 20 +- pkg/yqlib/utils.go | 37 ---- pkg/yqlib/xml_test.go | 268 +++++++++++++++++++++++--- test/utils.go | 11 +- 30 files changed, 1005 insertions(+), 351 deletions(-) create mode 100644 pkg/yqlib/encoder_json.go create mode 100644 pkg/yqlib/encoder_yaml.go diff --git a/acceptance_tests/output-format.sh b/acceptance_tests/output-format.sh index 9f2e79d9..995f0fcf 100755 --- a/acceptance_tests/output-format.sh +++ b/acceptance_tests/output-format.sh @@ -142,4 +142,25 @@ EOM assertEquals "$expected" "$X" } +testOutputXmComplex() { + cat >test.yml < + + cat + dog + + +EOM + + X=$(./yq e --output-format=x test.yml) + assertEquals "$expected" "$X" + + X=$(./yq ea --output-format=x test.yml) + assertEquals "$expected" "$X" +} + source ./scripts/shunit2 \ No newline at end of file diff --git a/cmd/evaluate_all_command.go b/cmd/evaluate_all_command.go index df8b15bc..862a2460 100644 --- a/cmd/evaluate_all_command.go +++ b/cmd/evaluate_all_command.go @@ -85,8 +85,9 @@ func evaluateAll(cmd *cobra.Command, args []string) (cmdError error) { } printerWriter := configurePrinterWriter(format, out) + encoder := configureEncoder(format) - printer := yqlib.NewPrinter(printerWriter, format, unwrapScalar, colorsEnabled, indent, !noDocSeparators) + printer := yqlib.NewPrinter(encoder, printerWriter) if frontMatter != "" { frontMatterHandler := yqlib.NewFrontMatterHandler(args[firstFileIndex]) diff --git a/cmd/evalute_sequence_command.go b/cmd/evalute_sequence_command.go index e0e14d27..158e2b3a 100644 --- a/cmd/evalute_sequence_command.go +++ b/cmd/evalute_sequence_command.go @@ -93,8 +93,9 @@ func evaluateSequence(cmd *cobra.Command, args []string) (cmdError error) { } printerWriter := configurePrinterWriter(format, out) + encoder := configureEncoder(format) - printer := yqlib.NewPrinter(printerWriter, format, unwrapScalar, colorsEnabled, indent, !noDocSeparators) + printer := yqlib.NewPrinter(encoder, printerWriter) decoder, err := configureDecoder() if err != nil { diff --git a/cmd/root.go b/cmd/root.go index 6c7550e6..b443be1c 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -52,7 +52,7 @@ See https://mikefarah.gitbook.io/yq/ for detailed documentation and examples.`, } rootCmd.PersistentFlags().StringVarP(&outputFormat, "output-format", "o", "yaml", "[yaml|y|json|j|props|p|xml|x] output format type.") - rootCmd.PersistentFlags().StringVarP(&inputFormat, "input-format", "p", "yaml", "[yaml|y|xml|x] input format type. Note that json is a subset of yaml.") + rootCmd.PersistentFlags().StringVarP(&inputFormat, "input-format", "p", "yaml", "[yaml|y|xml|x] parse format for input. Note that json is a subset of yaml.") rootCmd.PersistentFlags().StringVar(&xmlAttributePrefix, "xml-attribute-prefix", "+", "prefix for xml attributes") rootCmd.PersistentFlags().StringVar(&xmlContentName, "xml-content-name", "+content", "name for xml content (if no attribute name is present).") diff --git a/cmd/utils.go b/cmd/utils.go index 7ab518a4..1fce4a6c 100644 --- a/cmd/utils.go +++ b/cmd/utils.go @@ -73,3 +73,21 @@ func configurePrinterWriter(format yqlib.PrinterOutputFormat, out io.Writer) yql } return printerWriter } + +func configureEncoder(format yqlib.PrinterOutputFormat) yqlib.Encoder { + switch format { + case yqlib.JsonOutputFormat: + return yqlib.NewJsonEncoder(indent) + case yqlib.PropsOutputFormat: + return yqlib.NewPropertiesEncoder() + case yqlib.CsvOutputFormat: + return yqlib.NewCsvEncoder(',') + case yqlib.TsvOutputFormat: + return yqlib.NewCsvEncoder('\t') + case yqlib.YamlOutputFormat: + return yqlib.NewYamlEncoder(indent, colorsEnabled, !noDocSeparators, unwrapScalar) + case yqlib.XmlOutputFormat: + return yqlib.NewXmlEncoder(indent, xmlAttributePrefix, xmlContentName) + } + panic("invalid encoder") +} diff --git a/examples/data1.yaml b/examples/data1.yaml index 3ef8ec68..1bf3a5cb 100644 --- a/examples/data1.yaml +++ b/examples/data1.yaml @@ -1,15 +1 @@ -strings: - - a: banana - - a: cat - - a: apple - -numbers: - - a: 12 - - a: 13 - - a: 120 - - -obj: - - a: {cat: "adog"} - - a: {cat: "doga"} - - a: apple +cat: purrs diff --git a/examples/mike.xml b/examples/mike.xml index 2e1813a4..94773b86 100644 --- a/examples/mike.xml +++ b/examples/mike.xml @@ -1,2 +1,14 @@ - -BiBi \ No newline at end of file + + + + 3 + + + 4 + + + + + \ No newline at end of file diff --git a/go.mod b/go.mod index 0549f8e3..89fb3c5e 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,7 @@ require ( github.com/goccy/go-yaml v1.9.5 github.com/jinzhu/copier v0.3.4 github.com/magiconair/properties v1.8.5 + github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e github.com/spf13/cobra v1.3.0 github.com/timtadh/lexmachine v0.2.2 golang.org/x/net v0.0.0-20210813160813-60bc85c4be6d diff --git a/go.sum b/go.sum index 6bbb4d7c..415b8504 100644 --- a/go.sum +++ b/go.sum @@ -292,6 +292,8 @@ github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRW github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pelletier/go-toml v1.9.4/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= +github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e h1:aoZm08cpOy4WuID//EZDgcC4zIxODThtZNPirFr42+A= +github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/sftp v1.10.1/go.mod h1:lYOWFsE0bwd1+KfKJaKeuokY15vzFx25BLbzYYoAxZI= diff --git a/pkg/yqlib/decoder_xml.go b/pkg/yqlib/decoder_xml.go index ada214ca..25b0f891 100644 --- a/pkg/yqlib/decoder_xml.go +++ b/pkg/yqlib/decoder_xml.go @@ -4,6 +4,7 @@ import ( "encoding/xml" "fmt" "io" + "strings" "unicode" "golang.org/x/net/html/charset" @@ -31,15 +32,15 @@ func InputFormatFromString(format string) (InputFormat, error) { type xmlDecoder struct { reader io.Reader attributePrefix string - contentPrefix string + contentName string finished bool } -func NewXmlDecoder(attributePrefix string, contentPrefix string) Decoder { - if contentPrefix == "" { - contentPrefix = "content" +func NewXmlDecoder(attributePrefix string, contentName string) Decoder { + if contentName == "" { + contentName = "content" } - return &xmlDecoder{attributePrefix: attributePrefix, contentPrefix: contentPrefix, finished: false} + return &xmlDecoder{attributePrefix: attributePrefix, contentName: contentName, finished: false} } func (dec *xmlDecoder) Init(reader io.Reader) { @@ -60,20 +61,41 @@ func (dec *xmlDecoder) createSequence(nodes []*xmlNode) (*yaml.Node, error) { return yamlNode, nil } +func (dec *xmlDecoder) processComment(c string) string { + if c == "" { + return "" + } + return "#" + strings.TrimRight(c, " ") +} + func (dec *xmlDecoder) createMap(n *xmlNode) (*yaml.Node, error) { - yamlNode := &yaml.Node{Kind: yaml.MappingNode, HeadComment: n.Comment} + log.Debug("createMap: headC: %v, footC: %v", n.HeadComment, n.FootComment) + yamlNode := &yaml.Node{Kind: yaml.MappingNode} if len(n.Data) > 0 { - label := dec.contentPrefix - yamlNode.Content = append(yamlNode.Content, createScalarNode(label, label), createScalarNode(n.Data, n.Data)) + label := dec.contentName + labelNode := createScalarNode(label, label) + labelNode.HeadComment = dec.processComment(n.HeadComment) + labelNode.FootComment = dec.processComment(n.FootComment) + yamlNode.Content = append(yamlNode.Content, labelNode, createScalarNode(n.Data, n.Data)) } - for _, keyValuePair := range n.Children { + for i, keyValuePair := range n.Children { label := keyValuePair.K children := keyValuePair.V labelNode := createScalarNode(label, label) var valueNode *yaml.Node var err error + + if i == 0 { + labelNode.HeadComment = dec.processComment(n.HeadComment) + + } + + // if i == len(n.Children)-1 { + labelNode.FootComment = dec.processComment(keyValuePair.FootComment) + // } + log.Debug("len of children in %v is %v", label, len(children)) if len(children) > 1 { valueNode, err = dec.createSequence(children) @@ -81,6 +103,13 @@ func (dec *xmlDecoder) createMap(n *xmlNode) (*yaml.Node, error) { return nil, err } } else { + // comment hack for maps of scalars + // if the value is a scalar, the head comment of the scalar needs to go on the key? + // add tests for as well as multiple of inputXmlWithComments > yaml + if len(children[0].Children) == 0 && children[0].HeadComment != "" { + labelNode.HeadComment = labelNode.HeadComment + "\n" + strings.TrimSpace(children[0].HeadComment) + children[0].HeadComment = "" + } valueNode, err = dec.convertToYamlNode(children[0]) if err != nil { return nil, err @@ -97,7 +126,11 @@ func (dec *xmlDecoder) convertToYamlNode(n *xmlNode) (*yaml.Node, error) { return dec.createMap(n) } scalar := createScalarNode(n.Data, n.Data) - scalar.HeadComment = n.Comment + log.Debug("scalar headC: %v, footC: %v", n.HeadComment, n.FootComment) + scalar.HeadComment = dec.processComment(n.HeadComment) + scalar.LineComment = dec.processComment(n.LineComment) + scalar.FootComment = dec.processComment(n.FootComment) + return scalar, nil } @@ -124,14 +157,17 @@ func (dec *xmlDecoder) Decode(rootYamlNode *yaml.Node) error { } type xmlNode struct { - Children []*xmlChildrenKv - Comment string - Data string + Children []*xmlChildrenKv + HeadComment string + FootComment string + LineComment string + Data string } type xmlChildrenKv struct { - K string - V []*xmlNode + K string + V []*xmlNode + FootComment string } // AddChild appends a node to the list of children @@ -158,6 +194,7 @@ type element struct { parent *element n *xmlNode label string + state string } // this code is heavily based on https://github.com/basgys/goxml2json @@ -183,6 +220,8 @@ func (dec *xmlDecoder) decodeXml(root *xmlNode) error { switch se := t.(type) { case xml.StartElement: + log.Debug("start element %v", se.Name.Local) + elem.state = "started" // Build new a new current element and link it to its parent elem = &element{ parent: elem, @@ -197,7 +236,13 @@ func (dec *xmlDecoder) decodeXml(root *xmlNode) error { case xml.CharData: // Extract XML data (if any) elem.n.Data = trimNonGraphic(string(se)) + if elem.n.Data != "" { + elem.state = "chardata" + log.Debug("chardata [%v] for %v", elem.n.Data, elem.label) + } case xml.EndElement: + log.Debug("end element %v", elem.label) + elem.state = "finished" // And add it to its parent list if elem.parent != nil { elem.parent.n.AddChild(elem.label, elem.n) @@ -206,13 +251,49 @@ func (dec *xmlDecoder) decodeXml(root *xmlNode) error { // Then change the current element to its parent elem = elem.parent case xml.Comment: - elem.n.Comment = trimNonGraphic(string(xml.CharData(se))) + + commentStr := string(xml.CharData(se)) + if elem.state == "started" { + applyFootComment(elem, commentStr) + + } else if elem.state == "chardata" { + log.Debug("got a line comment for (%v) %v: [%v]", elem.state, elem.label, commentStr) + elem.n.LineComment = joinFilter([]string{elem.n.LineComment, commentStr}) + } else { + log.Debug("got a head comment for (%v) %v: [%v]", elem.state, elem.label, commentStr) + elem.n.HeadComment = joinFilter([]string{elem.n.HeadComment, commentStr}) + } + } } return nil } +func applyFootComment(elem *element, commentStr string) { + + // first lets try to put the comment on the last child + if len(elem.n.Children) > 0 { + lastChildIndex := len(elem.n.Children) - 1 + childKv := elem.n.Children[lastChildIndex] + log.Debug("got a foot comment for %v: [%v]", childKv.K, commentStr) + childKv.FootComment = joinFilter([]string{elem.n.FootComment, commentStr}) + } else { + log.Debug("got a foot comment for %v: [%v]", elem.label, commentStr) + elem.n.FootComment = joinFilter([]string{elem.n.FootComment, commentStr}) + } +} + +func joinFilter(rawStrings []string) string { + stringsToJoin := make([]string, 0) + for _, str := range rawStrings { + if str != "" { + stringsToJoin = append(stringsToJoin, str) + } + } + return strings.Join(stringsToJoin, " ") +} + // trimNonGraphic returns a slice of the string s, with all leading and trailing // non graphic characters and spaces removed. // diff --git a/pkg/yqlib/doc/usage/headers/xml.md b/pkg/yqlib/doc/usage/headers/xml.md index b182a97b..1bf3d01a 100644 --- a/pkg/yqlib/doc/usage/headers/xml.md +++ b/pkg/yqlib/doc/usage/headers/xml.md @@ -2,8 +2,6 @@ Encode and decode to and from XML. Whitespace is not conserved for round trips - but the order of the fields are. -As yaml does not have the concept of attributes, xml attributes are converted to regular fields with a prefix to prevent clobbering. This defaults to "+", use the `--xml-attribute-prefix` to change. - Consecutive xml nodes with the same name are assumed to be arrays. All values in XML are assumed to be strings - but you can use `from_yaml` to parse them into their correct types: @@ -14,8 +12,6 @@ yq e -p=xml '.myNumberField |= from_yaml' my.xml ``` -XML nodes that have attributes then plain content, e.g: - ```xml meow ``` diff --git a/pkg/yqlib/doc/usage/xml.md b/pkg/yqlib/doc/usage/xml.md index 17c19871..c18a039e 100644 --- a/pkg/yqlib/doc/usage/xml.md +++ b/pkg/yqlib/doc/usage/xml.md @@ -2,8 +2,6 @@ Encode and decode to and from XML. Whitespace is not conserved for round trips - but the order of the fields are. -As yaml does not have the concept of attributes, xml attributes are converted to regular fields with a prefix to prevent clobbering. This defaults to "+", use the `--xml-attribute-prefix` to change. - Consecutive xml nodes with the same name are assumed to be arrays. All values in XML are assumed to be strings - but you can use `from_yaml` to parse them into their correct types: @@ -14,8 +12,6 @@ yq e -p=xml '.myNumberField |= from_yaml' my.xml ``` -XML nodes that have attributes then plain content, e.g: - ```xml meow ``` @@ -58,7 +54,7 @@ animal: ``` ## Parse xml: attributes -Attributes are converted to fields, with the attribute prefix. +Attributes are converted to fields, with the default attribute prefix '+'. Use '--xml-attribute-prefix` to set your own. Given a sample.xml file of: ```xml @@ -79,7 +75,7 @@ cat: ``` ## Parse xml: attributes with content -Content is added as a field, using the content name +Content is added as a field, using the default content name of '+content'. Use `--xml-content-name` to set your own. Given a sample.xml file of: ```xml @@ -97,6 +93,53 @@ cat: +legs: "4" ``` +## Parse xml: with comments +A best attempt is made to preserve comments. + +Given a sample.xml file of: +```xml + + + + + 3 + + + + z + + + + + + + +``` +then +```bash +yq e -p=xml '.' sample.xml +``` +will output +```yaml +# before cat +cat: + # in cat before + x: "3" # multi + # line comment + # for x + # before y + + y: + # in y before + # in d before + d: z # in d after + # in y after + # in_cat_after +# after cat +``` + ## Encode xml: simple Given a sample.yml file of: ```yaml @@ -108,7 +151,8 @@ yq e -o=xml '.' sample.yml ``` will output ```xml -purrs``` +purrs +``` ## Encode xml: array Given a sample.yml file of: @@ -127,7 +171,8 @@ will output purrs meows -``` + +``` ## Encode xml: attributes Fields with the matching xml-attribute-prefix are assumed to be attributes. @@ -147,7 +192,8 @@ will output ```xml true -``` + +``` ## Encode xml: attributes with content Fields with the matching xml-content-name is assumed to be content. @@ -165,5 +211,74 @@ yq e -o=xml '.' sample.yml ``` will output ```xml -cool``` +cool +``` + +## Encode xml: comments +A best attempt is made to copy comments to xml. + +Given a sample.yml file of: +```yaml +# above_cat +cat: # inline_cat + # above_array + array: # inline_array + - val1 # inline_val1 + # above_val2 + - val2 # inline_val2 +# below_cat + +``` +then +```bash +yq e -o=xml '.' sample.yml +``` +will output +```xml + + val1 + val2 + +``` + +## Round trip: with comments +A best effort is made, but comment positions and white space are not preserved perfectly. + +Given a sample.xml file of: +```xml + + + + + 3 + + + + z + + + + + + + +``` +then +```bash +yq e -p=xml -o=xml '.' sample.xml +``` +will output +```xml + + 3 + + z + + +``` diff --git a/pkg/yqlib/encoder.go b/pkg/yqlib/encoder.go index 71ec6c38..d0d5af86 100644 --- a/pkg/yqlib/encoder.go +++ b/pkg/yqlib/encoder.go @@ -11,92 +11,10 @@ import ( ) type Encoder interface { - Encode(node *yaml.Node) error -} - -type yamlEncoder struct { - destination io.Writer - indent int - colorise bool - firstDoc bool -} - -func NewYamlEncoder(destination io.Writer, indent int, colorise bool) Encoder { - if indent < 0 { - indent = 0 - } - return &yamlEncoder{destination, indent, colorise, true} -} - -func (ye *yamlEncoder) Encode(node *yaml.Node) error { - - destination := ye.destination - tempBuffer := bytes.NewBuffer(nil) - if ye.colorise { - destination = tempBuffer - } - - var encoder = yaml.NewEncoder(destination) - - encoder.SetIndent(ye.indent) - // TODO: work out if the first doc had a separator or not. - if ye.firstDoc { - ye.firstDoc = false - } else if _, err := destination.Write([]byte("---\n")); err != nil { - return err - } - - if err := encoder.Encode(node); err != nil { - return err - } - - if ye.colorise { - return colorizeAndPrint(tempBuffer.Bytes(), ye.destination) - } - return nil -} - -type jsonEncoder struct { - encoder *json.Encoder -} - -func mapKeysToStrings(node *yaml.Node) { - - if node.Kind == yaml.MappingNode { - for index, child := range node.Content { - if index%2 == 0 { // its a map key - child.Tag = "!!str" - } - } - } - - for _, child := range node.Content { - mapKeysToStrings(child) - } -} - -func NewJsonEncoder(destination io.Writer, indent int) Encoder { - var encoder = json.NewEncoder(destination) - encoder.SetEscapeHTML(false) // do not escape html chars e.g. &, <, > - - var indentString = "" - - for index := 0; index < indent; index++ { - indentString = indentString + " " - } - encoder.SetIndent("", indentString) - return &jsonEncoder{encoder} -} - -func (je *jsonEncoder) Encode(node *yaml.Node) error { - var dataBucket orderedMap - // firstly, convert all map keys to strings - mapKeysToStrings(node) - errorDecoding := node.Decode(&dataBucket) - if errorDecoding != nil { - return errorDecoding - } - return je.encoder.Encode(dataBucket) + Encode(writer io.Writer, node *yaml.Node) error + PrintDocumentSeparator(writer io.Writer) error + PrintLeadingContent(writer io.Writer, content string) error + CanHandleAliases() bool } // orderedMap allows to marshal and unmarshal JSON and YAML values keeping the diff --git a/pkg/yqlib/encoder_csv.go b/pkg/yqlib/encoder_csv.go index 654e05ac..2e321fa5 100644 --- a/pkg/yqlib/encoder_csv.go +++ b/pkg/yqlib/encoder_csv.go @@ -9,16 +9,26 @@ import ( ) type csvEncoder struct { - destination csv.Writer + separator rune } -func NewCsvEncoder(destination io.Writer, separator rune) Encoder { - csvWriter := *csv.NewWriter(destination) - csvWriter.Comma = separator - return &csvEncoder{csvWriter} +func NewCsvEncoder(separator rune) Encoder { + return &csvEncoder{separator} } -func (e *csvEncoder) encodeRow(contents []*yaml.Node) error { +func (e *csvEncoder) CanHandleAliases() bool { + return false +} + +func (e *csvEncoder) PrintDocumentSeparator(writer io.Writer) error { + return nil +} + +func (e *csvEncoder) PrintLeadingContent(writer io.Writer, content string) error { + return nil +} + +func (e *csvEncoder) encodeRow(csvWriter *csv.Writer, contents []*yaml.Node) error { stringValues := make([]string, len(contents)) for i, child := range contents { @@ -28,10 +38,13 @@ func (e *csvEncoder) encodeRow(contents []*yaml.Node) error { } stringValues[i] = child.Value } - return e.destination.Write(stringValues) + return csvWriter.Write(stringValues) } -func (e *csvEncoder) Encode(originalNode *yaml.Node) error { +func (e *csvEncoder) Encode(writer io.Writer, originalNode *yaml.Node) error { + csvWriter := csv.NewWriter(writer) + csvWriter.Comma = e.separator + // node must be a sequence node := unwrapDoc(originalNode) if node.Kind != yaml.SequenceNode { @@ -40,7 +53,7 @@ func (e *csvEncoder) Encode(originalNode *yaml.Node) error { return nil } if node.Content[0].Kind == yaml.ScalarNode { - return e.encodeRow(node.Content) + return e.encodeRow(csvWriter, node.Content) } for i, child := range node.Content { @@ -48,7 +61,7 @@ func (e *csvEncoder) Encode(originalNode *yaml.Node) error { if child.Kind != yaml.SequenceNode { return fmt.Errorf("csv encoding only works for arrays of scalars (string/numbers/booleans), child[%v] is a %v", i, child.Tag) } - err := e.encodeRow(child.Content) + err := e.encodeRow(csvWriter, child.Content) if err != nil { return err } diff --git a/pkg/yqlib/encoder_csv_test.go b/pkg/yqlib/encoder_csv_test.go index 816cbfe4..9e636c98 100644 --- a/pkg/yqlib/encoder_csv_test.go +++ b/pkg/yqlib/encoder_csv_test.go @@ -13,13 +13,13 @@ func yamlToCsv(sampleYaml string, separator rune) string { var output bytes.Buffer writer := bufio.NewWriter(&output) - var jsonEncoder = NewCsvEncoder(writer, separator) + var jsonEncoder = NewCsvEncoder(separator) inputs, err := readDocuments(strings.NewReader(sampleYaml), "sample.yml", 0, NewYamlDecoder()) if err != nil { panic(err) } node := inputs.Front().Value.(*CandidateNode).Node - err = jsonEncoder.Encode(node) + err = jsonEncoder.Encode(writer, node) if err != nil { panic(err) } diff --git a/pkg/yqlib/encoder_json.go b/pkg/yqlib/encoder_json.go new file mode 100644 index 00000000..cfe96e9e --- /dev/null +++ b/pkg/yqlib/encoder_json.go @@ -0,0 +1,64 @@ +package yqlib + +import ( + "encoding/json" + "io" + + yaml "gopkg.in/yaml.v3" +) + +type jsonEncoder struct { + indentString string +} + +func mapKeysToStrings(node *yaml.Node) { + + if node.Kind == yaml.MappingNode { + for index, child := range node.Content { + if index%2 == 0 { // its a map key + child.Tag = "!!str" + } + } + } + + for _, child := range node.Content { + mapKeysToStrings(child) + } +} + +func NewJsonEncoder(indent int) Encoder { + var indentString = "" + + for index := 0; index < indent; index++ { + indentString = indentString + " " + } + + return &jsonEncoder{indentString} +} + +func (je *jsonEncoder) CanHandleAliases() bool { + return false +} + +func (je *jsonEncoder) PrintDocumentSeparator(writer io.Writer) error { + return nil +} + +func (je *jsonEncoder) PrintLeadingContent(writer io.Writer, content string) error { + return nil +} + +func (je *jsonEncoder) Encode(writer io.Writer, node *yaml.Node) error { + var encoder = json.NewEncoder(writer) + encoder.SetEscapeHTML(false) // do not escape html chars e.g. &, <, > + encoder.SetIndent("", je.indentString) + + var dataBucket orderedMap + // firstly, convert all map keys to strings + mapKeysToStrings(node) + errorDecoding := node.Decode(&dataBucket) + if errorDecoding != nil { + return errorDecoding + } + return encoder.Encode(dataBucket) +} diff --git a/pkg/yqlib/encoder_properties.go b/pkg/yqlib/encoder_properties.go index 94de0cb5..287b707f 100644 --- a/pkg/yqlib/encoder_properties.go +++ b/pkg/yqlib/encoder_properties.go @@ -1,6 +1,8 @@ package yqlib import ( + "bufio" + "errors" "fmt" "io" "strings" @@ -10,14 +12,54 @@ import ( ) type propertiesEncoder struct { - destination io.Writer } -func NewPropertiesEncoder(destination io.Writer) Encoder { - return &propertiesEncoder{destination} +func NewPropertiesEncoder() Encoder { + return &propertiesEncoder{} } -func (pe *propertiesEncoder) Encode(node *yaml.Node) error { +func (pe *propertiesEncoder) CanHandleAliases() bool { + return false +} + +func (pe *propertiesEncoder) PrintDocumentSeparator(writer io.Writer) error { + return nil +} + +func (pe *propertiesEncoder) PrintLeadingContent(writer io.Writer, content string) error { + reader := bufio.NewReader(strings.NewReader(content)) + for { + + readline, errReading := reader.ReadString('\n') + if errReading != nil && !errors.Is(errReading, io.EOF) { + return errReading + } + if strings.Contains(readline, "$yqDocSeperator$") { + + if err := pe.PrintDocumentSeparator(writer); err != nil { + return err + } + + } else { + if err := writeString(writer, readline); err != nil { + return err + } + } + + if errors.Is(errReading, io.EOF) { + if readline != "" { + // the last comment we read didn't have a new line, put one in + if err := writeString(writer, "\n"); err != nil { + return err + } + } + break + } + } + return nil +} + +func (pe *propertiesEncoder) Encode(writer io.Writer, node *yaml.Node) error { mapKeysToStrings(node) p := properties.NewProperties() err := pe.doEncode(p, node, "") @@ -25,14 +67,12 @@ func (pe *propertiesEncoder) Encode(node *yaml.Node) error { return err } - _, err = p.WriteComment(pe.destination, "#", properties.UTF8) + _, err = p.WriteComment(writer, "#", properties.UTF8) return err } func (pe *propertiesEncoder) doEncode(p *properties.Properties, node *yaml.Node, path string) error { - p.SetComment(path, - strings.Replace(node.HeadComment, "#", "", 1)+ - strings.Replace(node.LineComment, "#", "", 1)) + p.SetComment(path, headAndLineComment(node)) switch node.Kind { case yaml.ScalarNode: _, _, err := p.Set(path, node.Value) diff --git a/pkg/yqlib/encoder_properties_test.go b/pkg/yqlib/encoder_properties_test.go index f82af12a..97e1c62b 100644 --- a/pkg/yqlib/encoder_properties_test.go +++ b/pkg/yqlib/encoder_properties_test.go @@ -13,13 +13,13 @@ func yamlToProps(sampleYaml string) string { var output bytes.Buffer writer := bufio.NewWriter(&output) - var propsEncoder = NewPropertiesEncoder(writer) + var propsEncoder = NewPropertiesEncoder() inputs, err := readDocuments(strings.NewReader(sampleYaml), "sample.yml", 0, NewYamlDecoder()) if err != nil { panic(err) } node := inputs.Front().Value.(*CandidateNode).Node - err = propsEncoder.Encode(node) + err = propsEncoder.Encode(writer, node) if err != nil { panic(err) } diff --git a/pkg/yqlib/encoder_test.go b/pkg/yqlib/encoder_test.go index 576cfb95..e21fe695 100644 --- a/pkg/yqlib/encoder_test.go +++ b/pkg/yqlib/encoder_test.go @@ -13,13 +13,13 @@ func yamlToJson(sampleYaml string, indent int) string { var output bytes.Buffer writer := bufio.NewWriter(&output) - var jsonEncoder = NewJsonEncoder(writer, indent) + var jsonEncoder = NewJsonEncoder(indent) inputs, err := readDocuments(strings.NewReader(sampleYaml), "sample.yml", 0, NewYamlDecoder()) if err != nil { panic(err) } node := inputs.Front().Value.(*CandidateNode).Node - err = jsonEncoder.Encode(node) + err = jsonEncoder.Encode(writer, node) if err != nil { panic(err) } diff --git a/pkg/yqlib/encoder_xml.go b/pkg/yqlib/encoder_xml.go index f1cac841..7914b8d4 100644 --- a/pkg/yqlib/encoder_xml.go +++ b/pkg/yqlib/encoder_xml.go @@ -9,89 +9,155 @@ import ( yaml "gopkg.in/yaml.v3" ) +var XmlPreferences = xmlPreferences{AttributePrefix: "+", ContentName: "+content"} + type xmlEncoder struct { - xmlEncoder *xml.Encoder attributePrefix string contentName string + indentString string } -func NewXmlEncoder(writer io.Writer, indent int, attributePrefix string, contentName string) Encoder { - encoder := xml.NewEncoder(writer) +func NewXmlEncoder(indent int, attributePrefix string, contentName string) Encoder { var indentString = "" for index := 0; index < indent; index++ { indentString = indentString + " " } - encoder.Indent("", indentString) - return &xmlEncoder{encoder, attributePrefix, contentName} -} -func (e *xmlEncoder) Encode(node *yaml.Node) error { - switch node.Kind { - case yaml.MappingNode: - err := e.encodeTopLevelMap(node) - if err != nil { - return err - } - var charData xml.CharData = []byte("\n") - err = e.xmlEncoder.EncodeToken(charData) - if err != nil { - return err - } - return e.xmlEncoder.Flush() - case yaml.DocumentNode: - return e.Encode(unwrapDoc(node)) - case yaml.ScalarNode: - var charData xml.CharData = []byte(node.Value) - err := e.xmlEncoder.EncodeToken(charData) - if err != nil { - return err - } - return e.xmlEncoder.Flush() - } - return fmt.Errorf("unsupported type %v", node.Tag) + return &xmlEncoder{attributePrefix, contentName, indentString} } -func (e *xmlEncoder) encodeTopLevelMap(node *yaml.Node) error { +func (e *xmlEncoder) CanHandleAliases() bool { + return false +} + +func (e *xmlEncoder) PrintDocumentSeparator(writer io.Writer) error { + return nil +} + +func (e *xmlEncoder) PrintLeadingContent(writer io.Writer, content string) error { + return nil +} + +func (e *xmlEncoder) Encode(writer io.Writer, node *yaml.Node) error { + encoder := xml.NewEncoder(writer) + encoder.Indent("", e.indentString) + + switch node.Kind { + case yaml.MappingNode: + err := e.encodeTopLevelMap(encoder, node) + if err != nil { + return err + } + case yaml.DocumentNode: + err := e.encodeComment(encoder, headAndLineComment(node)) + if err != nil { + return err + } + err = e.encodeTopLevelMap(encoder, unwrapDoc(node)) + if err != nil { + return err + } + err = e.encodeComment(encoder, footComment(node)) + if err != nil { + return err + } + case yaml.ScalarNode: + var charData xml.CharData = []byte(node.Value) + err := encoder.EncodeToken(charData) + if err != nil { + return err + } + return encoder.Flush() + default: + return fmt.Errorf("unsupported type %v", node.Tag) + } + var charData xml.CharData = []byte("\n") + return encoder.EncodeToken(charData) + +} + +func (e *xmlEncoder) encodeTopLevelMap(encoder *xml.Encoder, node *yaml.Node) error { + err := e.encodeComment(encoder, headAndLineComment(node)) + if err != nil { + return err + } for i := 0; i < len(node.Content); i += 2 { key := node.Content[i] value := node.Content[i+1] start := xml.StartElement{Name: xml.Name{Local: key.Value}} - err := e.doEncode(value, start) + log.Debugf("comments of key %v", key.Value) + err := e.encodeComment(encoder, headAndLineComment(key)) + if err != nil { + return err + } + + log.Debugf("recursing") + + err = e.doEncode(encoder, value, start) + if err != nil { + return err + } + err = e.encodeComment(encoder, footComment(key)) if err != nil { return err } } - return nil + return e.encodeComment(encoder, footComment(node)) } -func (e *xmlEncoder) doEncode(node *yaml.Node, start xml.StartElement) error { +func (e *xmlEncoder) encodeStart(encoder *xml.Encoder, node *yaml.Node, start xml.StartElement) error { + err := encoder.EncodeToken(start) + if err != nil { + return err + } + return e.encodeComment(encoder, headComment(node)) +} + +func (e *xmlEncoder) encodeEnd(encoder *xml.Encoder, node *yaml.Node, start xml.StartElement) error { + err := encoder.EncodeToken(start.End()) + if err != nil { + return err + } + return e.encodeComment(encoder, footComment(node)) +} + +func (e *xmlEncoder) doEncode(encoder *xml.Encoder, node *yaml.Node, start xml.StartElement) error { switch node.Kind { case yaml.MappingNode: - return e.encodeMap(node, start) + return e.encodeMap(encoder, node, start) case yaml.SequenceNode: - return e.encodeArray(node, start) + return e.encodeArray(encoder, node, start) case yaml.ScalarNode: - err := e.xmlEncoder.EncodeToken(start) + err := e.encodeStart(encoder, node, start) if err != nil { return err } var charData xml.CharData = []byte(node.Value) - err = e.xmlEncoder.EncodeToken(charData) - + err = encoder.EncodeToken(charData) if err != nil { return err } - return e.xmlEncoder.EncodeToken(start.End()) + + if err = e.encodeComment(encoder, lineComment(node)); err != nil { + return err + } + + return e.encodeEnd(encoder, node, start) } return fmt.Errorf("unsupported type %v", node.Tag) } -func (e *xmlEncoder) encodeArray(node *yaml.Node, start xml.StartElement) error { - for i := 0; i < len(node.Content); i++ { - value := node.Content[i] - err := e.doEncode(value, start.Copy()) +func (e *xmlEncoder) encodeComment(encoder *xml.Encoder, commentStr string) error { + if commentStr != "" { + log.Debugf("encoding comment %v", commentStr) + if !strings.HasSuffix(commentStr, " ") { + commentStr = commentStr + " " + } + + var comment xml.Comment = []byte(commentStr) + err := encoder.EncodeToken(comment) if err != nil { return err } @@ -99,7 +165,23 @@ func (e *xmlEncoder) encodeArray(node *yaml.Node, start xml.StartElement) error return nil } -func (e *xmlEncoder) encodeMap(node *yaml.Node, start xml.StartElement) error { +func (e *xmlEncoder) encodeArray(encoder *xml.Encoder, node *yaml.Node, start xml.StartElement) error { + + if err := e.encodeComment(encoder, headAndLineComment(node)); err != nil { + return err + } + + for i := 0; i < len(node.Content); i++ { + value := node.Content[i] + if err := e.doEncode(encoder, value, start.Copy()); err != nil { + return err + } + } + return e.encodeComment(encoder, footComment(node)) +} + +func (e *xmlEncoder) encodeMap(encoder *xml.Encoder, node *yaml.Node, start xml.StartElement) error { + log.Debug("its a map") //first find all the attributes and put them on the start token for i := 0; i < len(node.Content); i += 2 { @@ -116,7 +198,7 @@ func (e *xmlEncoder) encodeMap(node *yaml.Node, start xml.StartElement) error { } } - err := e.xmlEncoder.EncodeToken(start) + err := e.encodeStart(encoder, node, start) if err != nil { return err } @@ -126,21 +208,38 @@ func (e *xmlEncoder) encodeMap(node *yaml.Node, start xml.StartElement) error { key := node.Content[i] value := node.Content[i+1] + err := e.encodeComment(encoder, headAndLineComment(key)) + if err != nil { + return err + } + if !strings.HasPrefix(key.Value, e.attributePrefix) && key.Value != e.contentName { start := xml.StartElement{Name: xml.Name{Local: key.Value}} - err := e.doEncode(value, start) + err := e.doEncode(encoder, value, start) if err != nil { return err } } else if key.Value == e.contentName { // directly encode the contents + err = e.encodeComment(encoder, headAndLineComment(value)) + if err != nil { + return err + } var charData xml.CharData = []byte(value.Value) - err = e.xmlEncoder.EncodeToken(charData) + err = encoder.EncodeToken(charData) + if err != nil { + return err + } + err = e.encodeComment(encoder, footComment(value)) if err != nil { return err } } + err = e.encodeComment(encoder, footComment(key)) + if err != nil { + return err + } } - return e.xmlEncoder.EncodeToken(start.End()) + return e.encodeEnd(encoder, node, start) } diff --git a/pkg/yqlib/encoder_yaml.go b/pkg/yqlib/encoder_yaml.go new file mode 100644 index 00000000..39142c19 --- /dev/null +++ b/pkg/yqlib/encoder_yaml.go @@ -0,0 +1,101 @@ +package yqlib + +import ( + "bufio" + "bytes" + "errors" + "io" + "strings" + + yaml "gopkg.in/yaml.v3" +) + +type yamlEncoder struct { + indent int + colorise bool + printDocSeparators bool + unwrapScalar bool +} + +func NewYamlEncoder(indent int, colorise bool, printDocSeparators bool, unwrapScalar bool) Encoder { + if indent < 0 { + indent = 0 + } + return &yamlEncoder{indent, colorise, printDocSeparators, unwrapScalar} +} + +func (ye *yamlEncoder) CanHandleAliases() bool { + return true +} + +func (ye *yamlEncoder) PrintDocumentSeparator(writer io.Writer) error { + if ye.printDocSeparators { + log.Debug("-- writing doc sep") + if err := writeString(writer, "---\n"); err != nil { + return err + } + } + return nil +} + +func (ye *yamlEncoder) PrintLeadingContent(writer io.Writer, content string) error { + // log.Debug("headcommentwas [%v]", content) + reader := bufio.NewReader(strings.NewReader(content)) + + for { + + readline, errReading := reader.ReadString('\n') + if errReading != nil && !errors.Is(errReading, io.EOF) { + return errReading + } + if strings.Contains(readline, "$yqDocSeperator$") { + + if err := ye.PrintDocumentSeparator(writer); err != nil { + return err + } + + } else { + if err := writeString(writer, readline); err != nil { + return err + } + } + + if errors.Is(errReading, io.EOF) { + if readline != "" { + // the last comment we read didn't have a new line, put one in + if err := writeString(writer, "\n"); err != nil { + return err + } + } + break + } + } + + return nil +} + +func (ye *yamlEncoder) Encode(writer io.Writer, node *yaml.Node) error { + + if node.Kind == yaml.ScalarNode && ye.unwrapScalar { + return writeString(writer, node.Value+"\n") + } + + destination := writer + tempBuffer := bytes.NewBuffer(nil) + if ye.colorise { + destination = tempBuffer + } + + var encoder = yaml.NewEncoder(destination) + + encoder.SetIndent(ye.indent) + + if err := encoder.Encode(node); err != nil { + return err + } + + if ye.colorise { + return colorizeAndPrint(tempBuffer.Bytes(), writer) + } + return nil +} diff --git a/pkg/yqlib/lib.go b/pkg/yqlib/lib.go index 9524dddc..d34b2886 100644 --- a/pkg/yqlib/lib.go +++ b/pkg/yqlib/lib.go @@ -18,8 +18,6 @@ type xmlPreferences struct { ContentName string } -var XmlPreferences = xmlPreferences{AttributePrefix: "+", ContentName: "+content"} - var log = logging.MustGetLogger("yq-lib") // GetLogger returns the yq logger instance. @@ -236,6 +234,22 @@ func createScalarNode(value interface{}, stringValue string) *yaml.Node { return node } +func headAndLineComment(node *yaml.Node) string { + return headComment(node) + lineComment(node) +} + +func headComment(node *yaml.Node) string { + return strings.Replace(node.HeadComment, "#", "", 1) +} + +func lineComment(node *yaml.Node) string { + return strings.Replace(node.LineComment, "#", "", 1) +} + +func footComment(node *yaml.Node) string { + return strings.Replace(node.FootComment, "#", "", 1) +} + func createValueOperation(value interface{}, stringValue string) *Operation { var node *yaml.Node = createScalarNode(value, stringValue) diff --git a/pkg/yqlib/operator_comments.go b/pkg/yqlib/operator_comments.go index ebc78b9d..50a88a82 100644 --- a/pkg/yqlib/operator_comments.go +++ b/pkg/yqlib/operator_comments.go @@ -86,7 +86,8 @@ func getCommentsOperator(d *dataTreeNavigator, context Context, expressionNode * var chompRegexp = regexp.MustCompile(`\n$`) var output bytes.Buffer var writer = bufio.NewWriter(&output) - if err := processLeadingContent(candidate, writer, false, YamlOutputFormat); err != nil { + var encoder = NewYamlEncoder(2, false, false, false) + if err := encoder.PrintLeadingContent(writer, candidate.LeadingContent); err != nil { return Context{}, err } if err := writer.Flush(); err != nil { diff --git a/pkg/yqlib/operator_encoder_decoder.go b/pkg/yqlib/operator_encoder_decoder.go index ea7c51ee..940c87c4 100644 --- a/pkg/yqlib/operator_encoder_decoder.go +++ b/pkg/yqlib/operator_encoder_decoder.go @@ -10,11 +10,31 @@ import ( "gopkg.in/yaml.v3" ) -func yamlToString(candidate *CandidateNode, prefs encoderPreferences) (string, error) { +func configureEncoder(format PrinterOutputFormat, indent int) Encoder { + switch format { + case JsonOutputFormat: + return NewJsonEncoder(indent) + case PropsOutputFormat: + return NewPropertiesEncoder() + case CsvOutputFormat: + return NewCsvEncoder(',') + case TsvOutputFormat: + return NewCsvEncoder('\t') + case YamlOutputFormat: + return NewYamlEncoder(indent, false, true, true) + case XmlOutputFormat: + return NewXmlEncoder(indent, XmlPreferences.AttributePrefix, XmlPreferences.ContentName) + } + panic("invalid encoder") +} + +func encodeToString(candidate *CandidateNode, prefs encoderPreferences) (string, error) { var output bytes.Buffer log.Debug("printing with indent: %v", prefs.indent) - printer := NewPrinterWithSingleWriter(bufio.NewWriter(&output), prefs.format, true, false, prefs.indent, true) + encoder := configureEncoder(prefs.format, prefs.indent) + + printer := NewPrinter(encoder, NewSinglePrinterWriter(bufio.NewWriter(&output))) err := printer.PrintResults(candidate.AsList()) return output.String(), err } @@ -36,7 +56,7 @@ func encodeOperator(d *dataTreeNavigator, context Context, expressionNode *Expre for el := context.MatchingNodes.Front(); el != nil; el = el.Next() { candidate := el.Value.(*CandidateNode) - stringValue, err := yamlToString(candidate, preferences) + stringValue, err := encodeToString(candidate, preferences) if err != nil { return Context{}, err diff --git a/pkg/yqlib/operators_test.go b/pkg/yqlib/operators_test.go index c99c63a7..cc697cf0 100644 --- a/pkg/yqlib/operators_test.go +++ b/pkg/yqlib/operators_test.go @@ -26,6 +26,10 @@ type expressionScenario struct { dontFormatInputForDoc bool // dont format input doc for documentation generation } +func NewSimpleYamlPrinter(writer io.Writer, outputFormat PrinterOutputFormat, unwrapScalar bool, colorsEnabled bool, indent int, printDocSeparators bool) Printer { + return NewPrinter(NewYamlEncoder(indent, colorsEnabled, printDocSeparators, unwrapScalar), NewSinglePrinterWriter(writer)) +} + func readDocumentWithLeadingContent(content string, fakefilename string, fakeFileIndex int) (*list.List, error) { reader, firstFileLeadingContent, err := processReadStream(bufio.NewReader(strings.NewReader(content))) if err != nil { @@ -92,7 +96,7 @@ func testScenario(t *testing.T, s *expressionScenario) { func resultToString(t *testing.T, n *CandidateNode) string { var valueBuffer bytes.Buffer - printer := NewPrinterWithSingleWriter(bufio.NewWriter(&valueBuffer), YamlOutputFormat, true, false, 4, true) + printer := NewSimpleYamlPrinter(bufio.NewWriter(&valueBuffer), YamlOutputFormat, true, false, 4, true) err := printer.PrintResults(n.AsList()) if err != nil { @@ -145,7 +149,7 @@ func copyFromHeader(folder string, title string, out *os.File) error { func formatYaml(yaml string, filename string) string { var output bytes.Buffer - printer := NewPrinterWithSingleWriter(bufio.NewWriter(&output), YamlOutputFormat, true, false, 2, true) + printer := NewSimpleYamlPrinter(bufio.NewWriter(&output), YamlOutputFormat, true, false, 2, true) node, err := NewExpressionParser().ParseExpression(".. style= \"\"") if err != nil { @@ -268,7 +272,7 @@ func documentInput(w *bufio.Writer, s expressionScenario) (string, string) { func documentOutput(t *testing.T, w *bufio.Writer, s expressionScenario, formattedDoc string, formattedDoc2 string) { var output bytes.Buffer var err error - printer := NewPrinterWithSingleWriter(bufio.NewWriter(&output), YamlOutputFormat, true, false, 2, true) + printer := NewSimpleYamlPrinter(bufio.NewWriter(&output), YamlOutputFormat, true, false, 2, true) node, err := NewExpressionParser().ParseExpression(s.expression) if err != nil { diff --git a/pkg/yqlib/printer.go b/pkg/yqlib/printer.go index 2c1706e8..534dbfd7 100644 --- a/pkg/yqlib/printer.go +++ b/pkg/yqlib/printer.go @@ -48,34 +48,22 @@ func OutputFormatFromString(format string) (PrinterOutputFormat, error) { } type resultsPrinter struct { - outputFormat PrinterOutputFormat - unwrapScalar bool - colorsEnabled bool - indent int - printDocSeparators bool - printerWriter PrinterWriter - firstTimePrinting bool - previousDocIndex uint - previousFileIndex int - printedMatches bool - treeNavigator DataTreeNavigator - appendixReader io.Reader + encoder Encoder + printerWriter PrinterWriter + firstTimePrinting bool + previousDocIndex uint + previousFileIndex int + printedMatches bool + treeNavigator DataTreeNavigator + appendixReader io.Reader } -func NewPrinterWithSingleWriter(writer io.Writer, outputFormat PrinterOutputFormat, unwrapScalar bool, colorsEnabled bool, indent int, printDocSeparators bool) Printer { - return NewPrinter(NewSinglePrinterWriter(writer), outputFormat, unwrapScalar, colorsEnabled, indent, printDocSeparators) -} - -func NewPrinter(printerWriter PrinterWriter, outputFormat PrinterOutputFormat, unwrapScalar bool, colorsEnabled bool, indent int, printDocSeparators bool) Printer { +func NewPrinter(encoder Encoder, printerWriter PrinterWriter) Printer { return &resultsPrinter{ - printerWriter: printerWriter, - outputFormat: outputFormat, - unwrapScalar: unwrapScalar, - colorsEnabled: colorsEnabled, - indent: indent, - printDocSeparators: outputFormat == YamlOutputFormat && printDocSeparators, - firstTimePrinting: true, - treeNavigator: NewDataTreeNavigator(), + encoder: encoder, + printerWriter: printerWriter, + firstTimePrinting: true, + treeNavigator: NewDataTreeNavigator(), } } @@ -90,28 +78,7 @@ func (p *resultsPrinter) PrintedAnything() bool { func (p *resultsPrinter) printNode(node *yaml.Node, writer io.Writer) error { p.printedMatches = p.printedMatches || (node.Tag != "!!null" && (node.Tag != "!!bool" || node.Value != "false")) - - var encoder Encoder - if node.Kind == yaml.ScalarNode && p.unwrapScalar && p.outputFormat == YamlOutputFormat { - return writeString(writer, node.Value+"\n") - } - - switch p.outputFormat { - case JsonOutputFormat: - encoder = NewJsonEncoder(writer, p.indent) - case PropsOutputFormat: - encoder = NewPropertiesEncoder(writer) - case CsvOutputFormat: - encoder = NewCsvEncoder(writer, ',') - case TsvOutputFormat: - encoder = NewCsvEncoder(writer, '\t') - case YamlOutputFormat: - encoder = NewYamlEncoder(writer, p.indent, p.colorsEnabled) - case XmlOutputFormat: - encoder = NewXmlEncoder(writer, p.indent, XmlPreferences.AttributePrefix, XmlPreferences.ContentName) - } - - return encoder.Encode(node) + return p.encoder.Encode(writer, node) } func (p *resultsPrinter) PrintResults(matchingNodes *list.List) error { @@ -122,7 +89,7 @@ func (p *resultsPrinter) PrintResults(matchingNodes *list.List) error { return nil } - if p.outputFormat != YamlOutputFormat { + if !p.encoder.CanHandleAliases() { explodeOp := Operation{OperationType: explodeOpType} explodeNode := ExpressionNode{Operation: &explodeOp} context, err := p.treeNavigator.GetMatchingNodes(Context{MatchingNodes: matchingNodes}, &explodeNode) @@ -142,7 +109,7 @@ func (p *resultsPrinter) PrintResults(matchingNodes *list.List) error { for el := matchingNodes.Front(); el != nil; el = el.Next() { mappedDoc := el.Value.(*CandidateNode) - log.Debug("-- print sep logic: p.firstTimePrinting: %v, previousDocIndex: %v, mappedDoc.Document: %v, printDocSeparators: %v", p.firstTimePrinting, p.previousDocIndex, mappedDoc.Document, p.printDocSeparators) + log.Debug("-- print sep logic: p.firstTimePrinting: %v, previousDocIndex: %v, mappedDoc.Document: %v", p.firstTimePrinting, p.previousDocIndex, mappedDoc.Document) writer, errorWriting := p.printerWriter.GetWriter(mappedDoc) if errorWriting != nil { @@ -152,14 +119,13 @@ func (p *resultsPrinter) PrintResults(matchingNodes *list.List) error { commentsStartWithSepExp := regexp.MustCompile(`^\$yqDocSeperator\$`) commentStartsWithSeparator := commentsStartWithSepExp.MatchString(mappedDoc.LeadingContent) - if (p.previousDocIndex != mappedDoc.Document || p.previousFileIndex != mappedDoc.FileIndex) && p.printDocSeparators && !commentStartsWithSeparator { - log.Debug("-- writing doc sep") - if err := writeString(writer, "---\n"); err != nil { + if (p.previousDocIndex != mappedDoc.Document || p.previousFileIndex != mappedDoc.FileIndex) && !commentStartsWithSeparator { + if err := p.encoder.PrintDocumentSeparator(writer); err != nil { return err } } - if err := processLeadingContent(mappedDoc, writer, p.printDocSeparators, p.outputFormat); err != nil { + if err := p.encoder.PrintLeadingContent(writer, mappedDoc.LeadingContent); err != nil { return err } @@ -171,9 +137,11 @@ func (p *resultsPrinter) PrintResults(matchingNodes *list.List) error { if err := writer.Flush(); err != nil { return err } + log.Debugf("done printing results") } - if p.appendixReader != nil && p.outputFormat == YamlOutputFormat { + // what happens if I remove output format check? + if p.appendixReader != nil { writer, err := p.printerWriter.GetWriter(nil) if err != nil { return err diff --git a/pkg/yqlib/printer_test.go b/pkg/yqlib/printer_test.go index d3172906..d23f9b96 100644 --- a/pkg/yqlib/printer_test.go +++ b/pkg/yqlib/printer_test.go @@ -33,10 +33,10 @@ func nodeToList(candidate *CandidateNode) *list.List { return elMap } -func TestPrinterMultipleDocsInSequence(t *testing.T) { +func TestPrinterMultipleDocsInSequenceOnly(t *testing.T) { var output bytes.Buffer var writer = bufio.NewWriter(&output) - printer := NewPrinterWithSingleWriter(writer, YamlOutputFormat, true, false, 2, true) + printer := NewSimpleYamlPrinter(writer, YamlOutputFormat, true, false, 2, true) inputs, err := readDocuments(strings.NewReader(multiDocSample), "sample.yml", 0, NewYamlDecoder()) if err != nil { @@ -74,7 +74,7 @@ func TestPrinterMultipleDocsInSequence(t *testing.T) { func TestPrinterMultipleDocsInSequenceWithLeadingContent(t *testing.T) { var output bytes.Buffer var writer = bufio.NewWriter(&output) - printer := NewPrinterWithSingleWriter(writer, YamlOutputFormat, true, false, 2, true) + printer := NewSimpleYamlPrinter(writer, YamlOutputFormat, true, false, 2, true) inputs, err := readDocuments(strings.NewReader(multiDocSample), "sample.yml", 0, NewYamlDecoder()) if err != nil { @@ -116,7 +116,7 @@ func TestPrinterMultipleDocsInSequenceWithLeadingContent(t *testing.T) { func TestPrinterMultipleFilesInSequence(t *testing.T) { var output bytes.Buffer var writer = bufio.NewWriter(&output) - printer := NewPrinterWithSingleWriter(writer, YamlOutputFormat, true, false, 2, true) + printer := NewSimpleYamlPrinter(writer, YamlOutputFormat, true, false, 2, true) inputs, err := readDocuments(strings.NewReader(multiDocSample), "sample.yml", 0, NewYamlDecoder()) if err != nil { @@ -163,7 +163,7 @@ func TestPrinterMultipleFilesInSequence(t *testing.T) { func TestPrinterMultipleFilesInSequenceWithLeadingContent(t *testing.T) { var output bytes.Buffer var writer = bufio.NewWriter(&output) - printer := NewPrinterWithSingleWriter(writer, YamlOutputFormat, true, false, 2, true) + printer := NewSimpleYamlPrinter(writer, YamlOutputFormat, true, false, 2, true) inputs, err := readDocuments(strings.NewReader(multiDocSample), "sample.yml", 0, NewYamlDecoder()) if err != nil { @@ -213,7 +213,7 @@ func TestPrinterMultipleFilesInSequenceWithLeadingContent(t *testing.T) { func TestPrinterMultipleDocsInSinglePrint(t *testing.T) { var output bytes.Buffer var writer = bufio.NewWriter(&output) - printer := NewPrinterWithSingleWriter(writer, YamlOutputFormat, true, false, 2, true) + printer := NewSimpleYamlPrinter(writer, YamlOutputFormat, true, false, 2, true) inputs, err := readDocuments(strings.NewReader(multiDocSample), "sample.yml", 0, NewYamlDecoder()) if err != nil { @@ -232,7 +232,7 @@ func TestPrinterMultipleDocsInSinglePrint(t *testing.T) { func TestPrinterMultipleDocsInSinglePrintWithLeadingDoc(t *testing.T) { var output bytes.Buffer var writer = bufio.NewWriter(&output) - printer := NewPrinterWithSingleWriter(writer, YamlOutputFormat, true, false, 2, true) + printer := NewSimpleYamlPrinter(writer, YamlOutputFormat, true, false, 2, true) inputs, err := readDocuments(strings.NewReader(multiDocSample), "sample.yml", 0, NewYamlDecoder()) if err != nil { @@ -261,7 +261,7 @@ a: coconut func TestPrinterMultipleDocsInSinglePrintWithLeadingDocTrailing(t *testing.T) { var output bytes.Buffer var writer = bufio.NewWriter(&output) - printer := NewPrinterWithSingleWriter(writer, YamlOutputFormat, true, false, 2, true) + printer := NewSimpleYamlPrinter(writer, YamlOutputFormat, true, false, 2, true) inputs, err := readDocuments(strings.NewReader(multiDocSample), "sample.yml", 0, NewYamlDecoder()) if err != nil { @@ -287,7 +287,7 @@ a: coconut func TestPrinterScalarWithLeadingCont(t *testing.T) { var output bytes.Buffer var writer = bufio.NewWriter(&output) - printer := NewPrinterWithSingleWriter(writer, YamlOutputFormat, true, false, 2, true) + printer := NewSimpleYamlPrinter(writer, YamlOutputFormat, true, false, 2, true) node, err := NewExpressionParser().ParseExpression(".a") if err != nil { @@ -314,7 +314,7 @@ func TestPrinterMultipleDocsJson(t *testing.T) { var writer = bufio.NewWriter(&output) // note printDocSeparators is true, it should still not print document separators // when outputing JSON. - printer := NewPrinterWithSingleWriter(writer, JsonOutputFormat, true, false, 0, true) + printer := NewPrinter(NewJsonEncoder(0), NewSinglePrinterWriter(writer)) inputs, err := readDocuments(strings.NewReader(multiDocSample), "sample.yml", 0, NewYamlDecoder()) if err != nil { diff --git a/pkg/yqlib/utils.go b/pkg/yqlib/utils.go index ecfa6a99..47871f05 100644 --- a/pkg/yqlib/utils.go +++ b/pkg/yqlib/utils.go @@ -38,43 +38,6 @@ func writeString(writer io.Writer, txt string) error { return errorWriting } -func processLeadingContent(mappedDoc *CandidateNode, writer io.Writer, printDocSeparators bool, outputFormat PrinterOutputFormat) error { - log.Debug("headcommentwas %v", mappedDoc.LeadingContent) - log.Debug("finished headcomment") - reader := bufio.NewReader(strings.NewReader(mappedDoc.LeadingContent)) - - for { - - readline, errReading := reader.ReadString('\n') - if errReading != nil && !errors.Is(errReading, io.EOF) { - return errReading - } - if strings.Contains(readline, "$yqDocSeperator$") { - if printDocSeparators { - if err := writeString(writer, "---\n"); err != nil { - return err - } - } - } else if outputFormat == YamlOutputFormat { - if err := writeString(writer, readline); err != nil { - return err - } - } - - if errors.Is(errReading, io.EOF) { - if readline != "" { - // the last comment we read didn't have a new line, put one in - if err := writeString(writer, "\n"); err != nil { - return err - } - } - break - } - } - - return nil -} - func processReadStream(reader *bufio.Reader) (io.Reader, string, error) { var commentLineRegEx = regexp.MustCompile(`^\s*#`) var sb strings.Builder diff --git a/pkg/yqlib/xml_test.go b/pkg/yqlib/xml_test.go index eb0c2c72..2d39c5c3 100644 --- a/pkg/yqlib/xml_test.go +++ b/pkg/yqlib/xml_test.go @@ -24,23 +24,29 @@ func decodeXml(t *testing.T, xml string) *CandidateNode { return &CandidateNode{Node: node} } -func yamlToXml(sampleYaml string, indent int) string { +func processScenario(s xmlScenario) string { var output bytes.Buffer writer := bufio.NewWriter(&output) - var encoder = NewXmlEncoder(writer, indent, "+", "+content") - inputs, err := readDocuments(strings.NewReader(sampleYaml), "sample.yml", 0, NewYamlDecoder()) + var encoder = NewXmlEncoder(2, "+", "+content") + + var decoder = NewYamlDecoder() + if s.scenarioType == "roundtrip" { + decoder = NewXmlDecoder("+", "+content") + } + + inputs, err := readDocuments(strings.NewReader(s.input), "sample.yml", 0, decoder) if err != nil { panic(err) } node := inputs.Front().Value.(*CandidateNode).Node - err = encoder.Encode(node) + err = encoder.Encode(writer, node) if err != nil { panic(err) } writer.Flush() - return strings.TrimSuffix(output.String(), "\n") + return output.String() } type xmlScenario struct { @@ -49,9 +55,154 @@ type xmlScenario struct { description string subdescription string skipDoc bool - encodeScenario bool + scenarioType string } +var inputXmlWithComments = ` + + + + 3 + + + + z + + + + + + +` +var inputXmlWithCommentsWithSubChild = ` + + + + 3 + + + + + + + + + + +` + +var expectedDecodeYamlWithSubChild = `D0, P[], (doc)::# before cat +cat: + # in cat before + x: "3" # multi + # line comment + # for x + # before y + + y: + # in y before + d: + # in d before + z: + +sweet: cool + # in d after + # in y after + # in_cat_after +# after cat +` + +var inputXmlWithCommentsWithArray = ` + + + + 3 + + + + + + + + + + + +` + +var expectedDecodeYamlWithArray = `D0, P[], (doc)::# before cat +cat: + # in cat before + x: "3" # multi + # line comment + # for x + # before y + + y: + # in y before + d: + - # in d before + z: + +sweet: cool + # in d after + - # in d2 before + z: + +sweet: cool2 + # in d2 after + # in y after + # in_cat_after +# after cat +` + +var expectedDecodeYamlWithComments = `D0, P[], (doc)::# before cat +cat: + # in cat before + x: "3" # multi + # line comment + # for x + # before y + + y: + # in y before + # in d before + d: z # in d after + # in y after + # in_cat_after +# after cat +` + +var expectedRoundtripXmlWithComments = ` + 3 + + z + + +` + +var yamlWithComments = `# above_cat +cat: # inline_cat + # above_array + array: # inline_array + - val1 # inline_val1 + # above_val2 + - val2 # inline_val2 +# below_cat +` + +var expectedXmlWithComments = ` + val1 + val2 + +` + var xmlScenarios = []xmlScenario{ { description: "Parse xml: simple", @@ -66,53 +217,88 @@ var xmlScenarios = []xmlScenario{ }, { description: "Parse xml: attributes", - subdescription: "Attributes are converted to fields, with the attribute prefix.", + subdescription: "Attributes are converted to fields, with the default attribute prefix '+'. Use '--xml-attribute-prefix` to set your own.", input: "\n\n 7\n", expected: "D0, P[], (doc)::cat:\n +legs: \"4\"\n legs: \"7\"\n", }, { description: "Parse xml: attributes with content", - subdescription: "Content is added as a field, using the content name", + subdescription: "Content is added as a field, using the default content name of '+content'. Use `--xml-content-name` to set your own.", input: "\nmeow", expected: "D0, P[], (doc)::cat:\n +content: meow\n +legs: \"4\"\n", }, { - description: "Encode xml: simple", - input: "cat: purrs", - expected: "purrs", - encodeScenario: true, + description: "Parse xml: with comments", + subdescription: "A best attempt is made to preserve comments.", + input: inputXmlWithComments, + expected: expectedDecodeYamlWithComments, + scenarioType: "decode", }, { - description: "Encode xml: array", - input: "pets:\n cat:\n - purrs\n - meows", - expected: "\n purrs\n meows\n", - encodeScenario: true, + description: "Parse xml: with comments subchild", + skipDoc: true, + input: inputXmlWithCommentsWithSubChild, + expected: expectedDecodeYamlWithSubChild, + scenarioType: "decode", + }, + { + description: "Parse xml: with comments array", + skipDoc: true, + input: inputXmlWithCommentsWithArray, + expected: expectedDecodeYamlWithArray, + scenarioType: "decode", + }, + { + description: "Encode xml: simple", + input: "cat: purrs", + expected: "purrs\n", + scenarioType: "encode", + }, + { + description: "Encode xml: array", + input: "pets:\n cat:\n - purrs\n - meows", + expected: "\n purrs\n meows\n\n", + scenarioType: "encode", }, { description: "Encode xml: attributes", subdescription: "Fields with the matching xml-attribute-prefix are assumed to be attributes.", input: "cat:\n +name: tiger\n meows: true\n", - expected: "\n true\n", - encodeScenario: true, + expected: "\n true\n\n", + scenarioType: "encode", }, { - skipDoc: true, - input: "cat:\n ++name: tiger\n meows: true\n", - expected: "\n true\n", - encodeScenario: true, + skipDoc: true, + input: "cat:\n ++name: tiger\n meows: true\n", + expected: "\n true\n\n", + scenarioType: "encode", }, { description: "Encode xml: attributes with content", subdescription: "Fields with the matching xml-content-name is assumed to be content.", input: "cat:\n +name: tiger\n +content: cool\n", - expected: "cool", - encodeScenario: true, + expected: "cool\n", + scenarioType: "encode", + }, + { + description: "Encode xml: comments", + subdescription: "A best attempt is made to copy comments to xml.", + input: yamlWithComments, + expected: expectedXmlWithComments, + scenarioType: "encode", + }, + { + description: "Round trip: with comments", + subdescription: "A best effort is made, but comment positions and white space are not preserved perfectly.", + input: inputXmlWithComments, + expected: expectedRoundtripXmlWithComments, + scenarioType: "roundtrip", }, } -func testXmlScenario(t *testing.T, s *xmlScenario) { - if s.encodeScenario { - test.AssertResultWithContext(t, s.expected, yamlToXml(s.input, 2), s.description) +func testXmlScenario(t *testing.T, s xmlScenario) { + if s.scenarioType == "encode" || s.scenarioType == "roundtrip" { + test.AssertResultWithContext(t, s.expected, processScenario(s), s.description) } else { var actual = resultToString(t, decodeXml(t, s.input)) test.AssertResultWithContext(t, s.expected, actual, s.description) @@ -125,8 +311,10 @@ func documentXmlScenario(t *testing.T, w *bufio.Writer, i interface{}) { if s.skipDoc { return } - if s.encodeScenario { + if s.scenarioType == "encode" { documentXmlEncodeScenario(w, s) + } else if s.scenarioType == "roundtrip" { + documentXmlRoundTripScenario(w, s) } else { documentXmlDecodeScenario(t, w, s) } @@ -149,7 +337,7 @@ func documentXmlDecodeScenario(t *testing.T, w *bufio.Writer, s xmlScenario) { writeOrPanic(w, "will output\n") var output bytes.Buffer - printer := NewPrinterWithSingleWriter(bufio.NewWriter(&output), YamlOutputFormat, true, false, 2, true) + printer := NewSimpleYamlPrinter(bufio.NewWriter(&output), YamlOutputFormat, true, false, 2, true) node := decodeXml(t, s.input) @@ -177,12 +365,30 @@ func documentXmlEncodeScenario(w *bufio.Writer, s xmlScenario) { writeOrPanic(w, "```bash\nyq e -o=xml '.' sample.yml\n```\n") writeOrPanic(w, "will output\n") - writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", yamlToXml(s.input, 2))) + writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processScenario(s))) +} + +func documentXmlRoundTripScenario(w *bufio.Writer, s xmlScenario) { + writeOrPanic(w, fmt.Sprintf("## %v\n", s.description)) + + if s.subdescription != "" { + writeOrPanic(w, s.subdescription) + writeOrPanic(w, "\n\n") + } + + writeOrPanic(w, "Given a sample.xml file of:\n") + writeOrPanic(w, fmt.Sprintf("```xml\n%v\n```\n", s.input)) + + writeOrPanic(w, "then\n") + writeOrPanic(w, "```bash\nyq e -p=xml -o=xml '.' sample.xml\n```\n") + writeOrPanic(w, "will output\n") + + writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processScenario(s))) } func TestXmlScenarios(t *testing.T) { for _, tt := range xmlScenarios { - testXmlScenario(t, &tt) + testXmlScenario(t, tt) } genericScenarios := make([]interface{}, len(xmlScenarios)) for i, s := range xmlScenarios { diff --git a/test/utils.go b/test/utils.go index 8b0d9cd4..c2a1ce7a 100644 --- a/test/utils.go +++ b/test/utils.go @@ -1,6 +1,7 @@ package test import ( + "bufio" "bytes" "fmt" "os" @@ -8,6 +9,8 @@ import ( "strings" "testing" + "github.com/pkg/diff" + "github.com/pkg/diff/write" "github.com/spf13/cobra" yaml "gopkg.in/yaml.v3" ) @@ -63,8 +66,14 @@ func AssertResultComplexWithContext(t *testing.T, expectedValue interface{}, act func AssertResultWithContext(t *testing.T, expectedValue interface{}, actualValue interface{}, context interface{}) { t.Helper() + opts := []write.Option{write.TerminalColor()} if expectedValue != actualValue { t.Error(context) - t.Error(": expected <", expectedValue, "> but got <", actualValue, ">") + var differenceBuffer bytes.Buffer + if err := diff.Text("expected", "actual", expectedValue, actualValue, bufio.NewWriter(&differenceBuffer), opts...); err != nil { + t.Error(err) + } else { + t.Error(differenceBuffer.String()) + } } }