diff --git a/cmd/root.go b/cmd/root.go index 28e22648..dd2e2751 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -3,6 +3,7 @@ package cmd import ( "os" + "github.com/mikefarah/yq/v4/pkg/yqlib" "github.com/spf13/cobra" logging "gopkg.in/op/go-logging.v1" ) @@ -37,6 +38,8 @@ See https://mikefarah.gitbook.io/yq/ for detailed documentation and examples.`, } logging.SetBackend(backend) + yqlib.XmlPreferences.AttributePrefix = xmlAttributePrefix + yqlib.XmlPreferences.ContentName = xmlContentName }, } diff --git a/examples/mike.xml b/examples/mike.xml index 99a1afef..2e1813a4 100644 --- a/examples/mike.xml +++ b/examples/mike.xml @@ -1,4 +1,2 @@ -3f -meow:as -true \ No newline at end of file +BiBi \ No newline at end of file diff --git a/pkg/yqlib/doc/operators/encode-decode.md b/pkg/yqlib/doc/operators/encode-decode.md index 1def115e..fc6a4c5d 100644 --- a/pkg/yqlib/doc/operators/encode-decode.md +++ b/pkg/yqlib/doc/operators/encode-decode.md @@ -14,11 +14,13 @@ These operators are useful to process yaml documents that have stringified embed | Properties | | to_props/@props | | CSV | | to_csv/@csv | | TSV | | to_tsv/@tsv | -| XML | from_xml | | +| XML | from_xml | to_xml(i)/@xml | CSV and TSV format both accept either a single array or scalars (representing a single row), or an array of array of scalars (representing multiple rows). +XML uses the `--xml-attribute-prefix` and `--xml-content-name` flags to identify attributes and content fields. 
+ ## Encode value as json string Given a sample.yml file of: @@ -272,6 +274,61 @@ cat thing1,thing2 true 3.40 dog thing3 false 12 ``` +## Encode value as xml string +Given a sample.yml file of: +```yaml +a: + cool: + foo: bar + +id: hi +``` +then +```bash +yq eval '.a | to_xml' sample.yml +``` +will output +```yaml + + bar + +``` + +## Encode value as xml string on a single line +Given a sample.yml file of: +```yaml +a: + cool: + foo: bar + +id: hi +``` +then +```bash +yq eval '.a | @xml' sample.yml +``` +will output +```yaml +bar +``` + +## Encode value as xml string with custom indentation +Given a sample.yml file of: +```yaml +a: + cool: + foo: bar + +id: hi +``` +then +```bash +yq eval '.a | to_xml(1)' sample.yml +``` +will output +```yaml + + bar + +``` + ## Decode a xml encoded string Given a sample.yml file of: ```yaml diff --git a/pkg/yqlib/doc/operators/headers/encode-decode.md b/pkg/yqlib/doc/operators/headers/encode-decode.md index 847240b3..ede62504 100644 --- a/pkg/yqlib/doc/operators/headers/encode-decode.md +++ b/pkg/yqlib/doc/operators/headers/encode-decode.md @@ -14,8 +14,10 @@ These operators are useful to process yaml documents that have stringified embed | Properties | | to_props/@props | | CSV | | to_csv/@csv | | TSV | | to_tsv/@tsv | -| XML | from_xml | | +| XML | from_xml | to_xml(i)/@xml | CSV and TSV format both accept either a single array or scalars (representing a single row), or an array of array of scalars (representing multiple rows). +XML uses the `--xml-attribute-prefix` and `--xml-content-name` flags to identify attributes and content fields. + diff --git a/pkg/yqlib/doc/usage/headers/xml.md b/pkg/yqlib/doc/usage/headers/xml.md index 9258afd0..b182a97b 100644 --- a/pkg/yqlib/doc/usage/headers/xml.md +++ b/pkg/yqlib/doc/usage/headers/xml.md @@ -1,8 +1,10 @@ # XML -At the moment, `yq` only supports decoding `xml` (into one of the other supported output formats). +Encode and decode to and from XML. 
Whitespace is not conserved for round trips - but the order of the fields are. -As yaml does not have the concept of attributes, these are converted to regular fields with a prefix to prevent clobbering. Consecutive xml nodes with the same name are assumed to be arrays. +As yaml does not have the concept of attributes, xml attributes are converted to regular fields with a prefix to prevent clobbering. This defaults to "+"; use the `--xml-attribute-prefix` flag to change it. + +Consecutive xml nodes with the same name are assumed to be arrays. All values in XML are assumed to be strings - but you can use `from_yaml` to parse them into their correct types: @@ -10,3 +12,12 @@ All values in XML are assumed to be strings - but you can use `from_yaml` to par ``` yq e -p=xml '.myNumberField |= from_yaml' my.xml ``` + + +XML nodes that have attributes then plain content, e.g: + +```xml +meow +``` + +The content of the node will be set as a field in the map with the key "+content". Use the `--xml-content-name` flag to change this. diff --git a/pkg/yqlib/doc/usage/xml.md b/pkg/yqlib/doc/usage/xml.md index 7c37ce25..17c19871 100644 --- a/pkg/yqlib/doc/usage/xml.md +++ b/pkg/yqlib/doc/usage/xml.md @@ -1,8 +1,10 @@ # XML -At the moment, `yq` only supports decoding `xml` (into one of the other supported output formats). +Encode and decode to and from XML. Whitespace is not conserved for round trips - but the order of the fields are. -As yaml does not have the concept of attributes, these are converted to regular fields with a prefix to prevent clobbering. Consecutive xml nodes with the same name are assumed to be arrays. +As yaml does not have the concept of attributes, xml attributes are converted to regular fields with a prefix to prevent clobbering. This defaults to "+"; use the `--xml-attribute-prefix` flag to change it. + +Consecutive xml nodes with the same name are assumed to be arrays. 
All values in XML are assumed to be strings - but you can use `from_yaml` to parse them into their correct types: @@ -11,6 +13,15 @@ All values in XML are assumed to be strings - but you can use `from_yaml` to par yq e -p=xml '.myNumberField |= from_yaml' my.xml ``` + +XML nodes that have attributes then plain content, e.g: + +```xml +meow +``` + +The content of the node will be set as a field in the map with the key "+content". Use the `--xml-content-name` flag to change this. + ## Parse xml: simple Given a sample.xml file of: ```xml @@ -97,8 +108,7 @@ yq e -o=xml '.' sample.yml ``` will output ```xml -purrs -``` +purrs``` ## Encode xml: array Given a sample.yml file of: @@ -117,8 +127,7 @@ will output purrs meows - -``` +``` ## Encode xml: attributes Fields with the matching xml-attribute-prefix are assumed to be attributes. @@ -138,8 +147,7 @@ will output ```xml true - -``` +``` ## Encode xml: attributes with content Fields with the matching xml-content-name is assumed to be content. @@ -157,6 +165,5 @@ yq e -o=xml '.' 
sample.yml ``` will output ```xml -cool -``` +cool``` diff --git a/pkg/yqlib/encoder_xml.go b/pkg/yqlib/encoder_xml.go index 2ebcd577..f1cac841 100644 --- a/pkg/yqlib/encoder_xml.go +++ b/pkg/yqlib/encoder_xml.go @@ -28,12 +28,25 @@ func NewXmlEncoder(writer io.Writer, indent int, attributePrefix string, content func (e *xmlEncoder) Encode(node *yaml.Node) error { switch node.Kind { case yaml.MappingNode: - return e.encodeTopLevelMap(node) + err := e.encodeTopLevelMap(node) + if err != nil { + return err + } + var charData xml.CharData = []byte("\n") + err = e.xmlEncoder.EncodeToken(charData) + if err != nil { + return err + } + return e.xmlEncoder.Flush() case yaml.DocumentNode: return e.Encode(unwrapDoc(node)) case yaml.ScalarNode: var charData xml.CharData = []byte(node.Value) - return e.xmlEncoder.EncodeToken(charData) + err := e.xmlEncoder.EncodeToken(charData) + if err != nil { + return err + } + return e.xmlEncoder.Flush() } return fmt.Errorf("unsupported type %v", node.Tag) } diff --git a/pkg/yqlib/expression_tokeniser.go b/pkg/yqlib/expression_tokeniser.go index d812039c..465bf5a5 100644 --- a/pkg/yqlib/expression_tokeniser.go +++ b/pkg/yqlib/expression_tokeniser.go @@ -322,6 +322,9 @@ func initLexer() (*lex.Lexer, error) { lexer.Add([]byte(`toyaml\([0-9]+\)`), encodeWithIndent(YamlOutputFormat)) lexer.Add([]byte(`to_yaml\([0-9]+\)`), encodeWithIndent(YamlOutputFormat)) + lexer.Add([]byte(`toxml\([0-9]+\)`), encodeWithIndent(XmlOutputFormat)) + lexer.Add([]byte(`to_xml\([0-9]+\)`), encodeWithIndent(XmlOutputFormat)) + lexer.Add([]byte(`tojson\([0-9]+\)`), encodeWithIndent(JsonOutputFormat)) lexer.Add([]byte(`to_json\([0-9]+\)`), encodeWithIndent(JsonOutputFormat)) @@ -346,6 +349,10 @@ func initLexer() (*lex.Lexer, error) { lexer.Add([]byte(`to_tsv`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: TsvOutputFormat})) lexer.Add([]byte(`@tsv`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: TsvOutputFormat})) + 
lexer.Add([]byte(`toxml`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: XmlOutputFormat})) + lexer.Add([]byte(`to_xml`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: XmlOutputFormat, indent: 2})) + lexer.Add([]byte(`@xml`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: XmlOutputFormat, indent: 0})) + lexer.Add([]byte(`fromyaml`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: YamlInputFormat})) lexer.Add([]byte(`fromjson`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: YamlInputFormat})) lexer.Add([]byte(`fromxml`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: XmlInputFormat})) @@ -359,9 +366,9 @@ func initLexer() (*lex.Lexer, error) { lexer.Add([]byte(`load`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewYamlDecoder()})) - lexer.Add([]byte(`xmlload`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewXmlDecoder("+", "+content")})) - lexer.Add([]byte(`load_xml`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewXmlDecoder("+", "+content")})) - lexer.Add([]byte(`loadxml`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewXmlDecoder("+", "+content")})) + lexer.Add([]byte(`xmlload`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewXmlDecoder(XmlPreferences.AttributePrefix, XmlPreferences.ContentName)})) + lexer.Add([]byte(`load_xml`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewXmlDecoder(XmlPreferences.AttributePrefix, XmlPreferences.ContentName)})) + lexer.Add([]byte(`loadxml`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewXmlDecoder(XmlPreferences.AttributePrefix, XmlPreferences.ContentName)})) lexer.Add([]byte(`strload`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: true})) lexer.Add([]byte(`load_str`), opTokenWithPrefs(loadOpType, nil, 
loadPrefs{loadAsString: true})) diff --git a/pkg/yqlib/lib.go b/pkg/yqlib/lib.go index 42cf2a9c..9524dddc 100644 --- a/pkg/yqlib/lib.go +++ b/pkg/yqlib/lib.go @@ -13,6 +13,13 @@ import ( yaml "gopkg.in/yaml.v3" ) +type xmlPreferences struct { + AttributePrefix string + ContentName string +} + +var XmlPreferences = xmlPreferences{AttributePrefix: "+", ContentName: "+content"} + var log = logging.MustGetLogger("yq-lib") // GetLogger returns the yq logger instance. diff --git a/pkg/yqlib/operator_encoder_decoder.go b/pkg/yqlib/operator_encoder_decoder.go index a702d0f1..ea7c51ee 100644 --- a/pkg/yqlib/operator_encoder_decoder.go +++ b/pkg/yqlib/operator_encoder_decoder.go @@ -82,7 +82,7 @@ func decodeOperator(d *dataTreeNavigator, context Context, expressionNode *Expre case YamlInputFormat: decoder = NewYamlDecoder() case XmlInputFormat: - decoder = NewXmlDecoder("+a", "+content") + decoder = NewXmlDecoder(XmlPreferences.AttributePrefix, XmlPreferences.ContentName) } var results = list.New() diff --git a/pkg/yqlib/operator_encoder_decoder_test.go b/pkg/yqlib/operator_encoder_decoder_test.go index 1c394356..ac1d8dff 100644 --- a/pkg/yqlib/operator_encoder_decoder_test.go +++ b/pkg/yqlib/operator_encoder_decoder_test.go @@ -168,6 +168,30 @@ var encoderDecoderOperatorScenarios = []expressionScenario{ "D0, P[], (doc)::a: \"foo:\\n a: frog\"\n", }, }, + { + description: "Encode value as xml string", + document: `{a: {cool: {foo: "bar", +id: hi}}}`, + expression: `.a | to_xml`, + expected: []string{ + "D0, P[a], (!!str)::\n bar\n\n", + }, + }, + { + description: "Encode value as xml string on a single line", + document: `{a: {cool: {foo: "bar", +id: hi}}}`, + expression: `.a | @xml`, + expected: []string{ + "D0, P[a], (!!str)::bar\n", + }, + }, + { + description: "Encode value as xml string with custom indentation", + document: `{a: {cool: {foo: "bar", +id: hi}}}`, + expression: `.a | to_xml(1)`, + expected: []string{ + "D0, P[a], (!!str)::\n bar\n\n", + }, + }, { 
description: "Decode a xml encoded string", document: `a: "bar"`, diff --git a/pkg/yqlib/printer.go b/pkg/yqlib/printer.go index ab5d05d4..2c1706e8 100644 --- a/pkg/yqlib/printer.go +++ b/pkg/yqlib/printer.go @@ -43,7 +43,7 @@ func OutputFormatFromString(format string) (PrinterOutputFormat, error) { case "xml", "x": return XmlOutputFormat, nil default: - return 0, fmt.Errorf("unknown format '%v' please use [yaml|json|props|csv|tsv]", format) + return 0, fmt.Errorf("unknown format '%v' please use [yaml|json|props|csv|tsv|xml]", format) } } @@ -108,7 +108,7 @@ func (p *resultsPrinter) printNode(node *yaml.Node, writer io.Writer) error { case YamlOutputFormat: encoder = NewYamlEncoder(writer, p.indent, p.colorsEnabled) case XmlOutputFormat: - encoder = NewXmlEncoder(writer, p.indent, "+", "+content") + encoder = NewXmlEncoder(writer, p.indent, XmlPreferences.AttributePrefix, XmlPreferences.ContentName) } return encoder.Encode(node) diff --git a/pkg/yqlib/xml_test.go b/pkg/yqlib/xml_test.go index f3b06562..eb0c2c72 100644 --- a/pkg/yqlib/xml_test.go +++ b/pkg/yqlib/xml_test.go @@ -110,8 +110,6 @@ var xmlScenarios = []xmlScenario{ }, } -//encode - func testXmlScenario(t *testing.T, s *xmlScenario) { if s.encodeScenario { test.AssertResultWithContext(t, s.expected, yamlToXml(s.input, 2), s.description) @@ -179,7 +177,7 @@ func documentXmlEncodeScenario(w *bufio.Writer, s xmlScenario) { writeOrPanic(w, "```bash\nyq e -o=xml '.' sample.yml\n```\n") writeOrPanic(w, "will output\n") - writeOrPanic(w, fmt.Sprintf("```xml\n%v\n```\n\n", yamlToXml(s.input, 2))) + writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", yamlToXml(s.input, 2))) } func TestXmlScenarios(t *testing.T) {