From b9309a42a453447b3ae082ce58fc7b85ff750e76 Mon Sep 17 00:00:00 2001 From: rndmit Date: Wed, 15 Jun 2022 02:40:31 +0300 Subject: [PATCH] XML decoder additions (#1239) * Add xml-keep-namespace and xml-raw-token features * Add tests * Change flags usage strings * Append docs --- cmd/constant.go | 2 + cmd/root.go | 2 + cmd/utils.go | 2 +- go.sum | 2 - pkg/yqlib/decoder_xml.go | 27 ++++++- pkg/yqlib/doc/usage/xml.md | 46 ++++++++++++ pkg/yqlib/encoder_xml.go | 2 +- pkg/yqlib/expression_tokeniser.go | 6 +- pkg/yqlib/lib.go | 2 + pkg/yqlib/operator_encoder_decoder.go | 7 +- pkg/yqlib/xml_test.go | 104 +++++++++++++++++++++++--- 11 files changed, 181 insertions(+), 21 deletions(-) diff --git a/cmd/constant.go b/cmd/constant.go index 4594f08f..90f957d3 100644 --- a/cmd/constant.go +++ b/cmd/constant.go @@ -11,6 +11,8 @@ var inputFormat = "yaml" var xmlAttributePrefix = "+" var xmlContentName = "+content" var xmlStrictMode = false +var xmlKeepNamespace = false +var xmlUseRawToken = false var exitStatus = false var forceColor = false diff --git a/cmd/root.go b/cmd/root.go index 0f5a54a2..a3452884 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -72,6 +72,8 @@ yq -P sample.json rootCmd.PersistentFlags().StringVar(&xmlAttributePrefix, "xml-attribute-prefix", "+", "prefix for xml attributes") rootCmd.PersistentFlags().StringVar(&xmlContentName, "xml-content-name", "+content", "name for xml content (if no attribute name is present).") rootCmd.PersistentFlags().BoolVar(&xmlStrictMode, "xml-strict-mode", false, "enables strict parsing of XML. See https://pkg.go.dev/encoding/xml for more details.") + rootCmd.PersistentFlags().BoolVar(&xmlKeepNamespace, "xml-keep-namespace", false, "enables keeping namespace after parsing attributes") + rootCmd.PersistentFlags().BoolVar(&xmlUseRawToken, "xml-raw-token", false, "enables using RawToken method instead Token. Commonly disables namespace translations. See https://pkg.go.dev/encoding/xml#Decoder.RawToken for details.") rootCmd.PersistentFlags().BoolVarP(&nullInput, "null-input", "n", false, "Don't read input, simply evaluate the expression given. Useful for creating docs from scratch.") rootCmd.PersistentFlags().BoolVarP(&noDocSeparators, "no-doc", "N", false, "Don't print document separators (---)") diff --git a/cmd/utils.go b/cmd/utils.go index ab3b1a12..faec5bed 100644 --- a/cmd/utils.go +++ b/cmd/utils.go @@ -63,7 +63,7 @@ func configureDecoder() (yqlib.Decoder, error) { } switch yqlibInputFormat { case yqlib.XMLInputFormat: - return yqlib.NewXMLDecoder(xmlAttributePrefix, xmlContentName, xmlStrictMode), nil + return yqlib.NewXMLDecoder(xmlAttributePrefix, xmlContentName, xmlStrictMode, xmlKeepNamespace, xmlUseRawToken), nil case yqlib.PropertiesInputFormat: return yqlib.NewPropertiesDecoder(), nil } diff --git a/go.sum b/go.sum index 7ef55494..d742be4b 100644 --- a/go.sum +++ b/go.sum @@ -72,7 +72,5 @@ gopkg.in/op/go-logging.v1 v1.0.0-20160211212156-b2cb9fa56473/go.mod h1:N1eN2tsCx gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo= -gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/pkg/yqlib/decoder_xml.go b/pkg/yqlib/decoder_xml.go index 0b951b14..9d98f68b 100644 --- a/pkg/yqlib/decoder_xml.go +++ b/pkg/yqlib/decoder_xml.go @@ -17,14 +17,23 @@ type xmlDecoder struct { attributePrefix string contentName string strictMode bool + keepNamespace bool + useRawToken bool finished bool } -func NewXMLDecoder(attributePrefix string, contentName string, strictMode bool) Decoder { +func NewXMLDecoder(attributePrefix string, contentName string, strictMode bool, keepNamespace bool, useRawToken bool) Decoder { if contentName == "" { contentName = "content" } - return &xmlDecoder{attributePrefix: attributePrefix, contentName: contentName, finished: false, strictMode: strictMode} + return &xmlDecoder{ + attributePrefix: attributePrefix, + contentName: contentName, + finished: false, + strictMode: strictMode, + keepNamespace: keepNamespace, + useRawToken: useRawToken, + } } func (dec *xmlDecoder) Init(reader io.Reader) { @@ -206,8 +215,15 @@ func (dec *xmlDecoder) decodeXML(root *xmlNode) error { n: root, } + getToken := func() (xml.Token, error) { + if dec.useRawToken { + return xmlDec.RawToken() + } + return xmlDec.Token() + } + for { - t, e := xmlDec.Token() + t, e := getToken() if e != nil && !errors.Is(e, io.EOF) { return e } @@ -228,6 +244,11 @@ func (dec *xmlDecoder) decodeXML(root *xmlNode) error { // Extract attributes as children for _, a := range se.Attr { + if dec.keepNamespace { + if a.Name.Space != "" { + a.Name.Local = a.Name.Space + ":" + a.Name.Local + } + } elem.n.AddChild(dec.attributePrefix+a.Name.Local, &xmlNode{Data: a.Value}) } case xml.CharData: diff --git a/pkg/yqlib/doc/usage/xml.md b/pkg/yqlib/doc/usage/xml.md index fb200381..729c8dc1 100644 --- a/pkg/yqlib/doc/usage/xml.md +++ b/pkg/yqlib/doc/usage/xml.md @@ -192,6 +192,52 @@ cat: # after cat ``` +## Parse xml: keep attribute namespace +Given a sample.xml file of: +```xml + + + + + +``` +then +```bash +yq -p=xml -o=xml --xml-keep-namespace '.' sample.xml +``` +will output +```xml + +``` + +instead of +```xml + +``` + +## Parse xml: keep raw attribute namespace +Given a sample.xml file of: +```xml + + + + + +``` +then +```bash +yq -p=xml -o=xml --xml-keep-namespace --xml-raw-token '.' sample.xml +``` +will output +```xml + +``` + +instead of +```xml + +``` + ## Encode xml: simple Given a sample.yml file of: ```yaml diff --git a/pkg/yqlib/encoder_xml.go b/pkg/yqlib/encoder_xml.go index 5ac9973f..b84b59de 100644 --- a/pkg/yqlib/encoder_xml.go +++ b/pkg/yqlib/encoder_xml.go @@ -9,7 +9,7 @@ import ( yaml "gopkg.in/yaml.v3" ) -var XMLPreferences = xmlPreferences{AttributePrefix: "+", ContentName: "+content", StrictMode: false} +var XMLPreferences = xmlPreferences{AttributePrefix: "+", ContentName: "+content", StrictMode: false, UseRawToken: false} type xmlEncoder struct { attributePrefix string diff --git a/pkg/yqlib/expression_tokeniser.go b/pkg/yqlib/expression_tokeniser.go index f6a2364c..657bfffb 100644 --- a/pkg/yqlib/expression_tokeniser.go +++ b/pkg/yqlib/expression_tokeniser.go @@ -420,9 +420,9 @@ func initLexer() (*lex.Lexer, error) { lexer.Add([]byte(`load`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewYamlDecoder()})) - lexer.Add([]byte(`xmlload`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewXMLDecoder(XMLPreferences.AttributePrefix, XMLPreferences.ContentName, XMLPreferences.StrictMode)})) - lexer.Add([]byte(`load_xml`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewXMLDecoder(XMLPreferences.AttributePrefix, XMLPreferences.ContentName, XMLPreferences.StrictMode)})) - lexer.Add([]byte(`loadxml`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewXMLDecoder(XMLPreferences.AttributePrefix, XMLPreferences.ContentName, XMLPreferences.StrictMode)})) + lexer.Add([]byte(`xmlload`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewXMLDecoder(XMLPreferences.AttributePrefix, XMLPreferences.ContentName, XMLPreferences.StrictMode, XMLPreferences.KeepNamespace, XMLPreferences.UseRawToken)})) + lexer.Add([]byte(`load_xml`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewXMLDecoder(XMLPreferences.AttributePrefix, XMLPreferences.ContentName, XMLPreferences.StrictMode, XMLPreferences.KeepNamespace, XMLPreferences.UseRawToken)})) + lexer.Add([]byte(`loadxml`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewXMLDecoder(XMLPreferences.AttributePrefix, XMLPreferences.ContentName, XMLPreferences.StrictMode, XMLPreferences.KeepNamespace, XMLPreferences.UseRawToken)})) lexer.Add([]byte(`load_base64`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewBase64Decoder()})) diff --git a/pkg/yqlib/lib.go b/pkg/yqlib/lib.go index f662d15c..ef4be5fd 100644 --- a/pkg/yqlib/lib.go +++ b/pkg/yqlib/lib.go @@ -26,6 +26,8 @@ type xmlPreferences struct { AttributePrefix string ContentName string StrictMode bool + KeepNamespace bool + UseRawToken bool } var log = logging.MustGetLogger("yq-lib") diff --git a/pkg/yqlib/operator_encoder_decoder.go b/pkg/yqlib/operator_encoder_decoder.go index 3ccac2f1..ac4fd73f 100644 --- a/pkg/yqlib/operator_encoder_decoder.go +++ b/pkg/yqlib/operator_encoder_decoder.go @@ -104,7 +104,12 @@ func decodeOperator(d *dataTreeNavigator, context Context, expressionNode *Expre case YamlInputFormat: decoder = NewYamlDecoder() case XMLInputFormat: - decoder = NewXMLDecoder(XMLPreferences.AttributePrefix, XMLPreferences.ContentName, XMLPreferences.StrictMode) + decoder = NewXMLDecoder( + XMLPreferences.AttributePrefix, + XMLPreferences.ContentName, + XMLPreferences.StrictMode, + XMLPreferences.KeepNamespace, + XMLPreferences.UseRawToken) case Base64InputFormat: decoder = NewBase64Decoder() case PropertiesInputFormat: diff --git a/pkg/yqlib/xml_test.go b/pkg/yqlib/xml_test.go index 7e00f918..276f7006 100644 --- a/pkg/yqlib/xml_test.go +++ b/pkg/yqlib/xml_test.go @@ -153,6 +153,24 @@ var expectedXMLWithComments = ` ` +var inputXMLWithNamespacedAttr = ` + + + +` + +var expectedYAMLWithNamespacedAttr = `map: + +xmlns: some-namespace + +xmlns:xsi: some-instance + +some-instance:schemaLocation: some-url +` + +var expectedYAMLWithRawNamespacedAttr = `map: + +xmlns: some-namespace + +xmlns:xsi: some-instance + +xsi:schemaLocation: some-url +` + var xmlWithCustomDtd = `