Added XML processing instructions and directive support

This commit is contained in:
Mike Farah 2022-10-18 14:15:25 +11:00
parent 14f827b9b7
commit d8da9c8ccc
14 changed files with 463 additions and 141 deletions

View File

@ -7,27 +7,6 @@ a lightweight and portable command-line YAML, JSON and XML processor. `yq` uses
yq is written in go - so you can download a dependency free binary for your platform and you are good to go! If you prefer there are a variety of package managers that can be used as well as Docker and Podman, all listed below.
## Notice for v4.x versions prior to 4.18.1
Since 4.18.1, yq's 'eval/e' command is the _default_ command and no longer needs to be specified.
Older versions will still need to specify 'eval/e'.
Similarly, '-' is no longer required as a filename to read from STDIN (unless reading from one or more files).
TLDR:
Prior to 4.18.1
```bash
yq e '.cool' - < file.yaml
```
4.18+
```bash
yq '.cool' < file.yaml
```
When merging multiple files together, `eval-all/ea` is still required to tell `yq` to run the expression against all the document at once.
## Quick Usage Guide
Read a value:

View File

@ -127,6 +127,7 @@ testInputXmlNamespaces() {
EOL
read -r -d '' expected << EOM
+p_xml: version="1.0"
map:
+xmlns: some-namespace
+xmlns:xsi: some-instance
@ -140,6 +141,26 @@ EOM
assertEquals "$expected" "$X"
}
testInputXmlRoundtrip() {
cat >test.yml <<EOL
<?xml version="1.0"?>
<!DOCTYPE config SYSTEM "/etc/iwatch/iwatch.dtd" >
<map xmlns="some-namespace" xmlns:xsi="some-instance" xsi:schemaLocation="some-url">Meow</map>
EOL
read -r -d '' expected << EOM
<?xml version="1.0"?>
<!DOCTYPE config SYSTEM "/etc/iwatch/iwatch.dtd" >
<map xmlns="some-namespace" xmlns:xsi="some-instance" xsi:schemaLocation="some-url">Meow</map>
EOM
X=$(./yq -p=xml -o=xml test.yml)
assertEquals "$expected" "$X"
X=$(./yq ea -p=xml -o=xml test.yml)
assertEquals "$expected" "$X"
}
testInputXmlStrict() {
cat >test.yml <<EOL
@ -153,11 +174,11 @@ testInputXmlStrict() {
</root>
EOL
X=$(./yq -p=xml --xml-strict-mode test.yml 2>&1)
X=$(./yq -p=xml --xml-strict-mode test.yml -o=xml 2>&1)
assertEquals 1 $?
assertEquals "Error: bad file 'test.yml': XML syntax error on line 7: invalid character entity &writer;" "$X"
X=$(./yq ea -p=xml --xml-strict-mode test.yml 2>&1)
X=$(./yq ea -p=xml --xml-strict-mode test.yml -o=xml 2>&1)
assertEquals "Error: bad file 'test.yml': XML syntax error on line 7: invalid character entity &writer;" "$X"
}

View File

@ -8,12 +8,6 @@ var outputToJSON = false
var outputFormat = "yaml"
var inputFormat = "yaml"
var xmlAttributePrefix = "+"
var xmlContentName = "+content"
var xmlStrictMode = false
var xmlKeepNamespace = true
var xmlUseRawToken = true
var exitStatus = false
var forceColor = false
var forceNoColor = false

View File

@ -47,14 +47,18 @@ yq -P sample.json
if verbose {
backend.SetLevel(logging.DEBUG, "")
} else {
backend.SetLevel(logging.ERROR, "")
backend.SetLevel(logging.WARNING, "")
}
logging.SetBackend(backend)
yqlib.InitExpressionParser()
yqlib.XMLPreferences.AttributePrefix = xmlAttributePrefix
yqlib.XMLPreferences.ContentName = xmlContentName
yqlib.XMLPreferences.StrictMode = xmlStrictMode
if (inputFormat == "x" || inputFormat == "xml") &&
outputFormat != "x" && outputFormat != "xml" &&
yqlib.XMLPreferences.AttributePrefix == "+" {
yqlib.GetLogger().Warning("The default xml-attribute-prefix will change in the v4.30 to `+@` to avoid " +
"naming conflicts with the default content name, directive name and proc inst prefix. If you need to keep " +
"`+` please set that value explicityly with --xml-attribute-prefix.")
}
},
}
@ -69,11 +73,15 @@ yq -P sample.json
rootCmd.PersistentFlags().StringVarP(&outputFormat, "output-format", "o", "yaml", "[yaml|y|json|j|props|p|xml|x] output format type.")
rootCmd.PersistentFlags().StringVarP(&inputFormat, "input-format", "p", "yaml", "[yaml|y|props|p|xml|x] parse format for input. Note that json is a subset of yaml.")
rootCmd.PersistentFlags().StringVar(&xmlAttributePrefix, "xml-attribute-prefix", "+", "prefix for xml attributes")
rootCmd.PersistentFlags().StringVar(&xmlContentName, "xml-content-name", "+content", "name for xml content (if no attribute name is present).")
rootCmd.PersistentFlags().BoolVar(&xmlStrictMode, "xml-strict-mode", false, "enables strict parsing of XML. See https://pkg.go.dev/encoding/xml for more details.")
rootCmd.PersistentFlags().BoolVar(&xmlKeepNamespace, "xml-keep-namespace", true, "enables keeping namespace after parsing attributes")
rootCmd.PersistentFlags().BoolVar(&xmlUseRawToken, "xml-raw-token", true, "enables using RawToken method instead Token. Commonly disables namespace translations. See https://pkg.go.dev/encoding/xml#Decoder.RawToken for details.")
rootCmd.PersistentFlags().StringVar(&yqlib.XMLPreferences.AttributePrefix, "xml-attribute-prefix", "+", "prefix for xml attributes")
rootCmd.PersistentFlags().StringVar(&yqlib.XMLPreferences.ContentName, "xml-content-name", "+content", "name for xml content (if no attribute name is present).")
rootCmd.PersistentFlags().BoolVar(&yqlib.XMLPreferences.StrictMode, "xml-strict-mode", false, "enables strict parsing of XML. See https://pkg.go.dev/encoding/xml for more details.")
rootCmd.PersistentFlags().BoolVar(&yqlib.XMLPreferences.KeepNamespace, "xml-keep-namespace", true, "enables keeping namespace after parsing attributes")
rootCmd.PersistentFlags().BoolVar(&yqlib.XMLPreferences.UseRawToken, "xml-raw-token", true, "enables using RawToken method instead Token. Commonly disables namespace translations. See https://pkg.go.dev/encoding/xml#Decoder.RawToken for details.")
rootCmd.PersistentFlags().StringVar(&yqlib.XMLPreferences.ProcInstPrefix, "xml-proc-inst-prefix", "+p_", "prefix for xml processing instructions (e.g. <?xml version=\"1\"?>)")
rootCmd.PersistentFlags().StringVar(&yqlib.XMLPreferences.DirectiveName, "xml-directive-name", "+directive", "name for xml directives (e.g. <!DOCTYPE thing cat>)")
rootCmd.PersistentFlags().BoolVar(&yqlib.XMLPreferences.SkipProcInst, "xml-skip-proc-inst", false, "skip over process instructions (e.g. <?xml version=\"1\"?>)")
rootCmd.PersistentFlags().BoolVar(&yqlib.XMLPreferences.SkipDirectives, "xml-skip-directives", false, "skip over directives (e.g. <!DOCTYPE thing cat>)")
rootCmd.PersistentFlags().BoolVarP(&nullInput, "null-input", "n", false, "Don't read input, simply evaluate the expression given. Useful for creating docs from scratch.")
rootCmd.PersistentFlags().BoolVarP(&noDocSeparators, "no-doc", "N", false, "Don't print document separators (---)")

View File

@ -63,7 +63,7 @@ func configureDecoder() (yqlib.Decoder, error) {
}
switch yqlibInputFormat {
case yqlib.XMLInputFormat:
return yqlib.NewXMLDecoder(xmlAttributePrefix, xmlContentName, xmlStrictMode, xmlKeepNamespace, xmlUseRawToken), nil
return yqlib.NewXMLDecoder(yqlib.XMLPreferences), nil
case yqlib.PropertiesInputFormat:
return yqlib.NewPropertiesDecoder(), nil
case yqlib.JsonInputFormat:
@ -107,7 +107,7 @@ func configureEncoder(format yqlib.PrinterOutputFormat) yqlib.Encoder {
case yqlib.YamlOutputFormat:
return yqlib.NewYamlEncoder(indent, colorsEnabled, !noDocSeparators, unwrapScalar)
case yqlib.XMLOutputFormat:
return yqlib.NewXMLEncoder(indent, xmlAttributePrefix, xmlContentName)
return yqlib.NewXMLEncoder(indent, yqlib.XMLPreferences)
}
panic("invalid encoder")
}

View File

@ -1,14 +1,7 @@
<!-- before cat -->
<cat>
<!-- in cat before -->
<x>3<!-- multi
line comment
for x --></x>
<y>
<!-- in y before -->
<d><!-- in d before -->4<!-- in d after --></d>
<!-- in y after -->
</y>
<!-- in_cat_after -->
</cat>
<!-- after cat -->
<?xml version="1.0"?>
<!DOCTYPE config SYSTEM "/etc/iwatch/iwatch.dtd" >
<apple>
<?coolioo version="1.0"?>
<!DOCTYPE config SYSTEM "/etc/iwatch/iwatch.dtd" >
<b>things</b>
</apple>

View File

@ -12,27 +12,16 @@ import (
)
type xmlDecoder struct {
reader io.Reader
readAnything bool
attributePrefix string
contentName string
strictMode bool
keepNamespace bool
useRawToken bool
finished bool
reader io.Reader
readAnything bool
finished bool
prefs xmlPreferences
}
func NewXMLDecoder(attributePrefix string, contentName string, strictMode bool, keepNamespace bool, useRawToken bool) Decoder {
if contentName == "" {
contentName = "content"
}
func NewXMLDecoder(prefs xmlPreferences) Decoder {
return &xmlDecoder{
attributePrefix: attributePrefix,
contentName: contentName,
finished: false,
strictMode: strictMode,
keepNamespace: keepNamespace,
useRawToken: useRawToken,
finished: false,
prefs: prefs,
}
}
@ -67,7 +56,7 @@ func (dec *xmlDecoder) createMap(n *xmlNode) (*yaml.Node, error) {
yamlNode := &yaml.Node{Kind: yaml.MappingNode, Tag: "!!map"}
if len(n.Data) > 0 {
label := dec.contentName
label := dec.prefs.ContentName
labelNode := createScalarNode(label, label)
labelNode.HeadComment = dec.processComment(n.HeadComment)
labelNode.FootComment = dec.processComment(n.FootComment)
@ -86,9 +75,7 @@ func (dec *xmlDecoder) createMap(n *xmlNode) (*yaml.Node, error) {
}
// if i == len(n.Children)-1 {
labelNode.FootComment = dec.processComment(keyValuePair.FootComment)
// }
log.Debug("len of children in %v is %v", label, len(children))
if len(children) > 1 {
@ -205,7 +192,7 @@ type element struct {
// of the map keys.
func (dec *xmlDecoder) decodeXML(root *xmlNode) error {
xmlDec := xml.NewDecoder(dec.reader)
xmlDec.Strict = dec.strictMode
xmlDec.Strict = dec.prefs.StrictMode
// That will convert the charset if the provided XML is non-UTF-8
xmlDec.CharsetReader = charset.NewReaderLabel
@ -216,7 +203,7 @@ func (dec *xmlDecoder) decodeXML(root *xmlNode) error {
}
getToken := func() (xml.Token, error) {
if dec.useRawToken {
if dec.prefs.UseRawToken {
return xmlDec.RawToken()
}
return xmlDec.Token()
@ -244,12 +231,12 @@ func (dec *xmlDecoder) decodeXML(root *xmlNode) error {
// Extract attributes as children
for _, a := range se.Attr {
if dec.keepNamespace {
if dec.prefs.KeepNamespace {
if a.Name.Space != "" {
a.Name.Local = a.Name.Space + ":" + a.Name.Local
}
}
elem.n.AddChild(dec.attributePrefix+a.Name.Local, &xmlNode{Data: a.Value})
elem.n.AddChild(dec.prefs.AttributePrefix+a.Name.Local, &xmlNode{Data: a.Value})
}
case xml.CharData:
// Extract XML data (if any)
@ -282,6 +269,14 @@ func (dec *xmlDecoder) decodeXML(root *xmlNode) error {
elem.n.HeadComment = joinFilter([]string{elem.n.HeadComment, commentStr})
}
case xml.ProcInst:
if !dec.prefs.SkipProcInst {
elem.n.AddChild(dec.prefs.ProcInstPrefix+se.Target, &xmlNode{Data: string(se.Inst)})
}
case xml.Directive:
if !dec.prefs.SkipDirectives {
elem.n.AddChild(dec.prefs.DirectiveName, &xmlNode{Data: string(se)})
}
}
}

View File

@ -4,4 +4,37 @@ Encode and decode to and from XML. Whitespace is not conserved for round trips -
Consecutive xml nodes with the same name are assumed to be arrays.
XML content data and attributes are created as fields. This can be controlled by the `'--xml-attribute-prefix` and `--xml-content-name` flags - see below for examples.
XML content data, attributes processing instructions and directives are all created as plain fields.
This can be controlled by:
| Flag | Default |Sample XML |
| -- | -- | -- |
| `--xml-attribute-prefix` | `+` (changing to `+@` soon) | Legs in ```<cat legs="4"/>``` |
| `--xml-content-name` | `+content` | Meow in ```<cat>Meow <fur>true</true></cat>``` |
| `--xml-directive-name` | `+directive` | ```<!DOCTYPE config system "blah">``` |
| `--xml-proc-inst-prefix` | `+p_` | ```<?xml version="1"?>``` |
{% hint style="warning" %}
Default Attribute Prefix will be changing in v4.30!
In order to avoid name conflicts (e.g. having an attribute named "content" will create a field that clashes with the default content name of "+content") the attribute prefix will be changing to "+@".
This will affect users that have not set their own prefix and are not roundtripping XML changes.
{% endhint %}
## Encoder / Decoder flag options
In addition to the above flags, there are the following xml encoder/decoder options controlled by flags:
| Flag | Default | Description |
| -- | -- | -- |
| `--xml-strict-mode` | false | Strict mode enforces the requirements of the XML specification. When switched off the parser allows input containing common mistakes. See [the Golang xml decoder ](https://pkg.go.dev/encoding/xml#Decoder) for more details.|
| `--xml-keep-namespace` | true | Keeps the namespace of attributes |
| `--xml-raw-token` | true | Does not verify that start and end elements match and does not translate name space prefixes to their corresponding URLs. |
| `--xml-skip-proc-inst` | false | Skips over processing instructions, e.g. `<?xml version="1"?>` |
| `--xml-skip-directives` | false | Skips over directives, e.g. ```<!DOCTYPE config system "blah">``` |
See below for examples

View File

@ -4,7 +4,40 @@ Encode and decode to and from XML. Whitespace is not conserved for round trips -
Consecutive xml nodes with the same name are assumed to be arrays.
XML content data and attributes are created as fields. This can be controlled by the `'--xml-attribute-prefix` and `--xml-content-name` flags - see below for examples.
XML content data, attributes processing instructions and directives are all created as plain fields.
This can be controlled by:
| Flag | Default |Sample XML |
| -- | -- | -- |
| `--xml-attribute-prefix` | `+` (changing to `+@` soon) | Legs in ```<cat legs="4"/>``` |
| `--xml-content-name` | `+content` | Meow in ```<cat>Meow <fur>true</true></cat>``` |
| `--xml-directive-name` | `+directive` | ```<!DOCTYPE config system "blah">``` |
| `--xml-proc-inst-prefix` | `+p_` | ```<?xml version="1"?>``` |
{% hint style="warning" %}
Default Attribute Prefix will be changing in v4.30!
In order to avoid name conflicts (e.g. having an attribute named "content" will create a field that clashes with the default content name of "+content") the attribute prefix will be changing to "+@".
This will affect users that have not set their own prefix and are not roundtripping XML changes.
{% endhint %}
## Encoder / Decoder flag options
In addition to the above flags, there are the following xml encoder/decoder options controlled by flags:
| Flag | Default | Description |
| -- | -- | -- |
| `--xml-strict-mode` | false | Strict mode enforces the requirements of the XML specification. When switched off the parser allows input containing common mistakes. See [the Golang xml decoder ](https://pkg.go.dev/encoding/xml#Decoder) for more details.|
| `--xml-keep-namespace` | true | Keeps the namespace of attributes |
| `--xml-raw-token` | true | Does not verify that start and end elements match and does not translate name space prefixes to their corresponding URLs. |
| `--xml-skip-proc-inst` | false | Skips over processing instructions, e.g. `<?xml version="1"?>` |
| `--xml-skip-directives` | false | Skips over directives, e.g. ```<!DOCTYPE config system "blah">``` |
See below for examples
{% hint style="warning" %}
Note that versions prior to 4.18 require the 'eval/e' command to be specified.&#x20;
@ -30,6 +63,7 @@ yq -p=xml '.' sample.xml
```
will output
```yaml
+p_xml: version="1.0" encoding="UTF-8"
cat:
says: meow
legs: "4"
@ -54,6 +88,7 @@ yq -p=xml ' (.. | select(tag == "!!str")) |= from_yaml' sample.xml
```
will output
```yaml
+p_xml: version="1.0" encoding="UTF-8"
cat:
says: meow
legs: 4
@ -75,6 +110,7 @@ yq -p=xml '.' sample.xml
```
will output
```yaml
+p_xml: version="1.0" encoding="UTF-8"
animal:
- cat
- goat
@ -96,6 +132,7 @@ yq -p=xml '.' sample.xml
```
will output
```yaml
+p_xml: version="1.0" encoding="UTF-8"
cat:
+legs: "4"
legs: "7"
@ -115,13 +152,14 @@ yq -p=xml '.' sample.xml
```
will output
```yaml
+p_xml: version="1.0" encoding="UTF-8"
cat:
+content: meow
+legs: "4"
```
## Parse xml: custom dtd
DTD entities are ignored.
DTD entities are processed as directives.
Given a sample.xml file of:
```xml
@ -137,12 +175,45 @@ Given a sample.xml file of:
```
then
```bash
yq -p=xml '.' sample.xml
yq -p=xml -o=xml '.' sample.xml
```
will output
```yaml
root:
item: '&writer;&copyright;'
```xml
<?xml version="1.0"?>
<!DOCTYPE root [
<!ENTITY writer "Blah.">
<!ENTITY copyright "Blah">
]>
<root>
<item>&amp;writer;&amp;copyright;</item>
</root>
```
## Parse xml: skip custom dtd
DTDs are directives, skip over directives to skip DTDs.
Given a sample.xml file of:
```xml
<?xml version="1.0"?>
<!DOCTYPE root [
<!ENTITY writer "Blah.">
<!ENTITY copyright "Blah">
]>
<root>
<item>&writer;&copyright;</item>
</root>
```
then
```bash
yq -p=xml -o=xml --xml-skip-directives '.' sample.xml
```
will output
```xml
<?xml version="1.0"?>
<root>
<item>&amp;writer;&amp;copyright;</item>
</root>
```
## Parse xml: with comments
@ -207,12 +278,14 @@ yq -p=xml -o=xml --xml-keep-namespace '.' sample.xml
```
will output
```xml
<?xml version="1.0"?>
<map xmlns="some-namespace" xmlns:xsi="some-instance" some-instance:schemaLocation="some-url"></map>
```
instead of
```xml
<map xmlns="some-namespace" xsi="some-instance" schemaLocation="some-url"></map>
<?xml version="1.0"?>
<map xmlns="some-namespace" xmlns:xsi="some-instance" some-instance:schemaLocation="some-url"></map>
```
## Parse xml: keep raw attribute namespace
@ -230,11 +303,13 @@ yq -p=xml -o=xml --xml-keep-namespace --xml-raw-token '.' sample.xml
```
will output
```xml
<map xmlns="some-namespace" xmlns:xsi="some-instance" xsi:schemaLocation="some-url"></map>
<?xml version="1.0"?>
<map xmlns="some-namespace" xmlns:xsi="some-instance" some-instance:schemaLocation="some-url"></map>
```
instead of
```xml
<?xml version="1.0"?>
<map xmlns="some-namespace" xsi="some-instance" schemaLocation="some-url"></map>
```
@ -339,6 +414,32 @@ will output
</cat><!-- below_cat -->
```
## Encode: doctype and xml declaration
Use the special xml names to add/modify proc instructions and directives.
Given a sample.yml file of:
```yaml
+p_xml: version="1.0"
+directive: 'DOCTYPE config SYSTEM "/etc/iwatch/iwatch.dtd" '
apple:
+p_coolioo: version="1.0"
+directive: 'CATYPE meow purr puss '
b: things
```
then
```bash
yq -o=xml '.' sample.yml
```
will output
```xml
<?xml version="1.0"?>
<!DOCTYPE config SYSTEM "/etc/iwatch/iwatch.dtd" >
<apple><?coolioo version="1.0"?><!CATYPE meow purr puss >
<b>things</b>
</apple>
```
## Round trip: with comments
A best effort is made, but comment positions and white space are not preserved perfectly.
@ -380,3 +481,31 @@ in d before -->
</cat><!-- after cat -->
```
## Roundtrip: with doctype and declaration
yq parses XML proc instructions and directives into nodes.
Unfortunately the underlying XML parser loses whitespace information.
Given a sample.xml file of:
```xml
<?xml version="1.0"?>
<!DOCTYPE config SYSTEM "/etc/iwatch/iwatch.dtd" >
<apple>
<?coolioo version="1.0"?>
<!CATYPE meow purr puss >
<b>things</b>
</apple>
```
then
```bash
yq -p=xml -o=xml '.' sample.xml
```
will output
```xml
<?xml version="1.0"?>
<!DOCTYPE config SYSTEM "/etc/iwatch/iwatch.dtd" >
<apple><?coolioo version="1.0"?><!CATYPE meow purr puss >
<b>things</b>
</apple>
```

View File

@ -9,21 +9,19 @@ import (
yaml "gopkg.in/yaml.v3"
)
var XMLPreferences = xmlPreferences{AttributePrefix: "+", ContentName: "+content", StrictMode: false, UseRawToken: false}
type xmlEncoder struct {
attributePrefix string
contentName string
indentString string
indentString string
writer io.Writer
prefs xmlPreferences
}
func NewXMLEncoder(indent int, attributePrefix string, contentName string) Encoder {
func NewXMLEncoder(indent int, prefs xmlPreferences) Encoder {
var indentString = ""
for index := 0; index < indent; index++ {
indentString = indentString + " "
}
return &xmlEncoder{attributePrefix, contentName, indentString}
return &xmlEncoder{indentString, nil, prefs}
}
func (e *xmlEncoder) CanHandleAliases() bool {
@ -40,6 +38,8 @@ func (e *xmlEncoder) PrintLeadingContent(writer io.Writer, content string) error
func (e *xmlEncoder) Encode(writer io.Writer, node *yaml.Node) error {
encoder := xml.NewEncoder(writer)
// hack so we can manually add newlines to procInst and directives
e.writer = writer
encoder.Indent("", e.indentString)
switch node.Kind {
@ -77,6 +77,23 @@ func (e *xmlEncoder) Encode(writer io.Writer, node *yaml.Node) error {
}
func (e *xmlEncoder) encodeTopLevelMap(encoder *xml.Encoder, node *yaml.Node) error {
// make sure <?xml .. ?> processing instructions are encoded first
for i := 0; i < len(node.Content); i += 2 {
key := node.Content[i]
value := node.Content[i+1]
if key.Value == (e.prefs.ProcInstPrefix + "xml") {
name := strings.Replace(key.Value, e.prefs.ProcInstPrefix, "", 1)
procInst := xml.ProcInst{Target: name, Inst: []byte(value.Value)}
if err := encoder.EncodeToken(procInst); err != nil {
return err
}
if _, err := e.writer.Write([]byte("\n")); err != nil {
log.Warning("Unable to write newline, skipping: %w", err)
}
}
}
err := e.encodeComment(encoder, headAndLineComment(node))
if err != nil {
return err
@ -92,11 +109,33 @@ func (e *xmlEncoder) encodeTopLevelMap(encoder *xml.Encoder, node *yaml.Node) er
return err
}
log.Debugf("recursing")
if key.Value == (e.prefs.ProcInstPrefix + "xml") {
// dont double process these.
} else if strings.HasPrefix(key.Value, e.prefs.ProcInstPrefix) {
name := strings.Replace(key.Value, e.prefs.ProcInstPrefix, "", 1)
procInst := xml.ProcInst{Target: name, Inst: []byte(value.Value)}
if err := encoder.EncodeToken(procInst); err != nil {
return err
}
if _, err := e.writer.Write([]byte("\n")); err != nil {
log.Warning("Unable to write newline, skipping: %w", err)
}
} else if key.Value == e.prefs.DirectiveName {
var directive xml.Directive = []byte(value.Value)
if err := encoder.EncodeToken(directive); err != nil {
return err
}
if _, err := e.writer.Write([]byte("\n")); err != nil {
log.Warning("Unable to write newline, skipping: %w", err)
}
} else {
err = e.doEncode(encoder, value, start)
if err != nil {
return err
log.Debugf("recursing")
err = e.doEncode(encoder, value, start)
if err != nil {
return err
}
}
err = e.encodeComment(encoder, footComment(key))
if err != nil {
@ -180,6 +219,13 @@ func (e *xmlEncoder) encodeArray(encoder *xml.Encoder, node *yaml.Node, start xm
return e.encodeComment(encoder, footComment(node))
}
func (e *xmlEncoder) isAttribute(name string) bool {
return strings.HasPrefix(name, e.prefs.AttributePrefix) &&
name != e.prefs.ContentName &&
name != e.prefs.DirectiveName &&
!strings.HasPrefix(name, e.prefs.ProcInstPrefix)
}
func (e *xmlEncoder) encodeMap(encoder *xml.Encoder, node *yaml.Node, start xml.StartElement) error {
log.Debug("its a map")
@ -188,9 +234,9 @@ func (e *xmlEncoder) encodeMap(encoder *xml.Encoder, node *yaml.Node, start xml.
key := node.Content[i]
value := node.Content[i+1]
if strings.HasPrefix(key.Value, e.attributePrefix) && key.Value != e.contentName {
if e.isAttribute(key.Value) {
if value.Kind == yaml.ScalarNode {
attributeName := strings.Replace(key.Value, e.attributePrefix, "", 1)
attributeName := strings.Replace(key.Value, e.prefs.AttributePrefix, "", 1)
start.Attr = append(start.Attr, xml.Attr{Name: xml.Name{Local: attributeName}, Value: value.Value})
} else {
return fmt.Errorf("cannot use %v as attribute, only scalars are supported", value.Tag)
@ -212,14 +258,18 @@ func (e *xmlEncoder) encodeMap(encoder *xml.Encoder, node *yaml.Node, start xml.
if err != nil {
return err
}
if !strings.HasPrefix(key.Value, e.attributePrefix) && key.Value != e.contentName {
start := xml.StartElement{Name: xml.Name{Local: key.Value}}
err := e.doEncode(encoder, value, start)
if err != nil {
if strings.HasPrefix(key.Value, e.prefs.ProcInstPrefix) {
name := strings.Replace(key.Value, e.prefs.ProcInstPrefix, "", 1)
procInst := xml.ProcInst{Target: name, Inst: []byte(value.Value)}
if err := encoder.EncodeToken(procInst); err != nil {
return err
}
} else if key.Value == e.contentName {
} else if key.Value == e.prefs.DirectiveName {
var directive xml.Directive = []byte(value.Value)
if err := encoder.EncodeToken(directive); err != nil {
return err
}
} else if key.Value == e.prefs.ContentName {
// directly encode the contents
err = e.encodeComment(encoder, headAndLineComment(value))
if err != nil {
@ -234,6 +284,12 @@ func (e *xmlEncoder) encodeMap(encoder *xml.Encoder, node *yaml.Node, start xml.
if err != nil {
return err
}
} else if !e.isAttribute(key.Value) {
start := xml.StartElement{Name: xml.Name{Local: key.Value}}
err := e.doEncode(encoder, value, start)
if err != nil {
return err
}
}
err = e.encodeComment(encoder, footComment(key))
if err != nil {

View File

@ -76,7 +76,7 @@ var participleYqRules = []*participleYqRule{
{"Base64d", `@base64d`, decodeOp(Base64InputFormat), 0},
{"Base64", `@base64`, encodeWithIndent(Base64OutputFormat, 0), 0},
{"LoadXML", `load_?xml|xml_?load`, loadOp(NewXMLDecoder(XMLPreferences.AttributePrefix, XMLPreferences.ContentName, XMLPreferences.StrictMode, XMLPreferences.KeepNamespace, XMLPreferences.UseRawToken), false), 0},
{"LoadXML", `load_?xml|xml_?load`, loadOp(NewXMLDecoder(XMLPreferences), false), 0},
{"LoadBase64", `load_?base64`, loadOp(NewBase64Decoder(), false), 0},

View File

@ -27,8 +27,28 @@ type xmlPreferences struct {
StrictMode bool
KeepNamespace bool
UseRawToken bool
ProcInstPrefix string
DirectiveName string
SkipProcInst bool
SkipDirectives bool
}
func NewDefaultXmlPreferences() xmlPreferences {
return xmlPreferences{
AttributePrefix: "+",
ContentName: "+content",
StrictMode: false,
KeepNamespace: true,
UseRawToken: false,
ProcInstPrefix: "+p_",
DirectiveName: "+directive",
SkipProcInst: false,
SkipDirectives: false,
}
}
var XMLPreferences = NewDefaultXmlPreferences()
var log = logging.MustGetLogger("yq-lib")
var PrettyPrintExp = `(... | (select(tag != "!!str"), select(tag == "!!str") | select(test("(?i)^(y|yes|n|no|on|off)$") | not)) ) style=""`

View File

@ -23,7 +23,7 @@ func configureEncoder(format PrinterOutputFormat, indent int) Encoder {
case YamlOutputFormat:
return NewYamlEncoder(indent, false, true, true)
case XMLOutputFormat:
return NewXMLEncoder(indent, XMLPreferences.AttributePrefix, XMLPreferences.ContentName)
return NewXMLEncoder(indent, XMLPreferences)
case Base64OutputFormat:
return NewBase64Encoder()
}
@ -104,12 +104,7 @@ func decodeOperator(d *dataTreeNavigator, context Context, expressionNode *Expre
case YamlInputFormat:
decoder = NewYamlDecoder()
case XMLInputFormat:
decoder = NewXMLDecoder(
XMLPreferences.AttributePrefix,
XMLPreferences.ContentName,
XMLPreferences.StrictMode,
XMLPreferences.KeepNamespace,
XMLPreferences.UseRawToken)
decoder = NewXMLDecoder(XMLPreferences)
case Base64InputFormat:
decoder = NewBase64Decoder()
case PropertiesInputFormat:

View File

@ -159,13 +159,15 @@ const inputXMLWithNamespacedAttr = `
</map>
`
const expectedYAMLWithNamespacedAttr = `map:
const expectedYAMLWithNamespacedAttr = `+p_xml: version="1.0"
map:
+xmlns: some-namespace
+xmlns:xsi: some-instance
+some-instance:schemaLocation: some-url
`
const expectedYAMLWithRawNamespacedAttr = `map:
const expectedYAMLWithRawNamespacedAttr = `+p_xml: version="1.0"
map:
+xmlns: some-namespace
+xmlns:xsi: some-instance
+xsi:schemaLocation: some-url
@ -181,8 +183,44 @@ const xmlWithCustomDtd = `
<item>&writer;&copyright;</item>
</root>`
const expectedDtd = `root:
item: '&writer;&copyright;'
const expectedDtd = `<?xml version="1.0"?>
<!DOCTYPE root [
<!ENTITY writer "Blah.">
<!ENTITY copyright "Blah">
]>
<root>
<item>&amp;writer;&amp;copyright;</item>
</root>
`
const expectedSkippedDtd = `<?xml version="1.0"?>
<root>
<item>&amp;writer;&amp;copyright;</item>
</root>
`
const xmlWithProcInstAndDirectives = `<?xml version="1.0"?>
<!DOCTYPE config SYSTEM "/etc/iwatch/iwatch.dtd" >
<apple>
<?coolioo version="1.0"?>
<!CATYPE meow purr puss >
<b>things</b>
</apple>
`
const yamlWithProcInstAndDirectives = `+p_xml: version="1.0"
+directive: 'DOCTYPE config SYSTEM "/etc/iwatch/iwatch.dtd" '
apple:
+p_coolioo: version="1.0"
+directive: 'CATYPE meow purr puss '
b: things
`
const expectedXmlWithProcInstAndDirectives = `<?xml version="1.0"?>
<!DOCTYPE config SYSTEM "/etc/iwatch/iwatch.dtd" >
<apple><?coolioo version="1.0"?><!CATYPE meow purr puss >
<b>things</b>
</apple>
`
var xmlScenarios = []formatScenario{
@ -190,38 +228,46 @@ var xmlScenarios = []formatScenario{
description: "Parse xml: simple",
subdescription: "Notice how all the values are strings, see the next example on how you can fix that.",
input: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<cat>\n <says>meow</says>\n <legs>4</legs>\n <cute>true</cute>\n</cat>",
expected: "cat:\n says: meow\n legs: \"4\"\n cute: \"true\"\n",
expected: "+p_xml: version=\"1.0\" encoding=\"UTF-8\"\ncat:\n says: meow\n legs: \"4\"\n cute: \"true\"\n",
},
{
description: "Parse xml: number",
subdescription: "All values are assumed to be strings when parsing XML, but you can use the `from_yaml` operator on all the strings values to autoparse into the correct type.",
input: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<cat>\n <says>meow</says>\n <legs>4</legs>\n <cute>true</cute>\n</cat>",
expression: " (.. | select(tag == \"!!str\")) |= from_yaml",
expected: "cat:\n says: meow\n legs: 4\n cute: true\n",
expected: "+p_xml: version=\"1.0\" encoding=\"UTF-8\"\ncat:\n says: meow\n legs: 4\n cute: true\n",
},
{
description: "Parse xml: array",
subdescription: "Consecutive nodes with identical xml names are assumed to be arrays.",
input: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<animal>cat</animal>\n<animal>goat</animal>",
expected: "animal:\n - cat\n - goat\n",
expected: "+p_xml: version=\"1.0\" encoding=\"UTF-8\"\nanimal:\n - cat\n - goat\n",
},
{
description: "Parse xml: attributes",
subdescription: "Attributes are converted to fields, with the default attribute prefix '+'. Use '--xml-attribute-prefix` to set your own.",
input: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<cat legs=\"4\">\n <legs>7</legs>\n</cat>",
expected: "cat:\n +legs: \"4\"\n legs: \"7\"\n",
expected: "+p_xml: version=\"1.0\" encoding=\"UTF-8\"\ncat:\n +legs: \"4\"\n legs: \"7\"\n",
},
{
description: "Parse xml: attributes with content",
subdescription: "Content is added as a field, using the default content name of `+content`. Use `--xml-content-name` to set your own.",
input: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<cat legs=\"4\">meow</cat>",
expected: "cat:\n +content: meow\n +legs: \"4\"\n",
expected: "+p_xml: version=\"1.0\" encoding=\"UTF-8\"\ncat:\n +content: meow\n +legs: \"4\"\n",
},
{
description: "Parse xml: custom dtd",
subdescription: "DTD entities are ignored.",
subdescription: "DTD entities are processed as directives.",
input: xmlWithCustomDtd,
expected: expectedDtd,
scenarioType: "roundtrip",
},
{
description: "Parse xml: skip custom dtd",
subdescription: "DTDs are directives, skip over directives to skip DTDs.",
input: xmlWithCustomDtd,
expected: expectedSkippedDtd,
scenarioType: "roundtrip-skip-directives",
},
{
description: "Parse xml: with comments",
@ -322,9 +368,10 @@ var xmlScenarios = []formatScenario{
scenarioType: "encode",
},
{
description: "double prefix",
skipDoc: true,
input: "cat:\n ++name: tiger\n meows: true\n",
expected: "<cat +name=\"tiger\">\n <meows>true</meows>\n</cat>\n",
input: "cat:\n ++@name: tiger\n meows: true\n",
expected: "<cat +@name=\"tiger\">\n <meows>true</meows>\n</cat>\n",
scenarioType: "encode",
},
{
@ -341,6 +388,13 @@ var xmlScenarios = []formatScenario{
expected: expectedXMLWithComments,
scenarioType: "encode",
},
{
description: "Encode: doctype and xml declaration",
subdescription: "Use the special xml names to add/modify proc instructions and directives.",
input: yamlWithProcInstAndDirectives,
expected: expectedXmlWithProcInstAndDirectives,
scenarioType: "encode",
},
{
description: "Round trip: with comments",
subdescription: "A best effort is made, but comment positions and white space are not preserved perfectly.",
@ -348,21 +402,35 @@ var xmlScenarios = []formatScenario{
expected: expectedRoundtripXMLWithComments,
scenarioType: "roundtrip",
},
{
description: "Roundtrip: with doctype and declaration",
subdescription: "yq parses XML proc instructions and directives into nodes.\nUnfortunately the underlying XML parser loses whitespace information.",
input: xmlWithProcInstAndDirectives,
expected: expectedXmlWithProcInstAndDirectives,
scenarioType: "roundtrip",
},
}
func testXMLScenario(t *testing.T, s formatScenario) {
switch s.scenarioType {
case "", "decode":
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false), NewYamlEncoder(4, false, true, true)), s.description)
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder(XMLPreferences), NewYamlEncoder(4, false, true, true)), s.description)
case "encode":
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewYamlDecoder(), NewXMLEncoder(2, "+", "+content")), s.description)
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewYamlDecoder(), NewXMLEncoder(2, XMLPreferences)), s.description)
case "roundtrip":
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false), NewXMLEncoder(2, "+", "+content")), s.description)
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder(XMLPreferences), NewXMLEncoder(2, XMLPreferences)), s.description)
case "decode-keep-ns":
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content", false, true, false), NewYamlEncoder(2, false, true, true)), s.description)
prefs := NewDefaultXmlPreferences()
prefs.KeepNamespace = true
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder(prefs), NewYamlEncoder(2, false, true, true)), s.description)
case "decode-raw-token":
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content", false, true, true), NewYamlEncoder(2, false, true, true)), s.description)
prefs := NewDefaultXmlPreferences()
prefs.UseRawToken = true
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder(prefs), NewYamlEncoder(2, false, true, true)), s.description)
case "roundtrip-skip-directives":
prefs := NewDefaultXmlPreferences()
prefs.SkipDirectives = true
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder(prefs), NewXMLEncoder(2, prefs)), s.description)
default:
panic(fmt.Sprintf("unhandled scenario type %q", s.scenarioType))
}
@ -385,6 +453,8 @@ func documentXMLScenario(t *testing.T, w *bufio.Writer, i interface{}) {
documentXMLDecodeKeepNsScenario(w, s)
case "decode-raw-token":
documentXMLDecodeKeepNsRawTokenScenario(w, s)
case "roundtrip-skip-directives":
documentXMLSkipDirectrivesScenario(w, s)
default:
panic(fmt.Sprintf("unhandled scenario type %q", s.scenarioType))
@ -410,7 +480,7 @@ func documentXMLDecodeScenario(w *bufio.Writer, s formatScenario) {
writeOrPanic(w, fmt.Sprintf("```bash\nyq -p=xml '%v' sample.xml\n```\n", expression))
writeOrPanic(w, "will output\n")
writeOrPanic(w, fmt.Sprintf("```yaml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false), NewYamlEncoder(2, false, true, true))))
writeOrPanic(w, fmt.Sprintf("```yaml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder(XMLPreferences), NewYamlEncoder(2, false, true, true))))
}
func documentXMLDecodeKeepNsScenario(w *bufio.Writer, s formatScenario) {
@ -427,11 +497,14 @@ func documentXMLDecodeKeepNsScenario(w *bufio.Writer, s formatScenario) {
writeOrPanic(w, "then\n")
writeOrPanic(w, "```bash\nyq -p=xml -o=xml --xml-keep-namespace '.' sample.xml\n```\n")
writeOrPanic(w, "will output\n")
prefs := NewDefaultXmlPreferences()
prefs.KeepNamespace = true
writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder(prefs), NewXMLEncoder(2, prefs))))
writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, true, false), NewXMLEncoder(2, "+", "+content"))))
prefsWithout := NewDefaultXmlPreferences()
prefs.KeepNamespace = false
writeOrPanic(w, "instead of\n")
writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false), NewXMLEncoder(2, "+", "+content"))))
writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder(prefsWithout), NewXMLEncoder(2, prefsWithout))))
}
func documentXMLDecodeKeepNsRawTokenScenario(w *bufio.Writer, s formatScenario) {
@ -449,10 +522,16 @@ func documentXMLDecodeKeepNsRawTokenScenario(w *bufio.Writer, s formatScenario)
writeOrPanic(w, "```bash\nyq -p=xml -o=xml --xml-keep-namespace --xml-raw-token '.' sample.xml\n```\n")
writeOrPanic(w, "will output\n")
writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, true, true), NewXMLEncoder(2, "+", "+content"))))
prefs := NewDefaultXmlPreferences()
prefs.KeepNamespace = true
writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder(prefs), NewXMLEncoder(2, prefs))))
prefsWithout := NewDefaultXmlPreferences()
prefsWithout.KeepNamespace = false
writeOrPanic(w, "instead of\n")
writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false), NewXMLEncoder(2, "+", "+content"))))
writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder(prefsWithout), NewXMLEncoder(2, prefsWithout))))
}
func documentXMLEncodeScenario(w *bufio.Writer, s formatScenario) {
@ -470,7 +549,7 @@ func documentXMLEncodeScenario(w *bufio.Writer, s formatScenario) {
writeOrPanic(w, "```bash\nyq -o=xml '.' sample.yml\n```\n")
writeOrPanic(w, "will output\n")
writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewYamlDecoder(), NewXMLEncoder(2, "+", "+content"))))
writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewYamlDecoder(), NewXMLEncoder(2, XMLPreferences))))
}
func documentXMLRoundTripScenario(w *bufio.Writer, s formatScenario) {
@ -488,7 +567,27 @@ func documentXMLRoundTripScenario(w *bufio.Writer, s formatScenario) {
writeOrPanic(w, "```bash\nyq -p=xml -o=xml '.' sample.xml\n```\n")
writeOrPanic(w, "will output\n")
writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false, false, false), NewXMLEncoder(2, "+", "+content"))))
writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder(XMLPreferences), NewXMLEncoder(2, XMLPreferences))))
}
func documentXMLSkipDirectrivesScenario(w *bufio.Writer, s formatScenario) {
writeOrPanic(w, fmt.Sprintf("## %v\n", s.description))
if s.subdescription != "" {
writeOrPanic(w, s.subdescription)
writeOrPanic(w, "\n\n")
}
writeOrPanic(w, "Given a sample.xml file of:\n")
writeOrPanic(w, fmt.Sprintf("```xml\n%v\n```\n", s.input))
writeOrPanic(w, "then\n")
writeOrPanic(w, "```bash\nyq -p=xml -o=xml --xml-skip-directives '.' sample.xml\n```\n")
writeOrPanic(w, "will output\n")
prefs := NewDefaultXmlPreferences()
prefs.SkipDirectives = true
writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder(prefs), NewXMLEncoder(2, prefs))))
}
func TestXMLScenarios(t *testing.T) {