Added XML decoder (#1044)

This commit is contained in:
Mike Farah 2021-12-21 15:02:07 +11:00 committed by GitHub
parent 915e9de437
commit df32baedf1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
154 changed files with 725 additions and 113 deletions

View File

@ -1,3 +1,5 @@
The MIT License (MIT)
Copyright (c) 2017 Mike Farah Copyright (c) 2017 Mike Farah
Permission is hereby granted, free of charge, to any person obtaining a copy Permission is hereby granted, free of charge, to any person obtaining a copy

View File

@ -6,6 +6,10 @@ var unwrapScalar = true
var writeInplace = false var writeInplace = false
var outputToJSON = false var outputToJSON = false
var outputFormat = "yaml" var outputFormat = "yaml"
var inputFormat = "yaml"
var xmlAttributePrefix = "+"
var xmlContentName = "+content"
var exitStatus = false var exitStatus = false
var forceColor = false var forceColor = false

View File

@ -75,6 +75,11 @@ func evaluateAll(cmd *cobra.Command, args []string) error {
return err return err
} }
decoder, err := configureDecoder()
if err != nil {
return err
}
printerWriter := configurePrinterWriter(format, out) printerWriter := configurePrinterWriter(format, out)
printer := yqlib.NewPrinter(printerWriter, format, unwrapScalar, colorsEnabled, indent, !noDocSeparators) printer := yqlib.NewPrinter(printerWriter, format, unwrapScalar, colorsEnabled, indent, !noDocSeparators)
@ -99,7 +104,7 @@ func evaluateAll(cmd *cobra.Command, args []string) error {
switch len(args) { switch len(args) {
case 0: case 0:
if pipingStdIn { if pipingStdIn {
err = allAtOnceEvaluator.EvaluateFiles(processExpression(""), []string{"-"}, printer, leadingContentPreProcessing) err = allAtOnceEvaluator.EvaluateFiles(processExpression(""), []string{"-"}, printer, leadingContentPreProcessing, decoder)
} else { } else {
cmd.Println(cmd.UsageString()) cmd.Println(cmd.UsageString())
return nil return nil
@ -108,10 +113,10 @@ func evaluateAll(cmd *cobra.Command, args []string) error {
if nullInput { if nullInput {
err = yqlib.NewStreamEvaluator().EvaluateNew(processExpression(args[0]), printer, "") err = yqlib.NewStreamEvaluator().EvaluateNew(processExpression(args[0]), printer, "")
} else { } else {
err = allAtOnceEvaluator.EvaluateFiles(processExpression(""), []string{args[0]}, printer, leadingContentPreProcessing) err = allAtOnceEvaluator.EvaluateFiles(processExpression(""), []string{args[0]}, printer, leadingContentPreProcessing, decoder)
} }
default: default:
err = allAtOnceEvaluator.EvaluateFiles(processExpression(args[0]), args[1:], printer, leadingContentPreProcessing) err = allAtOnceEvaluator.EvaluateFiles(processExpression(args[0]), args[1:], printer, leadingContentPreProcessing, decoder)
} }
completedSuccessfully = err == nil completedSuccessfully = err == nil

View File

@ -92,6 +92,11 @@ func evaluateSequence(cmd *cobra.Command, args []string) error {
printer := yqlib.NewPrinter(printerWriter, format, unwrapScalar, colorsEnabled, indent, !noDocSeparators) printer := yqlib.NewPrinter(printerWriter, format, unwrapScalar, colorsEnabled, indent, !noDocSeparators)
decoder, err := configureDecoder()
if err != nil {
return err
}
streamEvaluator := yqlib.NewStreamEvaluator() streamEvaluator := yqlib.NewStreamEvaluator()
if frontMatter != "" { if frontMatter != "" {
@ -113,7 +118,7 @@ func evaluateSequence(cmd *cobra.Command, args []string) error {
switch len(args) { switch len(args) {
case 0: case 0:
if pipingStdIn { if pipingStdIn {
err = streamEvaluator.EvaluateFiles(processExpression(""), []string{"-"}, printer, leadingContentPreProcessing) err = streamEvaluator.EvaluateFiles(processExpression(""), []string{"-"}, printer, leadingContentPreProcessing, decoder)
} else { } else {
cmd.Println(cmd.UsageString()) cmd.Println(cmd.UsageString())
return nil return nil
@ -122,10 +127,10 @@ func evaluateSequence(cmd *cobra.Command, args []string) error {
if nullInput { if nullInput {
err = streamEvaluator.EvaluateNew(processExpression(args[0]), printer, "") err = streamEvaluator.EvaluateNew(processExpression(args[0]), printer, "")
} else { } else {
err = streamEvaluator.EvaluateFiles(processExpression(""), []string{args[0]}, printer, leadingContentPreProcessing) err = streamEvaluator.EvaluateFiles(processExpression(""), []string{args[0]}, printer, leadingContentPreProcessing, decoder)
} }
default: default:
err = streamEvaluator.EvaluateFiles(processExpression(args[0]), args[1:], printer, leadingContentPreProcessing) err = streamEvaluator.EvaluateFiles(processExpression(args[0]), args[1:], printer, leadingContentPreProcessing, decoder)
} }
completedSuccessfully = err == nil completedSuccessfully = err == nil

View File

@ -49,6 +49,11 @@ See https://mikefarah.gitbook.io/yq/ for detailed documentation and examples.`,
} }
rootCmd.PersistentFlags().StringVarP(&outputFormat, "output-format", "o", "yaml", "[yaml|y|json|j|props|p] output format type.") rootCmd.PersistentFlags().StringVarP(&outputFormat, "output-format", "o", "yaml", "[yaml|y|json|j|props|p] output format type.")
rootCmd.PersistentFlags().StringVarP(&inputFormat, "input-format", "p", "yaml", "[yaml|y|xml|x] input format type.")
rootCmd.PersistentFlags().StringVar(&xmlAttributePrefix, "xml-attribute-prefix", "+", "prefix for xml attributes")
rootCmd.PersistentFlags().StringVar(&xmlContentName, "xml-content-name", "+content", "name for xml content (if no attribute name is present).")
rootCmd.PersistentFlags().BoolVarP(&nullInput, "null-input", "n", false, "Don't read input, simply evaluate the expression given. Useful for creating yaml docs from scratch.") rootCmd.PersistentFlags().BoolVarP(&nullInput, "null-input", "n", false, "Don't read input, simply evaluate the expression given. Useful for creating yaml docs from scratch.")
rootCmd.PersistentFlags().BoolVarP(&noDocSeparators, "no-doc", "N", false, "Don't print document separators (---)") rootCmd.PersistentFlags().BoolVarP(&noDocSeparators, "no-doc", "N", false, "Don't print document separators (---)")

View File

@ -45,6 +45,18 @@ func initCommand(cmd *cobra.Command, args []string) (firstFileIndex int, err err
return firstFileIndex, nil return firstFileIndex, nil
} }
func configureDecoder() (yqlib.Decoder, error) {
yqlibInputFormat, err := yqlib.InputFormatFromString(inputFormat)
if err != nil {
return nil, err
}
switch yqlibInputFormat {
case yqlib.XmlInputFormat:
return yqlib.NewXmlDecoder(xmlAttributePrefix, xmlContentName), nil
}
return yqlib.NewYamlDecoder(), nil
}
func configurePrinterWriter(format yqlib.PrinterOutputFormat, out io.Writer) yqlib.PrinterWriter { func configurePrinterWriter(format yqlib.PrinterOutputFormat, out io.Writer) yqlib.PrinterWriter {
var printerWriter yqlib.PrinterWriter var printerWriter yqlib.PrinterWriter

4
examples/mike.xml Normal file
View File

@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<cat>3f</cat>
<dog>meow:as</dog>
<dog3>true</dog3>

11
examples/mike2.xml Normal file
View File

@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- osm-->
<osm version="0.6" generator="CGImap 0.0.2">
<!-- bounds-->
<bounds minlat="54.0889580" minlon="12.2487570" maxlat="54.0913900" maxlon="12.2524800">
<!-- great -->
cool
</bounds>
<foo>ba2234r</foo>
<foo>bar2234233</foo>
</osm>

2
go.mod
View File

@ -8,6 +8,7 @@ require (
github.com/magiconair/properties v1.8.5 github.com/magiconair/properties v1.8.5
github.com/spf13/cobra v1.3.0 github.com/spf13/cobra v1.3.0
github.com/timtadh/lexmachine v0.2.2 github.com/timtadh/lexmachine v0.2.2
golang.org/x/net v0.0.0-20210813160813-60bc85c4be6d
gopkg.in/op/go-logging.v1 v1.0.0-20160211212156-b2cb9fa56473 gopkg.in/op/go-logging.v1 v1.0.0-20160211212156-b2cb9fa56473
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b
) )
@ -19,6 +20,7 @@ require (
github.com/spf13/pflag v1.0.5 // indirect github.com/spf13/pflag v1.0.5 // indirect
github.com/timtadh/data-structures v0.5.3 // indirect github.com/timtadh/data-structures v0.5.3 // indirect
golang.org/x/sys v0.0.0-20211205182925-97ca703d548d // indirect golang.org/x/sys v0.0.0-20211205182925-97ca703d548d // indirect
golang.org/x/text v0.3.7 // indirect
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
) )

2
go.sum
View File

@ -449,6 +449,7 @@ golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4/go.mod h1:RBQZq4jEuRlivfhVLd
golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
golang.org/x/net v0.0.0-20210410081132-afb366fc7cd1/go.mod h1:9tjilg8BloeKEkVJvy7fQ90B1CfIiPueXVOjqfkSzI8= golang.org/x/net v0.0.0-20210410081132-afb366fc7cd1/go.mod h1:9tjilg8BloeKEkVJvy7fQ90B1CfIiPueXVOjqfkSzI8=
golang.org/x/net v0.0.0-20210503060351-7fd8e65b6420/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20210503060351-7fd8e65b6420/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20210813160813-60bc85c4be6d h1:LO7XpTYMwTqxjLcGWPijK3vRXg1aWdlNOVOHRq45d7c=
golang.org/x/net v0.0.0-20210813160813-60bc85c4be6d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20210813160813-60bc85c4be6d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
@ -553,6 +554,7 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=

View File

@ -8,7 +8,7 @@ import (
// A yaml expression evaluator that runs the expression once against all files/nodes in memory. // A yaml expression evaluator that runs the expression once against all files/nodes in memory.
type Evaluator interface { type Evaluator interface {
EvaluateFiles(expression string, filenames []string, printer Printer, leadingContentPreProcessing bool) error EvaluateFiles(expression string, filenames []string, printer Printer, leadingContentPreProcessing bool, decoder Decoder) error
// EvaluateNodes takes an expression and one or more yaml nodes, returning a list of matching candidate nodes // EvaluateNodes takes an expression and one or more yaml nodes, returning a list of matching candidate nodes
EvaluateNodes(expression string, nodes ...*yaml.Node) (*list.List, error) EvaluateNodes(expression string, nodes ...*yaml.Node) (*list.List, error)
@ -46,7 +46,7 @@ func (e *allAtOnceEvaluator) EvaluateCandidateNodes(expression string, inputCand
return context.MatchingNodes, nil return context.MatchingNodes, nil
} }
func (e *allAtOnceEvaluator) EvaluateFiles(expression string, filenames []string, printer Printer, leadingContentPreProcessing bool) error { func (e *allAtOnceEvaluator) EvaluateFiles(expression string, filenames []string, printer Printer, leadingContentPreProcessing bool, decoder Decoder) error {
fileIndex := 0 fileIndex := 0
firstFileLeadingContent := "" firstFileLeadingContent := ""
@ -61,7 +61,7 @@ func (e *allAtOnceEvaluator) EvaluateFiles(expression string, filenames []string
firstFileLeadingContent = leadingContent firstFileLeadingContent = leadingContent
} }
fileDocuments, err := readDocuments(reader, filename, fileIndex) fileDocuments, err := readDocuments(reader, filename, fileIndex, decoder)
if err != nil { if err != nil {
return err return err
} }

View File

@ -0,0 +1,110 @@
package yqlib
import (
"bufio"
"bytes"
"fmt"
"strings"
"testing"
"github.com/mikefarah/yq/v4/test"
yaml "gopkg.in/yaml.v3"
)
func decodeXml(t *testing.T, xml string) *CandidateNode {
decoder := NewXmlDecoder("+", "+content")
decoder.Init(strings.NewReader(xml))
node := &yaml.Node{}
err := decoder.Decode(node)
if err != nil {
t.Error(err, "fail to decode", xml)
}
return &CandidateNode{Node: node}
}
type xmlScenario struct {
inputXml string
expected string
description string
subdescription string
skipDoc bool
}
var xmlScenarios = []xmlScenario{
{
description: "Parse xml: simple",
inputXml: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<cat>meow</cat>",
expected: "D0, P[], (doc)::cat: meow\n",
},
{
description: "Parse xml: array",
subdescription: "Consecutive nodes with identical xml names are assumed to be arrays.",
inputXml: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<animal>1</animal>\n<animal>2</animal>",
expected: "D0, P[], (doc)::animal:\n - \"1\"\n - \"2\"\n",
},
{
description: "Parse xml: attributes",
subdescription: "Attributes are converted to fields, with the attribute prefix.",
inputXml: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<cat legs=\"4\">\n <legs>7</legs>\n</cat>",
expected: "D0, P[], (doc)::cat:\n +legs: \"4\"\n legs: \"7\"\n",
},
{
description: "Parse xml: attributes with content",
subdescription: "Content is added as a field, using the content name",
inputXml: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<cat legs=\"4\">meow</cat>",
expected: "D0, P[], (doc)::cat:\n +content: meow\n +legs: \"4\"\n",
},
}
func testXmlScenario(t *testing.T, s *xmlScenario) {
var actual = resultToString(t, decodeXml(t, s.inputXml))
test.AssertResult(t, s.expected, actual)
}
func documentXmlScenario(t *testing.T, w *bufio.Writer, i interface{}) {
s := i.(xmlScenario)
if s.skipDoc {
return
}
writeOrPanic(w, fmt.Sprintf("## %v\n", s.description))
if s.subdescription != "" {
writeOrPanic(w, s.subdescription)
writeOrPanic(w, "\n\n")
}
writeOrPanic(w, "Given a sample.xml file of:\n")
writeOrPanic(w, fmt.Sprintf("```xml\n%v\n```\n", s.inputXml))
writeOrPanic(w, "then\n")
writeOrPanic(w, "```bash\nyq e sample.xml\n```\n")
writeOrPanic(w, "will output\n")
var output bytes.Buffer
printer := NewPrinterWithSingleWriter(bufio.NewWriter(&output), YamlOutputFormat, true, false, 2, true)
node := decodeXml(t, s.inputXml)
err := printer.PrintResults(node.AsList())
if err != nil {
t.Error(err)
return
}
writeOrPanic(w, fmt.Sprintf("```yaml\n%v```\n\n", output.String()))
}
func TestXmlScenarios(t *testing.T) {
for _, tt := range xmlScenarios {
testXmlScenario(t, &tt)
}
genericScenarios := make([]interface{}, len(xmlScenarios))
for i, s := range xmlScenarios {
genericScenarios[i] = s
}
documentScenarios(t, "usage", "xml", genericScenarios, documentXmlScenario)
}

249
pkg/yqlib/decoder_xml.go Normal file
View File

@ -0,0 +1,249 @@
package yqlib
import (
"encoding/xml"
"fmt"
"io"
"unicode"
"golang.org/x/net/html/charset"
yaml "gopkg.in/yaml.v3"
)
type InputFormat uint
const (
YamlInputFormat = 1 << iota
XmlInputFormat
)
func InputFormatFromString(format string) (InputFormat, error) {
switch format {
case "yaml", "y":
return YamlInputFormat, nil
case "xml", "x":
return XmlInputFormat, nil
default:
return 0, fmt.Errorf("unknown format '%v' please use [yaml|xml]", format)
}
}
type xmlDecoder struct {
reader io.Reader
attributePrefix string
contentPrefix string
finished bool
}
func NewXmlDecoder(attributePrefix string, contentPrefix string) Decoder {
if contentPrefix == "" {
contentPrefix = "content"
}
return &xmlDecoder{attributePrefix: attributePrefix, contentPrefix: contentPrefix, finished: false}
}
func (dec *xmlDecoder) Init(reader io.Reader) {
dec.reader = reader
dec.finished = false
}
func (dec *xmlDecoder) createSequence(nodes []*xmlNode) (*yaml.Node, error) {
yamlNode := &yaml.Node{Kind: yaml.SequenceNode}
for _, child := range nodes {
yamlChild, err := dec.convertToYamlNode(child)
if err != nil {
return nil, err
}
yamlNode.Content = append(yamlNode.Content, yamlChild)
}
return yamlNode, nil
}
func (dec *xmlDecoder) createMap(n *xmlNode) (*yaml.Node, error) {
yamlNode := &yaml.Node{Kind: yaml.MappingNode, HeadComment: n.Comment}
if len(n.Data) > 0 {
label := dec.contentPrefix
yamlNode.Content = append(yamlNode.Content, createScalarNode(label, label), createScalarNode(n.Data, n.Data))
}
for _, keyValuePair := range n.Children {
label := keyValuePair.K
children := keyValuePair.V
labelNode := createScalarNode(label, label)
var valueNode *yaml.Node
var err error
log.Debug("len of children in %v is %v", label, len(children))
if len(children) > 1 {
valueNode, err = dec.createSequence(children)
if err != nil {
return nil, err
}
} else {
valueNode, err = dec.convertToYamlNode(children[0])
if err != nil {
return nil, err
}
}
yamlNode.Content = append(yamlNode.Content, labelNode, valueNode)
}
return yamlNode, nil
}
func (dec *xmlDecoder) convertToYamlNode(n *xmlNode) (*yaml.Node, error) {
if len(n.Children) > 0 {
return dec.createMap(n)
}
scalar := createScalarNode(n.Data, n.Data)
scalar.HeadComment = n.Comment
return scalar, nil
}
func (dec *xmlDecoder) Decode(rootYamlNode *yaml.Node) error {
if dec.finished {
return io.EOF
}
root := &xmlNode{}
// cant use xj - it doesn't keep map order.
err := dec.decodeXml(root)
if err != nil {
return err
}
firstNode, err := dec.convertToYamlNode(root)
if err != nil {
return err
}
rootYamlNode.Kind = yaml.DocumentNode
rootYamlNode.Content = []*yaml.Node{firstNode}
dec.finished = true
return nil
}
type xmlNode struct {
Children []*xmlChildrenKv
Comment string
Data string
}
type xmlChildrenKv struct {
K string
V []*xmlNode
}
// AddChild appends a node to the list of children
func (n *xmlNode) AddChild(s string, c *xmlNode) {
if n.Children == nil {
n.Children = make([]*xmlChildrenKv, 0)
}
log.Debug("looking for %s", s)
// see if we can find an existing entry to add to
for _, childEntry := range n.Children {
if childEntry.K == s {
log.Debug("found it, appending an entry%s", s)
childEntry.V = append(childEntry.V, c)
log.Debug("yay len of children in %v is %v", s, len(childEntry.V))
return
}
}
log.Debug("not there, making a new one %s", s)
n.Children = append(n.Children, &xmlChildrenKv{K: s, V: []*xmlNode{c}})
}
type element struct {
parent *element
n *xmlNode
label string
}
// this code is heavily based on https://github.com/basgys/goxml2json
// main changes are to decode into a structure that preserves the original order
// of the map keys.
func (dec *xmlDecoder) decodeXml(root *xmlNode) error {
xmlDec := xml.NewDecoder(dec.reader)
// That will convert the charset if the provided XML is non-UTF-8
xmlDec.CharsetReader = charset.NewReaderLabel
// Create first element from the root node
elem := &element{
parent: nil,
n: root,
}
for {
t, _ := xmlDec.Token()
if t == nil {
break
}
switch se := t.(type) {
case xml.StartElement:
// Build new a new current element and link it to its parent
elem = &element{
parent: elem,
n: &xmlNode{},
label: se.Name.Local,
}
// Extract attributes as children
for _, a := range se.Attr {
elem.n.AddChild(dec.attributePrefix+a.Name.Local, &xmlNode{Data: a.Value})
}
case xml.CharData:
// Extract XML data (if any)
elem.n.Data = trimNonGraphic(string(se))
case xml.EndElement:
// And add it to its parent list
if elem.parent != nil {
elem.parent.n.AddChild(elem.label, elem.n)
}
// Then change the current element to its parent
elem = elem.parent
case xml.Comment:
elem.n.Comment = trimNonGraphic(string(xml.CharData(se)))
}
}
return nil
}
// trimNonGraphic returns a slice of the string s, with all leading and trailing
// non graphic characters and spaces removed.
//
// Graphic characters include letters, marks, numbers, punctuation, symbols,
// and spaces, from categories L, M, N, P, S, Zs.
// Spacing characters are set by category Z and property Pattern_White_Space.
func trimNonGraphic(s string) string {
if s == "" {
return s
}
var first *int
var last int
for i, r := range []rune(s) {
if !unicode.IsGraphic(r) || unicode.IsSpace(r) {
continue
}
if first == nil {
f := i // copy i
first = &f
last = i
} else {
last = i
}
}
// If first is nil, it means there are no graphic characters
if first == nil {
return ""
}
return string([]rune(s)[*first : last+1])
}

28
pkg/yqlib/decoder_yaml.go Normal file
View File

@ -0,0 +1,28 @@
package yqlib
import (
"io"
yaml "gopkg.in/yaml.v3"
)
type Decoder interface {
Init(reader io.Reader)
Decode(node *yaml.Node) error
}
type yamlDecoder struct {
decoder yaml.Decoder
}
func NewYamlDecoder() Decoder {
return &yamlDecoder{}
}
func (dec *yamlDecoder) Init(reader io.Reader) {
dec.decoder = *yaml.NewDecoder(reader)
}
func (dec *yamlDecoder) Decode(rootYamlNode *yaml.Node) error {
return dec.decoder.Decode(rootYamlNode)
}

View File

@ -271,3 +271,19 @@ cat thing1,thing2 true 3.40
dog thing3 false 12 dog thing3 false 12
``` ```
## Decode a xml encoded string
Given a sample.yml file of:
```yaml
a: <foo>bar</foo>
```
then
```bash
yq eval '.b = (.a | from_xml)' sample.yml
```
will output
```yaml
a: <foo>bar</foo>
b:
foo: bar
```

View File

@ -14,6 +14,7 @@ These operators are useful to process yaml documents that have stringified embed
| Properties | | to_props/@props | | Properties | | to_props/@props |
| CSV | | to_csv/@csv | | CSV | | to_csv/@csv |
| TSV | | to_tsv/@tsv | | TSV | | to_tsv/@tsv |
| XML | from_xml | |
CSV and TSV format both accept either a single array or scalars (representing a single row), or an array of array of scalars (representing multiple rows). CSV and TSV format both accept either a single array or scalars (representing a single row), or an array of array of scalars (representing multiple rows).

View File

@ -0,0 +1,12 @@
# XML
At the moment, `yq` only supports decoding `xml` (into one of the other supported output formats).
As yaml does not have the concept of attributes, these are converted to regular fields with a prefix to prevent clobbering. Consecutive xml nodes with the same name are assumed to be arrays.
All values in XML are assumed to be strings - but you can use `from_yaml` to parse them into their correct types:
```
yq e -p=xml '.myNumberField |= from_yaml' my.xml
```

Some files were not shown because too many files have changed in this diff Show More