mirror of
https://github.com/mikefarah/yq.git
synced 2025-01-27 08:55:37 +00:00
Added XML decoder
This commit is contained in:
parent
915e9de437
commit
d1adb48674
2
LICENSE
2
LICENSE
@ -1,3 +1,5 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2017 Mike Farah
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
|
@ -6,6 +6,10 @@ var unwrapScalar = true
|
||||
var writeInplace = false
|
||||
var outputToJSON = false
|
||||
var outputFormat = "yaml"
|
||||
var inputFormat = "yaml"
|
||||
|
||||
var xmlAttributePrefix = "+"
|
||||
var xmlContentName = "+content"
|
||||
|
||||
var exitStatus = false
|
||||
var forceColor = false
|
||||
|
@ -75,6 +75,11 @@ func evaluateAll(cmd *cobra.Command, args []string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
decoder, err := configureDecoder()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
printerWriter := configurePrinterWriter(format, out)
|
||||
|
||||
printer := yqlib.NewPrinter(printerWriter, format, unwrapScalar, colorsEnabled, indent, !noDocSeparators)
|
||||
@ -99,7 +104,7 @@ func evaluateAll(cmd *cobra.Command, args []string) error {
|
||||
switch len(args) {
|
||||
case 0:
|
||||
if pipingStdIn {
|
||||
err = allAtOnceEvaluator.EvaluateFiles(processExpression(""), []string{"-"}, printer, leadingContentPreProcessing)
|
||||
err = allAtOnceEvaluator.EvaluateFiles(processExpression(""), []string{"-"}, printer, leadingContentPreProcessing, decoder)
|
||||
} else {
|
||||
cmd.Println(cmd.UsageString())
|
||||
return nil
|
||||
@ -108,10 +113,10 @@ func evaluateAll(cmd *cobra.Command, args []string) error {
|
||||
if nullInput {
|
||||
err = yqlib.NewStreamEvaluator().EvaluateNew(processExpression(args[0]), printer, "")
|
||||
} else {
|
||||
err = allAtOnceEvaluator.EvaluateFiles(processExpression(""), []string{args[0]}, printer, leadingContentPreProcessing)
|
||||
err = allAtOnceEvaluator.EvaluateFiles(processExpression(""), []string{args[0]}, printer, leadingContentPreProcessing, decoder)
|
||||
}
|
||||
default:
|
||||
err = allAtOnceEvaluator.EvaluateFiles(processExpression(args[0]), args[1:], printer, leadingContentPreProcessing)
|
||||
err = allAtOnceEvaluator.EvaluateFiles(processExpression(args[0]), args[1:], printer, leadingContentPreProcessing, decoder)
|
||||
}
|
||||
|
||||
completedSuccessfully = err == nil
|
||||
|
@ -92,6 +92,11 @@ func evaluateSequence(cmd *cobra.Command, args []string) error {
|
||||
|
||||
printer := yqlib.NewPrinter(printerWriter, format, unwrapScalar, colorsEnabled, indent, !noDocSeparators)
|
||||
|
||||
decoder, err := configureDecoder()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
streamEvaluator := yqlib.NewStreamEvaluator()
|
||||
|
||||
if frontMatter != "" {
|
||||
@ -113,7 +118,7 @@ func evaluateSequence(cmd *cobra.Command, args []string) error {
|
||||
switch len(args) {
|
||||
case 0:
|
||||
if pipingStdIn {
|
||||
err = streamEvaluator.EvaluateFiles(processExpression(""), []string{"-"}, printer, leadingContentPreProcessing)
|
||||
err = streamEvaluator.EvaluateFiles(processExpression(""), []string{"-"}, printer, leadingContentPreProcessing, decoder)
|
||||
} else {
|
||||
cmd.Println(cmd.UsageString())
|
||||
return nil
|
||||
@ -122,10 +127,10 @@ func evaluateSequence(cmd *cobra.Command, args []string) error {
|
||||
if nullInput {
|
||||
err = streamEvaluator.EvaluateNew(processExpression(args[0]), printer, "")
|
||||
} else {
|
||||
err = streamEvaluator.EvaluateFiles(processExpression(""), []string{args[0]}, printer, leadingContentPreProcessing)
|
||||
err = streamEvaluator.EvaluateFiles(processExpression(""), []string{args[0]}, printer, leadingContentPreProcessing, decoder)
|
||||
}
|
||||
default:
|
||||
err = streamEvaluator.EvaluateFiles(processExpression(args[0]), args[1:], printer, leadingContentPreProcessing)
|
||||
err = streamEvaluator.EvaluateFiles(processExpression(args[0]), args[1:], printer, leadingContentPreProcessing, decoder)
|
||||
}
|
||||
completedSuccessfully = err == nil
|
||||
|
||||
|
@ -49,6 +49,11 @@ See https://mikefarah.gitbook.io/yq/ for detailed documentation and examples.`,
|
||||
}
|
||||
|
||||
rootCmd.PersistentFlags().StringVarP(&outputFormat, "output-format", "o", "yaml", "[yaml|y|json|j|props|p] output format type.")
|
||||
rootCmd.PersistentFlags().StringVarP(&inputFormat, "input-format", "p", "yaml", "[yaml|y|xml|x] input format type.")
|
||||
|
||||
rootCmd.PersistentFlags().StringVar(&xmlAttributePrefix, "xml-attribute-prefix", "+", "prefix for xml attributes")
|
||||
rootCmd.PersistentFlags().StringVar(&xmlContentName, "xml-content-name", "+content", "name for xml content (if no attribute name is present).")
|
||||
|
||||
rootCmd.PersistentFlags().BoolVarP(&nullInput, "null-input", "n", false, "Don't read input, simply evaluate the expression given. Useful for creating yaml docs from scratch.")
|
||||
rootCmd.PersistentFlags().BoolVarP(&noDocSeparators, "no-doc", "N", false, "Don't print document separators (---)")
|
||||
|
||||
|
12
cmd/utils.go
12
cmd/utils.go
@ -45,6 +45,18 @@ func initCommand(cmd *cobra.Command, args []string) (firstFileIndex int, err err
|
||||
return firstFileIndex, nil
|
||||
}
|
||||
|
||||
func configureDecoder() (yqlib.Decoder, error) {
|
||||
yqlibInputFormat, err := yqlib.InputFormatFromString(inputFormat)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
switch yqlibInputFormat {
|
||||
case yqlib.XmlInputFormat:
|
||||
return yqlib.NewXmlDecoder(xmlAttributePrefix, xmlContentName), nil
|
||||
}
|
||||
return yqlib.NewYamlDecoder(), nil
|
||||
}
|
||||
|
||||
func configurePrinterWriter(format yqlib.PrinterOutputFormat, out io.Writer) yqlib.PrinterWriter {
|
||||
|
||||
var printerWriter yqlib.PrinterWriter
|
||||
|
4
examples/mike.xml
Normal file
4
examples/mike.xml
Normal file
@ -0,0 +1,4 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<cat>3f</cat>
|
||||
<dog>meow:as</dog>
|
||||
<dog3>true</dog3>
|
11
examples/mike2.xml
Normal file
11
examples/mike2.xml
Normal file
@ -0,0 +1,11 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!-- osm-->
|
||||
<osm version="0.6" generator="CGImap 0.0.2">
|
||||
<!-- bounds-->
|
||||
<bounds minlat="54.0889580" minlon="12.2487570" maxlat="54.0913900" maxlon="12.2524800">
|
||||
<!-- great -->
|
||||
cool
|
||||
</bounds>
|
||||
<foo>ba2234r</foo>
|
||||
<foo>bar2234233</foo>
|
||||
</osm>
|
2
go.mod
2
go.mod
@ -8,6 +8,7 @@ require (
|
||||
github.com/magiconair/properties v1.8.5
|
||||
github.com/spf13/cobra v1.3.0
|
||||
github.com/timtadh/lexmachine v0.2.2
|
||||
golang.org/x/net v0.0.0-20210813160813-60bc85c4be6d
|
||||
gopkg.in/op/go-logging.v1 v1.0.0-20160211212156-b2cb9fa56473
|
||||
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b
|
||||
)
|
||||
@ -19,6 +20,7 @@ require (
|
||||
github.com/spf13/pflag v1.0.5 // indirect
|
||||
github.com/timtadh/data-structures v0.5.3 // indirect
|
||||
golang.org/x/sys v0.0.0-20211205182925-97ca703d548d // indirect
|
||||
golang.org/x/text v0.3.7 // indirect
|
||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
|
||||
)
|
||||
|
||||
|
2
go.sum
2
go.sum
@ -449,6 +449,7 @@ golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4/go.mod h1:RBQZq4jEuRlivfhVLd
|
||||
golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
|
||||
golang.org/x/net v0.0.0-20210410081132-afb366fc7cd1/go.mod h1:9tjilg8BloeKEkVJvy7fQ90B1CfIiPueXVOjqfkSzI8=
|
||||
golang.org/x/net v0.0.0-20210503060351-7fd8e65b6420/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
|
||||
golang.org/x/net v0.0.0-20210813160813-60bc85c4be6d h1:LO7XpTYMwTqxjLcGWPijK3vRXg1aWdlNOVOHRq45d7c=
|
||||
golang.org/x/net v0.0.0-20210813160813-60bc85c4be6d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
|
||||
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
|
||||
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
|
||||
@ -553,6 +554,7 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
|
||||
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
|
||||
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
|
||||
|
@ -8,7 +8,7 @@ import (
|
||||
|
||||
// A yaml expression evaluator that runs the expression once against all files/nodes in memory.
|
||||
type Evaluator interface {
|
||||
EvaluateFiles(expression string, filenames []string, printer Printer, leadingContentPreProcessing bool) error
|
||||
EvaluateFiles(expression string, filenames []string, printer Printer, leadingContentPreProcessing bool, decoder Decoder) error
|
||||
|
||||
// EvaluateNodes takes an expression and one or more yaml nodes, returning a list of matching candidate nodes
|
||||
EvaluateNodes(expression string, nodes ...*yaml.Node) (*list.List, error)
|
||||
@ -46,7 +46,7 @@ func (e *allAtOnceEvaluator) EvaluateCandidateNodes(expression string, inputCand
|
||||
return context.MatchingNodes, nil
|
||||
}
|
||||
|
||||
func (e *allAtOnceEvaluator) EvaluateFiles(expression string, filenames []string, printer Printer, leadingContentPreProcessing bool) error {
|
||||
func (e *allAtOnceEvaluator) EvaluateFiles(expression string, filenames []string, printer Printer, leadingContentPreProcessing bool, decoder Decoder) error {
|
||||
fileIndex := 0
|
||||
firstFileLeadingContent := ""
|
||||
|
||||
@ -61,7 +61,7 @@ func (e *allAtOnceEvaluator) EvaluateFiles(expression string, filenames []string
|
||||
firstFileLeadingContent = leadingContent
|
||||
}
|
||||
|
||||
fileDocuments, err := readDocuments(reader, filename, fileIndex)
|
||||
fileDocuments, err := readDocuments(reader, filename, fileIndex, decoder)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
110
pkg/yqlib/decode_xml_test.go
Normal file
110
pkg/yqlib/decode_xml_test.go
Normal file
@ -0,0 +1,110 @@
|
||||
package yqlib
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/mikefarah/yq/v4/test"
|
||||
yaml "gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
func decodeXml(t *testing.T, xml string) *CandidateNode {
|
||||
decoder := NewXmlDecoder("+", "+content")
|
||||
|
||||
decoder.Init(strings.NewReader(xml))
|
||||
|
||||
node := &yaml.Node{}
|
||||
err := decoder.Decode(node)
|
||||
if err != nil {
|
||||
t.Error(err, "fail to decode", xml)
|
||||
}
|
||||
return &CandidateNode{Node: node}
|
||||
}
|
||||
|
||||
// xmlScenario is one XML decoding case. It drives both the assertion in
// testXmlScenario and the generated documentation in documentXmlScenario.
type xmlScenario struct {
	// inputXml is the XML document handed to the decoder.
	inputXml string
	// expected is the string form of the decoded result the test asserts on.
	expected string
	// description is the scenario heading; subdescription is optional text
	// written beneath it in the generated documentation.
	description, subdescription string
	// skipDoc leaves the scenario out of the generated documentation.
	skipDoc bool
}
|
||||
|
||||
var xmlScenarios = []xmlScenario{
|
||||
{
|
||||
description: "Parse xml: simple",
|
||||
inputXml: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<cat>meow</cat>",
|
||||
expected: "D0, P[], (doc)::cat: meow\n",
|
||||
},
|
||||
{
|
||||
description: "Parse xml: array",
|
||||
subdescription: "Consecutive nodes with identical xml names are assumed to be arrays.",
|
||||
inputXml: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<animal>1</animal>\n<animal>2</animal>",
|
||||
expected: "D0, P[], (doc)::animal:\n - \"1\"\n - \"2\"\n",
|
||||
},
|
||||
{
|
||||
description: "Parse xml: attributes",
|
||||
subdescription: "Attributes are converted to fields, with the attribute prefix.",
|
||||
inputXml: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<cat legs=\"4\">\n <legs>7</legs>\n</cat>",
|
||||
expected: "D0, P[], (doc)::cat:\n +legs: \"4\"\n legs: \"7\"\n",
|
||||
},
|
||||
{
|
||||
description: "Parse xml: attributes with content",
|
||||
subdescription: "Content is added as a field, using the content name",
|
||||
inputXml: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<cat legs=\"4\">meow</cat>",
|
||||
expected: "D0, P[], (doc)::cat:\n +content: meow\n +legs: \"4\"\n",
|
||||
},
|
||||
}
|
||||
|
||||
func testXmlScenario(t *testing.T, s *xmlScenario) {
|
||||
var actual = resultToString(t, decodeXml(t, s.inputXml))
|
||||
test.AssertResult(t, s.expected, actual)
|
||||
}
|
||||
|
||||
func documentXmlScenario(t *testing.T, w *bufio.Writer, i interface{}) {
|
||||
s := i.(xmlScenario)
|
||||
|
||||
if s.skipDoc {
|
||||
return
|
||||
}
|
||||
writeOrPanic(w, fmt.Sprintf("## %v\n", s.description))
|
||||
|
||||
if s.subdescription != "" {
|
||||
writeOrPanic(w, s.subdescription)
|
||||
writeOrPanic(w, "\n\n")
|
||||
}
|
||||
|
||||
writeOrPanic(w, "Given a sample.xml file of:\n")
|
||||
writeOrPanic(w, fmt.Sprintf("```xml\n%v\n```\n", s.inputXml))
|
||||
|
||||
writeOrPanic(w, "then\n")
|
||||
writeOrPanic(w, "```bash\nyq e sample.xml\n```\n")
|
||||
writeOrPanic(w, "will output\n")
|
||||
|
||||
var output bytes.Buffer
|
||||
printer := NewPrinterWithSingleWriter(bufio.NewWriter(&output), YamlOutputFormat, true, false, 2, true)
|
||||
|
||||
node := decodeXml(t, s.inputXml)
|
||||
|
||||
err := printer.PrintResults(node.AsList())
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
|
||||
writeOrPanic(w, fmt.Sprintf("```yaml\n%v```\n\n", output.String()))
|
||||
|
||||
}
|
||||
|
||||
func TestXmlScenarios(t *testing.T) {
|
||||
for _, tt := range xmlScenarios {
|
||||
testXmlScenario(t, &tt)
|
||||
}
|
||||
genericScenarios := make([]interface{}, len(xmlScenarios))
|
||||
for i, s := range xmlScenarios {
|
||||
genericScenarios[i] = s
|
||||
}
|
||||
documentScenarios(t, "usage", "xml", genericScenarios, documentXmlScenario)
|
||||
}
|
249
pkg/yqlib/decoder_xml.go
Normal file
249
pkg/yqlib/decoder_xml.go
Normal file
@ -0,0 +1,249 @@
|
||||
package yqlib
|
||||
|
||||
import (
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"io"
|
||||
"unicode"
|
||||
|
||||
"golang.org/x/net/html/charset"
|
||||
yaml "gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// InputFormat identifies the format of the documents being read.
type InputFormat uint

// Supported input formats. The constants carry the InputFormat type so they
// cannot be mixed up with plain integers; the numeric values (1 and 2) are
// unchanged.
const (
	YamlInputFormat InputFormat = 1 << iota
	XmlInputFormat
)
|
||||
|
||||
func InputFormatFromString(format string) (InputFormat, error) {
|
||||
switch format {
|
||||
case "yaml", "y":
|
||||
return YamlInputFormat, nil
|
||||
case "xml", "x":
|
||||
return XmlInputFormat, nil
|
||||
default:
|
||||
return 0, fmt.Errorf("unknown format '%v' please use [yaml|xml]", format)
|
||||
}
|
||||
}
|
||||
|
||||
// xmlDecoder decodes an XML stream into a single yaml document node.
type xmlDecoder struct {
	// reader is the input stream, set by Init.
	reader io.Reader
	// attributePrefix is prepended to attribute names when they become map
	// keys; contentPrefix is the key used for an element's text when that
	// element is rendered as a map.
	attributePrefix, contentPrefix string
	// finished is set once Decode has produced its document; Init clears it.
	finished bool
}
|
||||
|
||||
func NewXmlDecoder(attributePrefix string, contentPrefix string) Decoder {
|
||||
if contentPrefix == "" {
|
||||
contentPrefix = "content"
|
||||
}
|
||||
return &xmlDecoder{attributePrefix: attributePrefix, contentPrefix: contentPrefix, finished: false}
|
||||
}
|
||||
|
||||
func (dec *xmlDecoder) Init(reader io.Reader) {
|
||||
dec.reader = reader
|
||||
dec.finished = false
|
||||
}
|
||||
|
||||
func (dec *xmlDecoder) createSequence(nodes []*xmlNode) (*yaml.Node, error) {
|
||||
yamlNode := &yaml.Node{Kind: yaml.SequenceNode}
|
||||
for _, child := range nodes {
|
||||
yamlChild, err := dec.convertToYamlNode(child)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
yamlNode.Content = append(yamlNode.Content, yamlChild)
|
||||
}
|
||||
|
||||
return yamlNode, nil
|
||||
}
|
||||
|
||||
func (dec *xmlDecoder) createMap(n *xmlNode) (*yaml.Node, error) {
|
||||
yamlNode := &yaml.Node{Kind: yaml.MappingNode, HeadComment: n.Comment}
|
||||
|
||||
if len(n.Data) > 0 {
|
||||
label := dec.contentPrefix
|
||||
yamlNode.Content = append(yamlNode.Content, createScalarNode(label, label), createScalarNode(n.Data, n.Data))
|
||||
}
|
||||
|
||||
for _, keyValuePair := range n.Children {
|
||||
label := keyValuePair.K
|
||||
children := keyValuePair.V
|
||||
labelNode := createScalarNode(label, label)
|
||||
var valueNode *yaml.Node
|
||||
var err error
|
||||
log.Debug("len of children in %v is %v", label, len(children))
|
||||
if len(children) > 1 {
|
||||
valueNode, err = dec.createSequence(children)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
} else {
|
||||
valueNode, err = dec.convertToYamlNode(children[0])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
yamlNode.Content = append(yamlNode.Content, labelNode, valueNode)
|
||||
}
|
||||
|
||||
return yamlNode, nil
|
||||
}
|
||||
|
||||
func (dec *xmlDecoder) convertToYamlNode(n *xmlNode) (*yaml.Node, error) {
|
||||
if len(n.Children) > 0 {
|
||||
return dec.createMap(n)
|
||||
}
|
||||
scalar := createScalarNode(n.Data, n.Data)
|
||||
scalar.HeadComment = n.Comment
|
||||
return scalar, nil
|
||||
}
|
||||
|
||||
func (dec *xmlDecoder) Decode(rootYamlNode *yaml.Node) error {
|
||||
if dec.finished {
|
||||
return io.EOF
|
||||
}
|
||||
root := &xmlNode{}
|
||||
// cant use xj - it doesn't keep map order.
|
||||
err := dec.decodeXml(root)
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
firstNode, err := dec.convertToYamlNode(root)
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
rootYamlNode.Kind = yaml.DocumentNode
|
||||
rootYamlNode.Content = []*yaml.Node{firstNode}
|
||||
dec.finished = true
|
||||
return nil
|
||||
}
|
||||
|
||||
type xmlNode struct {
|
||||
Children []*xmlChildrenKv
|
||||
Comment string
|
||||
Data string
|
||||
}
|
||||
|
||||
type xmlChildrenKv struct {
|
||||
K string
|
||||
V []*xmlNode
|
||||
}
|
||||
|
||||
// AddChild appends a node to the list of children
|
||||
func (n *xmlNode) AddChild(s string, c *xmlNode) {
|
||||
|
||||
if n.Children == nil {
|
||||
n.Children = make([]*xmlChildrenKv, 0)
|
||||
}
|
||||
log.Debug("looking for %s", s)
|
||||
// see if we can find an existing entry to add to
|
||||
for _, childEntry := range n.Children {
|
||||
if childEntry.K == s {
|
||||
log.Debug("found it, appending an entry%s", s)
|
||||
childEntry.V = append(childEntry.V, c)
|
||||
log.Debug("yay len of children in %v is %v", s, len(childEntry.V))
|
||||
return
|
||||
}
|
||||
}
|
||||
log.Debug("not there, making a new one %s", s)
|
||||
n.Children = append(n.Children, &xmlChildrenKv{K: s, V: []*xmlNode{c}})
|
||||
}
|
||||
|
||||
type element struct {
|
||||
parent *element
|
||||
n *xmlNode
|
||||
label string
|
||||
}
|
||||
|
||||
// this code is heavily based on https://github.com/basgys/goxml2json
|
||||
// main changes are to decode into a structure that preserves the original order
|
||||
// of the map keys.
|
||||
func (dec *xmlDecoder) decodeXml(root *xmlNode) error {
|
||||
xmlDec := xml.NewDecoder(dec.reader)
|
||||
|
||||
// That will convert the charset if the provided XML is non-UTF-8
|
||||
xmlDec.CharsetReader = charset.NewReaderLabel
|
||||
|
||||
// Create first element from the root node
|
||||
elem := &element{
|
||||
parent: nil,
|
||||
n: root,
|
||||
}
|
||||
|
||||
for {
|
||||
t, _ := xmlDec.Token()
|
||||
if t == nil {
|
||||
break
|
||||
}
|
||||
|
||||
switch se := t.(type) {
|
||||
case xml.StartElement:
|
||||
// Build new a new current element and link it to its parent
|
||||
elem = &element{
|
||||
parent: elem,
|
||||
n: &xmlNode{},
|
||||
label: se.Name.Local,
|
||||
}
|
||||
|
||||
// Extract attributes as children
|
||||
for _, a := range se.Attr {
|
||||
elem.n.AddChild(dec.attributePrefix+a.Name.Local, &xmlNode{Data: a.Value})
|
||||
}
|
||||
case xml.CharData:
|
||||
// Extract XML data (if any)
|
||||
elem.n.Data = trimNonGraphic(string(se))
|
||||
case xml.EndElement:
|
||||
// And add it to its parent list
|
||||
if elem.parent != nil {
|
||||
elem.parent.n.AddChild(elem.label, elem.n)
|
||||
}
|
||||
|
||||
// Then change the current element to its parent
|
||||
elem = elem.parent
|
||||
case xml.Comment:
|
||||
elem.n.Comment = trimNonGraphic(string(xml.CharData(se)))
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// trimNonGraphic returns s with all leading and trailing non graphic
// characters and spaces removed. Characters between the first and last
// retained rune are left untouched.
//
// Graphic characters include letters, marks, numbers, punctuation, symbols,
// and spaces, from categories L, M, N, P, S, Zs.
// Spacing characters are set by category Z and property Pattern_White_Space.
func trimNonGraphic(s string) string {
	// keep reports whether r is a graphic, non-space rune.
	keep := func(r rune) bool {
		return unicode.IsGraphic(r) && !unicode.IsSpace(r)
	}

	// Convert once and narrow the window from both ends.
	runes := []rune(s)
	start, end := 0, len(runes)
	for start < end && !keep(runes[start]) {
		start++
	}
	for end > start && !keep(runes[end-1]) {
		end--
	}

	// start == end when nothing is kept, which yields the empty string.
	return string(runes[start:end])
}
|
28
pkg/yqlib/decoder_yaml.go
Normal file
28
pkg/yqlib/decoder_yaml.go
Normal file
@ -0,0 +1,28 @@
|
||||
package yqlib
|
||||
|
||||
import (
|
||||
"io"
|
||||
|
||||
yaml "gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// Decoder reads documents from an input stream into yaml nodes.
type Decoder interface {
	// Init sets the reader that later Decode calls consume.
	Init(reader io.Reader)
	// Decode fills node with a document read from the reader supplied to Init.
	Decode(node *yaml.Node) error
}
|
||||
|
||||
type yamlDecoder struct {
|
||||
decoder yaml.Decoder
|
||||
}
|
||||
|
||||
func NewYamlDecoder() Decoder {
|
||||
return &yamlDecoder{}
|
||||
}
|
||||
|
||||
func (dec *yamlDecoder) Init(reader io.Reader) {
|
||||
dec.decoder = *yaml.NewDecoder(reader)
|
||||
}
|
||||
|
||||
func (dec *yamlDecoder) Decode(rootYamlNode *yaml.Node) error {
|
||||
return dec.decoder.Decode(rootYamlNode)
|
||||
}
|
@ -271,3 +271,19 @@ cat thing1,thing2 true 3.40
|
||||
dog thing3 false 12
|
||||
```
|
||||
|
||||
## Decode an XML encoded string
|
||||
Given a sample.yml file of:
|
||||
```yaml
|
||||
a: <foo>bar</foo>
|
||||
```
|
||||
then
|
||||
```bash
|
||||
yq eval '.b = (.a | from_xml)' sample.yml
|
||||
```
|
||||
will output
|
||||
```yaml
|
||||
a: <foo>bar</foo>
|
||||
b:
|
||||
foo: bar
|
||||
```
|
||||
|
@ -14,6 +14,7 @@ These operators are useful to process yaml documents that have stringified embed
|
||||
| Properties | | to_props/@props |
|
||||
| CSV | | to_csv/@csv |
|
||||
| TSV | | to_tsv/@tsv |
|
||||
| XML | from_xml | |
|
||||
|
||||
|
||||
CSV and TSV format both accept either a single array or scalars (representing a single row), or an array of array of scalars (representing multiple rows).
|
12
pkg/yqlib/doc/usage/headers/xml.md
Normal file
12
pkg/yqlib/doc/usage/headers/xml.md
Normal file
@ -0,0 +1,12 @@
|
||||
# XML
|
||||
|
||||
At the moment, `yq` only supports decoding `xml` (into one of the other supported output formats).
|
||||
|
||||
As yaml does not have the concept of attributes, these are converted to regular fields with a prefix to prevent clobbering. Consecutive xml nodes with the same name are assumed to be arrays.
|
||||
|
||||
All values in XML are assumed to be strings - but you can use `from_yaml` to parse them into their correct types:
|
||||
|
||||
|
||||
```
|
||||
yq e -p=xml '.myNumberField |= from_yaml' my.xml
|
||||
```
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user