This commit is contained in:
Mike Farah 2021-12-16 22:18:50 +11:00
parent 5f9e6dae76
commit 4571ec825f
4 changed files with 154 additions and 15 deletions

View File

@ -1,3 +1,5 @@
The MIT License (MIT)
Copyright (c) 2017 Mike Farah
Permission is hereby granted, free of charge, to any person obtaining a copy

View File

@ -1,6 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- osm-->
<osm version="0.6" generator="CGImap 0.0.2">
<bounds minlat="54.0889580" minlon="12.2487570" maxlat="54.0913900" maxlon="12.2524800"/>
<!-- bounds-->
<bounds minlat="54.0889580" minlon="12.2487570" maxlat="54.0913900" maxlon="12.2524800">
<!-- great -->
cool
</bounds>
<foo>bar</foo>
<foo>bar23</foo>
</osm>

4
go.mod
View File

@ -1,7 +1,6 @@
module github.com/mikefarah/yq/v4
require (
github.com/basgys/goxml2json v1.1.0
github.com/elliotchance/orderedmap v1.4.0
github.com/fatih/color v1.13.0
github.com/goccy/go-yaml v1.9.4
@ -9,17 +8,18 @@ require (
github.com/magiconair/properties v1.8.5
github.com/spf13/cobra v1.3.0
github.com/timtadh/lexmachine v0.2.2
golang.org/x/net v0.0.0-20210813160813-60bc85c4be6d
gopkg.in/op/go-logging.v1 v1.0.0-20160211212156-b2cb9fa56473
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b
)
require (
github.com/basgys/goxml2json v1.1.0 // indirect
github.com/inconshreveable/mousetrap v1.0.0 // indirect
github.com/mattn/go-colorable v0.1.12 // indirect
github.com/mattn/go-isatty v0.0.14 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/timtadh/data-structures v0.5.3 // indirect
golang.org/x/net v0.0.0-20210813160813-60bc85c4be6d // indirect
golang.org/x/sys v0.0.0-20211205182925-97ca703d548d // indirect
golang.org/x/text v0.3.7 // indirect
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect

View File

@ -1,9 +1,11 @@
package yqlib
import (
"encoding/xml"
"io"
"unicode"
xj "github.com/basgys/goxml2json"
"golang.org/x/net/html/charset"
yaml "gopkg.in/yaml.v3"
)
@ -15,10 +17,10 @@ type xmlDecoder struct {
}
func NewXmlDecoder(reader io.Reader, attributePrefix string, contentPrefix string) Decoder {
return &xmlDecoder{reader: reader, attributePrefix: attributePrefix, contentPrefix: contentPrefix, finished: false}
return &xmlDecoder{reader: reader, attributePrefix: attributePrefix, contentPrefix: "c", finished: false}
}
func (dec *xmlDecoder) createSequence(nodes xj.Nodes) (*yaml.Node, error) {
func (dec *xmlDecoder) createSequence(nodes []*xmlNode) (*yaml.Node, error) {
yamlNode := &yaml.Node{Kind: yaml.SequenceNode}
for _, child := range nodes {
yamlChild, err := dec.convertToYamlNode(child)
@ -31,18 +33,21 @@ func (dec *xmlDecoder) createSequence(nodes xj.Nodes) (*yaml.Node, error) {
return yamlNode, nil
}
func (dec *xmlDecoder) createMap(n *xj.Node) (*yaml.Node, error) {
yamlNode := &yaml.Node{Kind: yaml.MappingNode}
func (dec *xmlDecoder) createMap(n *xmlNode) (*yaml.Node, error) {
yamlNode := &yaml.Node{Kind: yaml.MappingNode, HeadComment: n.Comment}
if len(n.Data) > 0 {
label := dec.contentPrefix + "content"
label := dec.contentPrefix
yamlNode.Content = append(yamlNode.Content, createScalarNode(label, label), createScalarNode(n.Data, n.Data))
}
for label, children := range n.Children {
for _, keyValuePair := range n.Children {
label := keyValuePair.K
children := keyValuePair.V
labelNode := createScalarNode(label, label)
var valueNode *yaml.Node
var err error
log.Debug("len of children in %v is %v", label, len(children))
if len(children) > 1 {
valueNode, err = dec.createSequence(children)
if err != nil {
@ -60,20 +65,22 @@ func (dec *xmlDecoder) createMap(n *xj.Node) (*yaml.Node, error) {
return yamlNode, nil
}
func (dec *xmlDecoder) convertToYamlNode(n *xj.Node) (*yaml.Node, error) {
if n.IsComplex() {
func (dec *xmlDecoder) convertToYamlNode(n *xmlNode) (*yaml.Node, error) {
if len(n.Children) > 0 {
return dec.createMap(n)
}
return createScalarNode(n.Data, n.Data), nil
scalar := createScalarNode(n.Data, n.Data)
scalar.HeadComment = n.Comment
return scalar, nil
}
func (dec *xmlDecoder) Decode(rootYamlNode *yaml.Node) error {
if dec.finished {
return io.EOF
}
root := &xj.Node{}
root := &xmlNode{}
// cant use xj - it doesn't keep map order.
err := xj.NewDecoder(dec.reader).Decode(root)
err := dec.decodeXml(root)
if err != nil {
return err
@ -88,3 +95,128 @@ func (dec *xmlDecoder) Decode(rootYamlNode *yaml.Node) error {
dec.finished = true
return nil
}
// xmlNode is the intermediate, order-preserving representation of a decoded
// XML element (or attribute value) that is later converted to a yaml.Node.
type xmlNode struct {
	// Children holds the child entries in the order their labels were first
	// added via AddChild; attributes are stored here as well.
	Children []*xmlChildrenKv
	// Comment is the trimmed text of the XML comment recorded on this node.
	Comment string
	// Data is the trimmed character data (or attribute value) of this node.
	Data string
}
// xmlChildrenKv is one entry of xmlNode.Children: a label together with every
// child node that was added under that label.
type xmlChildrenKv struct {
	// K is the label (element name, or prefixed attribute name).
	K string
	// V lists the nodes added under K, in insertion order.
	V []*xmlNode
}
// AddChild files the node c under the label s. When an entry for s already
// exists, c is appended to that entry's list; otherwise a new entry is added
// at the end of n.Children, so labels keep the order in which they first
// appeared.
func (n *xmlNode) AddChild(s string, c *xmlNode) {
	log.Debug("looking for %s", s)

	// Linear scan for an entry that already carries this label.
	for i := range n.Children {
		entry := n.Children[i]
		if entry.K != s {
			continue
		}
		log.Debug("found it, appending an entry%s", s)
		entry.V = append(entry.V, c)
		log.Debug("yay len of children in %v is %v", s, len(entry.V))
		return
	}

	log.Debug("not there, making a new one %s", s)
	// append copes with a nil slice, so no explicit initialisation is needed.
	fresh := &xmlChildrenKv{K: s, V: []*xmlNode{c}}
	n.Children = append(n.Children, fresh)
}
// element is a frame of the stack decodeXml keeps while walking the XML token
// stream: it ties the node currently being filled in to the element that
// encloses it.
type element struct {
	// parent is the enclosing element; nil for the frame wrapping the root node.
	parent *element
	// n is the node that collects this element's attributes, data and children.
	n *xmlNode
	// label is the element's local name, used as the key when n is attached
	// to its parent.
	label string
}
// decodeXml reads the XML token stream from dec.reader and builds a tree of
// xmlNode values underneath root.
//
// This code is heavily based on https://github.com/basgys/goxml2json; the main
// change is decoding into a structure that preserves the original order of the
// map keys.
//
// Attributes are stored as children whose label is dec.attributePrefix plus
// the attribute's local name. Character data and comments are trimmed with
// trimNonGraphic and stored on whichever element is open when they are read.
//
// It returns nil once the input is exhausted, or the first error reported by
// the XML tokenizer (for example on malformed or truncated input) instead of
// silently handing back a partially built tree.
func (dec *xmlDecoder) decodeXml(root *xmlNode) error {
	xmlDec := xml.NewDecoder(dec.reader)

	// That will convert the charset if the provided XML is non-UTF-8
	xmlDec.CharsetReader = charset.NewReaderLabel

	// Create first element from the root node
	elem := &element{
		parent: nil,
		n:      root,
	}

	for {
		t, err := xmlDec.Token()
		if err == io.EOF {
			// Clean end of input: the tree is complete.
			return nil
		}
		if err != nil {
			return err
		}

		switch se := t.(type) {
		case xml.StartElement:
			// Build a new current element and link it to its parent
			elem = &element{
				parent: elem,
				n:      &xmlNode{},
				label:  se.Name.Local,
			}

			// Extract attributes as children
			for _, a := range se.Attr {
				elem.n.AddChild(dec.attributePrefix+a.Name.Local, &xmlNode{Data: a.Value})
			}
		case xml.CharData:
			// Extract XML data (if any). Whitespace-only chunks (such as the
			// newline between a child element and the closing tag) must not
			// wipe out text that was already captured for this element.
			if text := trimNonGraphic(string(se)); text != "" {
				elem.n.Data = text
			}
		case xml.EndElement:
			// And add it to its parent list
			if elem.parent != nil {
				elem.parent.n.AddChild(elem.label, elem.n)
			}

			// Then change the current element to its parent
			elem = elem.parent
		case xml.Comment:
			elem.n.Comment = trimNonGraphic(string(se))
		}
	}
}
// trimNonGraphic strips every leading and trailing rune of s that is either
// non-graphic or a space, and returns what lies between. Runes in the interior
// are kept untouched, whatever their class.
//
// Graphic characters include letters, marks, numbers, punctuation, symbols,
// and spaces, from categories L, M, N, P, S, Zs.
// Spacing characters are set by category Z and property Pattern_White_Space.
//
// An input with nothing worth keeping yields the empty string.
func trimNonGraphic(s string) string {
	runes := []rune(s)

	// Walk forward to the first rune worth keeping.
	lo := 0
	for lo < len(runes) && (!unicode.IsGraphic(runes[lo]) || unicode.IsSpace(runes[lo])) {
		lo++
	}
	if lo == len(runes) {
		return ""
	}

	// Walk backward to the last one; runes[lo] guarantees the loop stops.
	hi := len(runes) - 1
	for !unicode.IsGraphic(runes[hi]) || unicode.IsSpace(runes[hi]) {
		hi--
	}

	return string(runes[lo : hi+1])
}