mirror of
https://github.com/mikefarah/yq.git
synced 2024-11-12 05:38:04 +00:00
Disable strict XML parsing by default #1155
This commit is contained in:
parent
3a1e2c7518
commit
bbeae229ca
@ -58,6 +58,27 @@ EOM
|
|||||||
assertEquals "$expected" "$X"
|
assertEquals "$expected" "$X"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
testInputXmlStrict() {
|
||||||
|
cat >test.yml <<EOL
|
||||||
|
<?xml version="1.0"?>
|
||||||
|
<!DOCTYPE root [
|
||||||
|
<!ENTITY writer "Catherine.">
|
||||||
|
<!ENTITY copyright "(r) Great">
|
||||||
|
]>
|
||||||
|
<root>
|
||||||
|
<item>&writer;&copyright;</item>
|
||||||
|
</root>
|
||||||
|
EOL
|
||||||
|
|
||||||
|
X=$(./yq -p=xml --xml-strict-mode test.yml 2>&1)
|
||||||
|
assertEquals 1 $?
|
||||||
|
assertEquals "Error: bad file 'test.yml': XML syntax error on line 7: invalid character entity &writer;" "$X"
|
||||||
|
|
||||||
|
X=$(./yq ea -p=xml --xml-strict-mode test.yml 2>&1)
|
||||||
|
assertEquals "Error: bad file 'test.yml': XML syntax error on line 7: invalid character entity &writer;" "$X"
|
||||||
|
}
|
||||||
|
|
||||||
testInputXmlGithubAction() {
|
testInputXmlGithubAction() {
|
||||||
cat >test.yml <<EOL
|
cat >test.yml <<EOL
|
||||||
<cat legs="4">BiBi</cat>
|
<cat legs="4">BiBi</cat>
|
||||||
|
@ -10,6 +10,7 @@ var inputFormat = "yaml"
|
|||||||
|
|
||||||
var xmlAttributePrefix = "+"
|
var xmlAttributePrefix = "+"
|
||||||
var xmlContentName = "+content"
|
var xmlContentName = "+content"
|
||||||
|
var xmlStrictMode = false
|
||||||
|
|
||||||
var exitStatus = false
|
var exitStatus = false
|
||||||
var forceColor = false
|
var forceColor = false
|
||||||
|
@ -54,6 +54,7 @@ yq -P sample.json
|
|||||||
yqlib.InitExpressionParser()
|
yqlib.InitExpressionParser()
|
||||||
yqlib.XMLPreferences.AttributePrefix = xmlAttributePrefix
|
yqlib.XMLPreferences.AttributePrefix = xmlAttributePrefix
|
||||||
yqlib.XMLPreferences.ContentName = xmlContentName
|
yqlib.XMLPreferences.ContentName = xmlContentName
|
||||||
|
yqlib.XMLPreferences.StrictMode = xmlStrictMode
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -70,6 +71,7 @@ yq -P sample.json
|
|||||||
|
|
||||||
rootCmd.PersistentFlags().StringVar(&xmlAttributePrefix, "xml-attribute-prefix", "+", "prefix for xml attributes")
|
rootCmd.PersistentFlags().StringVar(&xmlAttributePrefix, "xml-attribute-prefix", "+", "prefix for xml attributes")
|
||||||
rootCmd.PersistentFlags().StringVar(&xmlContentName, "xml-content-name", "+content", "name for xml content (if no attribute name is present).")
|
rootCmd.PersistentFlags().StringVar(&xmlContentName, "xml-content-name", "+content", "name for xml content (if no attribute name is present).")
|
||||||
|
rootCmd.PersistentFlags().BoolVar(&xmlStrictMode, "xml-strict-mode", false, "enables strict parsing of XML. See https://pkg.go.dev/encoding/xml for more details.")
|
||||||
|
|
||||||
rootCmd.PersistentFlags().BoolVarP(&nullInput, "null-input", "n", false, "Don't read input, simply evaluate the expression given. Useful for creating docs from scratch.")
|
rootCmd.PersistentFlags().BoolVarP(&nullInput, "null-input", "n", false, "Don't read input, simply evaluate the expression given. Useful for creating docs from scratch.")
|
||||||
rootCmd.PersistentFlags().BoolVarP(&noDocSeparators, "no-doc", "N", false, "Don't print document separators (---)")
|
rootCmd.PersistentFlags().BoolVarP(&noDocSeparators, "no-doc", "N", false, "Don't print document separators (---)")
|
||||||
|
@ -53,7 +53,7 @@ func configureDecoder() (yqlib.Decoder, error) {
|
|||||||
}
|
}
|
||||||
switch yqlibInputFormat {
|
switch yqlibInputFormat {
|
||||||
case yqlib.XMLInputFormat:
|
case yqlib.XMLInputFormat:
|
||||||
return yqlib.NewXMLDecoder(xmlAttributePrefix, xmlContentName), nil
|
return yqlib.NewXMLDecoder(xmlAttributePrefix, xmlContentName, xmlStrictMode), nil
|
||||||
case yqlib.PropertiesInputFormat:
|
case yqlib.PropertiesInputFormat:
|
||||||
return yqlib.NewPropertiesDecoder(), nil
|
return yqlib.NewPropertiesDecoder(), nil
|
||||||
}
|
}
|
||||||
|
@ -2,6 +2,7 @@ package yqlib
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/xml"
|
"encoding/xml"
|
||||||
|
"errors"
|
||||||
"io"
|
"io"
|
||||||
"strings"
|
"strings"
|
||||||
"unicode"
|
"unicode"
|
||||||
@ -14,14 +15,15 @@ type xmlDecoder struct {
|
|||||||
reader io.Reader
|
reader io.Reader
|
||||||
attributePrefix string
|
attributePrefix string
|
||||||
contentName string
|
contentName string
|
||||||
|
strictMode bool
|
||||||
finished bool
|
finished bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewXMLDecoder(attributePrefix string, contentName string) Decoder {
|
func NewXMLDecoder(attributePrefix string, contentName string, strictMode bool) Decoder {
|
||||||
if contentName == "" {
|
if contentName == "" {
|
||||||
contentName = "content"
|
contentName = "content"
|
||||||
}
|
}
|
||||||
return &xmlDecoder{attributePrefix: attributePrefix, contentName: contentName, finished: false}
|
return &xmlDecoder{attributePrefix: attributePrefix, contentName: contentName, finished: false, strictMode: strictMode}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (dec *xmlDecoder) Init(reader io.Reader) {
|
func (dec *xmlDecoder) Init(reader io.Reader) {
|
||||||
@ -189,7 +191,7 @@ type element struct {
|
|||||||
// of the map keys.
|
// of the map keys.
|
||||||
func (dec *xmlDecoder) decodeXML(root *xmlNode) error {
|
func (dec *xmlDecoder) decodeXML(root *xmlNode) error {
|
||||||
xmlDec := xml.NewDecoder(dec.reader)
|
xmlDec := xml.NewDecoder(dec.reader)
|
||||||
|
xmlDec.Strict = dec.strictMode
|
||||||
// That will convert the charset if the provided XML is non-UTF-8
|
// That will convert the charset if the provided XML is non-UTF-8
|
||||||
xmlDec.CharsetReader = charset.NewReaderLabel
|
xmlDec.CharsetReader = charset.NewReaderLabel
|
||||||
|
|
||||||
@ -201,7 +203,7 @@ func (dec *xmlDecoder) decodeXML(root *xmlNode) error {
|
|||||||
|
|
||||||
for {
|
for {
|
||||||
t, e := xmlDec.Token()
|
t, e := xmlDec.Token()
|
||||||
if e != nil {
|
if e != nil && !errors.Is(e, io.EOF) {
|
||||||
return e
|
return e
|
||||||
}
|
}
|
||||||
if t == nil {
|
if t == nil {
|
||||||
|
@ -120,6 +120,31 @@ cat:
|
|||||||
+legs: "4"
|
+legs: "4"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Parse xml: custom dtd
|
||||||
|
DTD entities are ignored.
|
||||||
|
|
||||||
|
Given a sample.xml file of:
|
||||||
|
```xml
|
||||||
|
|
||||||
|
<?xml version="1.0"?>
|
||||||
|
<!DOCTYPE root [
|
||||||
|
<!ENTITY writer "Blah.">
|
||||||
|
<!ENTITY copyright "Blah">
|
||||||
|
]>
|
||||||
|
<root>
|
||||||
|
<item>&writer;&copyright;</item>
|
||||||
|
</root>
|
||||||
|
```
|
||||||
|
then
|
||||||
|
```bash
|
||||||
|
yq -p=xml '.' sample.xml
|
||||||
|
```
|
||||||
|
will output
|
||||||
|
```yaml
|
||||||
|
root:
|
||||||
|
item: '&writer;&copyright;'
|
||||||
|
```
|
||||||
|
|
||||||
## Parse xml: with comments
|
## Parse xml: with comments
|
||||||
A best attempt is made to preserve comments.
|
A best attempt is made to preserve comments.
|
||||||
|
|
||||||
|
@ -9,7 +9,7 @@ import (
|
|||||||
yaml "gopkg.in/yaml.v3"
|
yaml "gopkg.in/yaml.v3"
|
||||||
)
|
)
|
||||||
|
|
||||||
var XMLPreferences = xmlPreferences{AttributePrefix: "+", ContentName: "+content"}
|
var XMLPreferences = xmlPreferences{AttributePrefix: "+", ContentName: "+content", StrictMode: false}
|
||||||
|
|
||||||
type xmlEncoder struct {
|
type xmlEncoder struct {
|
||||||
attributePrefix string
|
attributePrefix string
|
||||||
|
@ -413,9 +413,9 @@ func initLexer() (*lex.Lexer, error) {
|
|||||||
|
|
||||||
lexer.Add([]byte(`load`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewYamlDecoder()}))
|
lexer.Add([]byte(`load`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewYamlDecoder()}))
|
||||||
|
|
||||||
lexer.Add([]byte(`xmlload`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewXMLDecoder(XMLPreferences.AttributePrefix, XMLPreferences.ContentName)}))
|
lexer.Add([]byte(`xmlload`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewXMLDecoder(XMLPreferences.AttributePrefix, XMLPreferences.ContentName, XMLPreferences.StrictMode)}))
|
||||||
lexer.Add([]byte(`load_xml`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewXMLDecoder(XMLPreferences.AttributePrefix, XMLPreferences.ContentName)}))
|
lexer.Add([]byte(`load_xml`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewXMLDecoder(XMLPreferences.AttributePrefix, XMLPreferences.ContentName, XMLPreferences.StrictMode)}))
|
||||||
lexer.Add([]byte(`loadxml`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewXMLDecoder(XMLPreferences.AttributePrefix, XMLPreferences.ContentName)}))
|
lexer.Add([]byte(`loadxml`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewXMLDecoder(XMLPreferences.AttributePrefix, XMLPreferences.ContentName, XMLPreferences.StrictMode)}))
|
||||||
|
|
||||||
lexer.Add([]byte(`load_base64`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewBase64Decoder()}))
|
lexer.Add([]byte(`load_base64`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewBase64Decoder()}))
|
||||||
|
|
||||||
|
@ -24,6 +24,7 @@ func InitExpressionParser() {
|
|||||||
type xmlPreferences struct {
|
type xmlPreferences struct {
|
||||||
AttributePrefix string
|
AttributePrefix string
|
||||||
ContentName string
|
ContentName string
|
||||||
|
StrictMode bool
|
||||||
}
|
}
|
||||||
|
|
||||||
var log = logging.MustGetLogger("yq-lib")
|
var log = logging.MustGetLogger("yq-lib")
|
||||||
|
@ -104,7 +104,7 @@ func decodeOperator(d *dataTreeNavigator, context Context, expressionNode *Expre
|
|||||||
case YamlInputFormat:
|
case YamlInputFormat:
|
||||||
decoder = NewYamlDecoder()
|
decoder = NewYamlDecoder()
|
||||||
case XMLInputFormat:
|
case XMLInputFormat:
|
||||||
decoder = NewXMLDecoder(XMLPreferences.AttributePrefix, XMLPreferences.ContentName)
|
decoder = NewXMLDecoder(XMLPreferences.AttributePrefix, XMLPreferences.ContentName, XMLPreferences.StrictMode)
|
||||||
case Base64InputFormat:
|
case Base64InputFormat:
|
||||||
decoder = NewBase64Decoder()
|
decoder = NewBase64Decoder()
|
||||||
}
|
}
|
||||||
|
@ -153,6 +153,20 @@ var expectedXMLWithComments = `<!-- above_cat inline_cat --><cat><!-- above_arra
|
|||||||
</cat><!-- below_cat -->
|
</cat><!-- below_cat -->
|
||||||
`
|
`
|
||||||
|
|
||||||
|
var xmlWithCustomDtd = `
|
||||||
|
<?xml version="1.0"?>
|
||||||
|
<!DOCTYPE root [
|
||||||
|
<!ENTITY writer "Blah.">
|
||||||
|
<!ENTITY copyright "Blah">
|
||||||
|
]>
|
||||||
|
<root>
|
||||||
|
<item>&writer;&copyright;</item>
|
||||||
|
</root>`
|
||||||
|
|
||||||
|
var expectedDtd = `root:
|
||||||
|
item: '&writer;&copyright;'
|
||||||
|
`
|
||||||
|
|
||||||
var xmlScenarios = []formatScenario{
|
var xmlScenarios = []formatScenario{
|
||||||
{
|
{
|
||||||
description: "Parse xml: simple",
|
description: "Parse xml: simple",
|
||||||
@ -185,6 +199,12 @@ var xmlScenarios = []formatScenario{
|
|||||||
input: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<cat legs=\"4\">meow</cat>",
|
input: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<cat legs=\"4\">meow</cat>",
|
||||||
expected: "cat:\n +content: meow\n +legs: \"4\"\n",
|
expected: "cat:\n +content: meow\n +legs: \"4\"\n",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
description: "Parse xml: custom dtd",
|
||||||
|
subdescription: "DTD entities are ignored.",
|
||||||
|
input: xmlWithCustomDtd,
|
||||||
|
expected: expectedDtd,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
description: "Parse xml: with comments",
|
description: "Parse xml: with comments",
|
||||||
subdescription: "A best attempt is made to preserve comments.",
|
subdescription: "A best attempt is made to preserve comments.",
|
||||||
@ -286,9 +306,9 @@ func testXMLScenario(t *testing.T, s formatScenario) {
|
|||||||
if s.scenarioType == "encode" {
|
if s.scenarioType == "encode" {
|
||||||
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewYamlDecoder(), NewXMLEncoder(2, "+", "+content")), s.description)
|
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewYamlDecoder(), NewXMLEncoder(2, "+", "+content")), s.description)
|
||||||
} else if s.scenarioType == "roundtrip" {
|
} else if s.scenarioType == "roundtrip" {
|
||||||
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content"), NewXMLEncoder(2, "+", "+content")), s.description)
|
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content", false), NewXMLEncoder(2, "+", "+content")), s.description)
|
||||||
} else {
|
} else {
|
||||||
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content"), NewYamlEncoder(4, false, true, true)), s.description)
|
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewXMLDecoder("+", "+content", false), NewYamlEncoder(4, false, true, true)), s.description)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -327,7 +347,7 @@ func documentXMLDecodeScenario(w *bufio.Writer, s formatScenario) {
|
|||||||
writeOrPanic(w, fmt.Sprintf("```bash\nyq -p=xml '%v' sample.xml\n```\n", expression))
|
writeOrPanic(w, fmt.Sprintf("```bash\nyq -p=xml '%v' sample.xml\n```\n", expression))
|
||||||
writeOrPanic(w, "will output\n")
|
writeOrPanic(w, "will output\n")
|
||||||
|
|
||||||
writeOrPanic(w, fmt.Sprintf("```yaml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content"), NewYamlEncoder(2, false, true, true))))
|
writeOrPanic(w, fmt.Sprintf("```yaml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false), NewYamlEncoder(2, false, true, true))))
|
||||||
}
|
}
|
||||||
|
|
||||||
func documentXMLEncodeScenario(w *bufio.Writer, s formatScenario) {
|
func documentXMLEncodeScenario(w *bufio.Writer, s formatScenario) {
|
||||||
@ -363,7 +383,7 @@ func documentXMLRoundTripScenario(w *bufio.Writer, s formatScenario) {
|
|||||||
writeOrPanic(w, "```bash\nyq -p=xml -o=xml '.' sample.xml\n```\n")
|
writeOrPanic(w, "```bash\nyq -p=xml -o=xml '.' sample.xml\n```\n")
|
||||||
writeOrPanic(w, "will output\n")
|
writeOrPanic(w, "will output\n")
|
||||||
|
|
||||||
writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content"), NewXMLEncoder(2, "+", "+content"))))
|
writeOrPanic(w, fmt.Sprintf("```xml\n%v```\n\n", processFormatScenario(s, NewXMLDecoder("+", "+content", false), NewXMLEncoder(2, "+", "+content"))))
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestXMLScenarios(t *testing.T) {
|
func TestXMLScenarios(t *testing.T) {
|
||||||
|
Loading…
Reference in New Issue
Block a user