mirror of
https://github.com/mikefarah/yq.git
synced 2024-11-12 05:38:04 +00:00
Add `--nul-output|-0
` flag to separate element with NUL character (#1550)
This is to ensure solid parsing of complex data (with any binary content except NUL chars) by separating the `yq` root collection member's output with NUL char. As a safe-guard, an error will be cast if trying to use NUL character with content that contains itself NUL characters inside.
This commit is contained in:
parent
311622d14b
commit
5fd2890d1b
286
acceptance_tests/nul-separator.sh
Executable file
286
acceptance_tests/nul-separator.sh
Executable file
@ -0,0 +1,286 @@
|
||||
#!/bin/bash
|
||||
|
||||
setUp() {
|
||||
rm test*.yml || true
|
||||
}
|
||||
|
||||
## Convenient bash shortcut to read records of NUL separated values
|
||||
## from stdin the safe way. See example usage in the next tests.
|
||||
read-0() {
|
||||
local eof="" IFS=''
|
||||
while [ "$1" ]; do
|
||||
## - The `-r` avoids bad surprise with '\n' and other interpreted
|
||||
## sequences that can be read.
|
||||
## - The `-d ''` is the (strange?) way to refer to NUL delimiter.
|
||||
## - The `--` is how to avoid unpleasant surprises if your
|
||||
## "$1" starts with "-" (minus) sign. This protection also
|
||||
## will produce a readable error if you want to try to start
|
||||
## your variable names with a "-".
|
||||
read -r -d '' -- "$1" || eof=1
|
||||
shift
|
||||
done
|
||||
[ -z "$eof" ] ## fail on EOF
|
||||
}
|
||||
|
||||
## Convenient bash shortcut to be used with the next function `p-err`
|
||||
## to read NUL separated values the safe way AND catch any errors from
|
||||
## the process creating the stream of NUL separated data. See example
|
||||
## usage in the tests.
|
||||
read-0-err() {
|
||||
local ret="$1" eof="" idx=0 last=
|
||||
read -r -- "${ret?}" <<<"0"
|
||||
shift
|
||||
while [ "$1" ]; do
|
||||
last=$idx
|
||||
read -r -d '' -- "$1" || {
|
||||
## Put this last value in ${!ret}
|
||||
eof="$1"
|
||||
read -r -- "$ret" <<<"${!eof}"
|
||||
break
|
||||
}
|
||||
((idx++))
|
||||
shift
|
||||
done
|
||||
[ -z "$eof" ] || {
|
||||
if [ "$last" != 0 ]; then
|
||||
## Uhoh, we have no idea if the errorlevel of the internal
|
||||
## command was properly delimited with a NUL char, and
|
||||
## anyway something went really wrong at least about the
|
||||
## number of fields separated by NUL char and the one
|
||||
## expected.
|
||||
echo "Error: read-0-err couldn't fill all value $ret = '${!ret}', '$eof', '${!eof}'" >&2
|
||||
read -r -- "$ret" <<<"not-enough-values"
|
||||
else
|
||||
if ! [[ "${!ret}" =~ ^[0-9]+$ && "${!ret}" -ge 0 && "${!ret}" -le 127 ]]; then
|
||||
## This could happen if you don't use `p-err` wrapper,
|
||||
## or used stdout in unexpected ways in your inner
|
||||
## command.
|
||||
echo "Error: last value is not a number, did you finish with an errorlevel ?" >&2
|
||||
read -r -- "$ret" <<<"last-value-not-a-number"
|
||||
fi
|
||||
fi
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
## Simply runs command given as argument and adds errorlevel in the
|
||||
## standard output. Is expected to be used in tandem with
|
||||
## `read-0-err`.
|
||||
p-err() {
|
||||
local exp="$1"
|
||||
"$@"
|
||||
printf "%s" "$?"
|
||||
}
|
||||
|
||||
wyq-r() {
|
||||
local exp="$1"
|
||||
./yq e -0 -r=false "$1"
|
||||
printf "%s" "$?"
|
||||
}
|
||||
|
||||
testBasicUsageRaw() {
|
||||
cat >test.yml <<EOL
|
||||
a: foo
|
||||
b: bar
|
||||
EOL
|
||||
|
||||
printf "foo\0bar\0" > expected.out
|
||||
|
||||
## We need to compare binary content here. We have to filter the compared
|
||||
## content through a representation that gets rid of NUL chars but accurately
|
||||
## transcribe the content.
|
||||
## Also as it would be nice to have a pretty output in case the test fails,
|
||||
## we use here 'hd': a widely available shortcut to 'hexdump' that will
|
||||
## pretty-print any binary to it's hexadecimal representation.
|
||||
##
|
||||
## Note that the standard `assertEquals` compare its arguments
|
||||
## value, but they can't hold NUL characters (this comes from the
|
||||
## limitation of the C API of `exec*(..)` functions that requires
|
||||
## `const char *arv[]`). And these are NUL terminated strings. As a
|
||||
## consequence, the NUL characters gets removed in bash arguments.
|
||||
assertEquals "$(hd expected.out)" \
|
||||
"$(./yq e -0 '.a, .b' test.yml | hd)"
|
||||
|
||||
rm expected.out
|
||||
}
|
||||
|
||||
testBasicUsage() {
|
||||
local a b
|
||||
cat >test.yml <<EOL
|
||||
a: foo
|
||||
b: bar
|
||||
EOL
|
||||
|
||||
## We provide 2 values, and ask to fill 2 variables.
|
||||
read-0 a b < <(./yq e -0 '.a, .b' test.yml)
|
||||
assertEquals "$?" "0" ## Everything is fine
|
||||
assertEquals "foo" "$a" ## Values are correctly parsed
|
||||
assertEquals "bar" "$b"
|
||||
|
||||
a=YYY ; b=XXX
|
||||
## Not enough values provided to fill `a` and `b`.
|
||||
read-0 a b < <(./yq e -0 '.a' test.yml)
|
||||
assertEquals "$?" "1" ## An error was emitted
|
||||
assertEquals "foo" "$a" ## First value was correctly parsed
|
||||
assertEquals "" "$b" ## Second was still reset
|
||||
|
||||
## Error from inner command are not catchable !. Use
|
||||
## `read-0-err`/`p-err` for that.
|
||||
read-0 a < <(printf "\0"; ./yq e -0 'xxx' test.yml; )
|
||||
assertEquals "$?" "0"
|
||||
|
||||
}
|
||||
|
||||
testBasicUsageJson() {
|
||||
cat >test.yml <<EOL
|
||||
a:
|
||||
x: foo
|
||||
b: bar
|
||||
EOL
|
||||
|
||||
read-0 a b < <(./yq e -0 -o=json '.a, .b' test.yml)
|
||||
|
||||
assertEquals '{
|
||||
"x": "foo"
|
||||
}' "$a"
|
||||
assertEquals '"bar"' "$b"
|
||||
|
||||
}
|
||||
|
||||
testFailWithValueContainingNUL() {
|
||||
local a b c
|
||||
## Note that value of field 'a' actually contains a NUL char !
|
||||
cat >test.yml <<EOL
|
||||
a: "foo\u0000bar"
|
||||
b: 1
|
||||
c: |
|
||||
wiz
|
||||
boom
|
||||
EOL
|
||||
|
||||
## We are looking for trouble with asking to separated fields with NUL
|
||||
## char and requested value `.a` actually contains itself a NUL char !
|
||||
read-0 a b c < <(./yq e -0 '.a, .b, .c' test.yml)
|
||||
assertNotEquals "0" "$?" ## read-0 failed to fill all values
|
||||
|
||||
## But here, we can request for one value, even if `./yq` fails
|
||||
read-0 b < <(./yq e -0 '.b, .a' test.yml)
|
||||
assertEquals "0" "$?" ## read-0 succeeds at feeding the first value
|
||||
## Note: to catch the failure of `yq`, see in the next tests the usage
|
||||
## of `read-0-err`.
|
||||
|
||||
## using -r=false solves any NUL containing value issues, but keeps
|
||||
## all in YAML representation:
|
||||
read-0 a b c < <(./yq e -0 -r=false '.a, .b, .c' test.yml)
|
||||
assertEquals "0" "$?" ## All goes well despite asking for `a` value
|
||||
|
||||
assertEquals '"foo\0bar"' "$a" ## This is a YAML string representation
|
||||
assertEquals '1' "$b"
|
||||
assertEquals '|
|
||||
wiz
|
||||
boom' "$c"
|
||||
}
|
||||
|
||||
testStandardLoop() {
|
||||
local E a b res
|
||||
|
||||
## Here everything is normal: 4 values, that will be paired
|
||||
## in key/values.
|
||||
cat >test.yml <<EOL
|
||||
- yay
|
||||
- wiz
|
||||
- hop
|
||||
- pow
|
||||
EOL
|
||||
|
||||
res=""
|
||||
while read-0-err E a b; do
|
||||
res+="$a: $b;"
|
||||
done < <(p-err ./yq -0 '.[]' test.yml)
|
||||
|
||||
assertEquals "0" "$E" ## errorlevel of internal command
|
||||
assertEquals "yay: wiz;hop: pow;" "$res" ## expected result
|
||||
}
|
||||
|
||||
testStandardLoopWithoutEnoughValues() {
|
||||
local E a b res
|
||||
|
||||
## Here 5 values, there will be a missing value when reading
|
||||
## pairs of value.
|
||||
cat >test.yml <<EOL
|
||||
- yay
|
||||
- wiz
|
||||
- hop
|
||||
- pow
|
||||
- kwak
|
||||
EOL
|
||||
|
||||
res=""
|
||||
## The loop will succeed 2 times then fail
|
||||
while read-0-err E a b; do
|
||||
res+="$a: $b;"
|
||||
done < <(p-err ./yq -0 '.[]' test.yml)
|
||||
|
||||
assertEquals "not-enough-values" "$E" ## Not enough value error
|
||||
assertEquals "yay: wiz;hop: pow;" "$res" ## the 2 full key/value pairs
|
||||
|
||||
}
|
||||
|
||||
testStandardLoopWithInternalCmdError() {
|
||||
local E a b res
|
||||
|
||||
## Note the third value contains a NUL char !
|
||||
cat >test.yml <<EOL
|
||||
- yay
|
||||
- wiz
|
||||
- "foo\0bar"
|
||||
- hop
|
||||
- pow
|
||||
EOL
|
||||
|
||||
res=""
|
||||
## It should be only upon the second pass in the loop that
|
||||
## read-0-err will catch the fact that there is an error !
|
||||
while read-0-err E a b; do
|
||||
res+="$a: $b;"
|
||||
done < <(p-err ./yq -0 '.[]' test.yml)
|
||||
assertEquals "1" "$E" ## Internal command errorlevel (from `./yq`)
|
||||
assertEquals "yay: wiz;" "$res" ## first 2 values were ok at least
|
||||
|
||||
}
|
||||
|
||||
testStandardLoopNotEnoughErrorEatsCmdError() {
|
||||
local E a b res
|
||||
|
||||
## Because of possible edge cases where the internal errorlevel
|
||||
## reported by `p-err` in the standard output might be mangled
|
||||
## with the unfinished record, `read-0-err E ...` will NOT report
|
||||
## the internal command error in the variable E and instead will
|
||||
## store the value 'not-enough-values'. In real world, anyway, you
|
||||
## will want to react the same if the internal command failed
|
||||
## and/or you didn't get as much values as expected while
|
||||
## reading. Keep in mind also that standard error is not
|
||||
## swallowed, so you can read reports from the inner command AND
|
||||
## from `read-0-err`.
|
||||
|
||||
## Here, note that the fourth value contains a NUL char !
|
||||
cat >test.yml <<EOL
|
||||
- yay
|
||||
- wiz
|
||||
- hop
|
||||
- "foo\0bar"
|
||||
- pow
|
||||
EOL
|
||||
|
||||
res=""
|
||||
## It should be only upon the second loop that read-0-err will catch
|
||||
## the fact that there are not enough data to fill the requested variables
|
||||
while read-0-err E a b; do
|
||||
res+="$a: $b;"
|
||||
done < <(p-err ./yq -0 '.[]' test.yml)
|
||||
assertEquals "not-enough-values" "$E" ## Not enough values error eats internal error !
|
||||
assertEquals "yay: wiz;" "$res" ## first 2 values were ok at least
|
||||
}
|
||||
|
||||
|
||||
source ./scripts/shunit2
|
@ -18,6 +18,7 @@ var colorsEnabled = false
|
||||
var indent = 2
|
||||
var noDocSeparators = false
|
||||
var nullInput = false
|
||||
var nulSepOutput = false
|
||||
var verbose = false
|
||||
var version = false
|
||||
var prettyPrint = false
|
||||
|
@ -90,6 +90,9 @@ func evaluateAll(cmd *cobra.Command, args []string) (cmdError error) {
|
||||
}
|
||||
|
||||
printer := yqlib.NewPrinter(encoder, printerWriter)
|
||||
if nulSepOutput {
|
||||
printer.SetNulSepOutput(true)
|
||||
}
|
||||
|
||||
if frontMatter != "" {
|
||||
frontMatterHandler := yqlib.NewFrontMatterHandler(args[0])
|
||||
|
@ -99,6 +99,9 @@ func evaluateSequence(cmd *cobra.Command, args []string) (cmdError error) {
|
||||
}
|
||||
|
||||
printer := yqlib.NewPrinter(encoder, printerWriter)
|
||||
if nulSepOutput {
|
||||
printer.SetNulSepOutput(true)
|
||||
}
|
||||
|
||||
decoder, err := configureDecoder(false)
|
||||
if err != nil {
|
||||
|
@ -86,6 +86,7 @@ yq -P sample.json
|
||||
rootCmd.PersistentFlags().BoolVarP(&writeInplace, "inplace", "i", false, "update the file inplace of first file given.")
|
||||
rootCmd.PersistentFlags().VarP(unwrapScalarFlag, "unwrapScalar", "r", "unwrap scalar, print the value with no quotes, colors or comments. Defaults to true for yaml")
|
||||
rootCmd.PersistentFlags().Lookup("unwrapScalar").NoOptDefVal = "true"
|
||||
rootCmd.PersistentFlags().BoolVarP(&nulSepOutput, "nul-output", "0", false, "Use NUL char to separate values. If unwrap scalar is also set, fail if unwrapped scalar contains NUL char.")
|
||||
|
||||
rootCmd.PersistentFlags().BoolVarP(&prettyPrint, "prettyPrint", "P", false, "pretty print, shorthand for '... style = \"\"'")
|
||||
rootCmd.PersistentFlags().BoolVarP(&exitStatus, "exit-status", "e", false, "set exit status if there are no matches or null or false is returned")
|
||||
|
@ -2,6 +2,7 @@ package yqlib
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"container/list"
|
||||
"fmt"
|
||||
"io"
|
||||
@ -15,6 +16,7 @@ type Printer interface {
|
||||
PrintedAnything() bool
|
||||
//e.g. when given a front-matter doc, like jekyll
|
||||
SetAppendix(reader io.Reader)
|
||||
SetNulSepOutput(nulSepOutput bool)
|
||||
}
|
||||
|
||||
type PrinterOutputFormat uint32
|
||||
@ -59,6 +61,7 @@ type resultsPrinter struct {
|
||||
printedMatches bool
|
||||
treeNavigator DataTreeNavigator
|
||||
appendixReader io.Reader
|
||||
nulSepOutput bool
|
||||
}
|
||||
|
||||
func NewPrinter(encoder Encoder, printerWriter PrinterWriter) Printer {
|
||||
@ -67,9 +70,16 @@ func NewPrinter(encoder Encoder, printerWriter PrinterWriter) Printer {
|
||||
printerWriter: printerWriter,
|
||||
firstTimePrinting: true,
|
||||
treeNavigator: NewDataTreeNavigator(),
|
||||
nulSepOutput: false,
|
||||
}
|
||||
}
|
||||
|
||||
func (p *resultsPrinter) SetNulSepOutput(nulSepOutput bool) {
|
||||
log.Debug("Setting NUL separator output")
|
||||
|
||||
p.nulSepOutput = nulSepOutput
|
||||
}
|
||||
|
||||
func (p *resultsPrinter) SetAppendix(reader io.Reader) {
|
||||
p.appendixReader = reader
|
||||
}
|
||||
@ -84,6 +94,16 @@ func (p *resultsPrinter) printNode(node *yaml.Node, writer io.Writer) error {
|
||||
return p.encoder.Encode(writer, node)
|
||||
}
|
||||
|
||||
func removeLastEOL(b *bytes.Buffer) {
|
||||
data := b.Bytes()
|
||||
n := len(data)
|
||||
if n >= 2 && data[n-2] == '\r' && data[n-1] == '\n' {
|
||||
b.Truncate(n - 2)
|
||||
} else if n >= 1 && (data[n-1] == '\r' || data[n-1] == '\n') {
|
||||
b.Truncate(n - 1)
|
||||
}
|
||||
}
|
||||
|
||||
func (p *resultsPrinter) PrintResults(matchingNodes *list.List) error {
|
||||
log.Debug("PrintResults for %v matches", matchingNodes.Len())
|
||||
|
||||
@ -128,18 +148,40 @@ func (p *resultsPrinter) PrintResults(matchingNodes *list.List) error {
|
||||
}
|
||||
}
|
||||
|
||||
if err := p.encoder.PrintLeadingContent(writer, mappedDoc.LeadingContent); err != nil {
|
||||
var destination io.Writer = writer
|
||||
tempBuffer := bytes.NewBuffer(nil)
|
||||
if p.nulSepOutput {
|
||||
destination = tempBuffer
|
||||
}
|
||||
|
||||
if err := p.encoder.PrintLeadingContent(destination, mappedDoc.LeadingContent); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := p.printNode(mappedDoc.Node, writer); err != nil {
|
||||
if err := p.printNode(mappedDoc.Node, destination); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := p.encoder.PrintLeadingContent(writer, mappedDoc.TrailingContent); err != nil {
|
||||
if err := p.encoder.PrintLeadingContent(destination, mappedDoc.TrailingContent); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if p.nulSepOutput {
|
||||
removeLastEOL(tempBuffer)
|
||||
tempBufferBytes := tempBuffer.Bytes()
|
||||
if bytes.IndexByte(tempBufferBytes, 0) != -1 {
|
||||
return fmt.Errorf(
|
||||
"Can't serialize value because it contains NUL char and you are using NUL separated output",
|
||||
)
|
||||
}
|
||||
if _, err := writer.Write(tempBufferBytes); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := writer.Write([]byte{0}); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
p.previousDocIndex = mappedDoc.Document
|
||||
if err := writer.Flush(); err != nil {
|
||||
return err
|
||||
|
@ -340,3 +340,53 @@ func TestPrinterMultipleDocsJson(t *testing.T) {
|
||||
writer.Flush()
|
||||
test.AssertResult(t, expected, output.String())
|
||||
}
|
||||
|
||||
func TestPrinterNulSeparator(t *testing.T) {
|
||||
var output bytes.Buffer
|
||||
var writer = bufio.NewWriter(&output)
|
||||
printer := NewSimpleYamlPrinter(writer, YamlOutputFormat, true, false, 2, false)
|
||||
printer.SetNulSepOutput(true)
|
||||
node, err := getExpressionParser().ParseExpression(".a")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
streamEvaluator := NewStreamEvaluator()
|
||||
_, err = streamEvaluator.Evaluate("sample", strings.NewReader(multiDocSample), node, printer, NewYamlDecoder(ConfiguredYamlPreferences))
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
writer.Flush()
|
||||
expected := "banana\x00apple\x00coconut\x00"
|
||||
test.AssertResult(t, expected, output.String())
|
||||
}
|
||||
|
||||
func TestPrinterNulSeparatorWithJson(t *testing.T) {
|
||||
var output bytes.Buffer
|
||||
var writer = bufio.NewWriter(&output)
|
||||
// note printDocSeparators is true, it should still not print document separators
|
||||
// when outputing JSON.
|
||||
encoder := NewJSONEncoder(0, false, false)
|
||||
if encoder == nil {
|
||||
t.Skipf("no support for %s output format", "json")
|
||||
}
|
||||
printer := NewPrinter(encoder, NewSinglePrinterWriter(writer))
|
||||
printer.SetNulSepOutput(true)
|
||||
|
||||
inputs, err := readDocuments(strings.NewReader(multiDocSample), "sample.yml", 0, NewYamlDecoder(ConfiguredYamlPreferences))
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
inputs.Front().Value.(*CandidateNode).LeadingContent = "# ignore this\n"
|
||||
|
||||
err = printer.PrintResults(inputs)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
expected := `{"a":"banana"}` + "\x00" + `{"a":"apple"}` + "\x00" + `{"a":"coconut"}` + "\x00"
|
||||
|
||||
writer.Flush()
|
||||
test.AssertResult(t, expected, output.String())
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user