Implement shell output format (#1645)

* fix typo in a comment

* implement shell output format

* fix a typo

* add two test cases, have source use ASCII only

* add integration tests and documentation

* add fixes after code review
This commit is contained in:
Giorgio Gallo 2023-05-04 03:06:56 +02:00 committed by GitHub
parent bbe3055006
commit 80b42b81fd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 438 additions and 3 deletions

View File

@ -12,7 +12,7 @@
# Documentation # Documentation
The documentation is a bit of a mixed bag (sorry in advanced, I do plan on simplifying it...) - with some parts automatically generated and stiched together and some statically defined. The documentation is a bit of a mixed bag (sorry in advance, I do plan on simplifying it...) - with some parts automatically generated and stiched together and some statically defined.
Documentation is written in markdown, and is published in the 'gitbook' branch. Documentation is written in markdown, and is published in the 'gitbook' branch.

View File

@ -195,6 +195,8 @@ func createEncoder(format yqlib.PrinterOutputFormat) (yqlib.Encoder, error) {
return yqlib.NewXMLEncoder(indent, yqlib.ConfiguredXMLPreferences), nil return yqlib.NewXMLEncoder(indent, yqlib.ConfiguredXMLPreferences), nil
case yqlib.TomlOutputFormat: case yqlib.TomlOutputFormat:
return yqlib.NewTomlEncoder(), nil return yqlib.NewTomlEncoder(), nil
case yqlib.ShellVariablesOutputFormat:
return yqlib.NewShellVariablesEncoder(), nil
} }
return nil, fmt.Errorf("invalid encoder: %v", format) return nil, fmt.Errorf("invalid encoder: %v", format)
} }

View File

@ -0,0 +1,86 @@
## Encode shell variables
Note that comments are dropped and values will be enclosed in single quotes as needed.
Given a sample.yml file of:
```yaml
# comment
name: Mike Wazowski
eyes:
color: turquoise
number: 1
friends:
- James P. Sullivan
- Celia Mae
```
then
```bash
yq -o=shell sample.yml
```
will output
```sh
name='Mike Wazowski'
eyes_color=turquoise
eyes_number=1
friends_0='James P. Sullivan'
friends_1='Celia Mae'
```
## Encode shell variables: illegal variable names as key.
Keys that would be illegal as variable keys are adapted.
Given a sample.yml file of:
```yaml
ascii_=_symbols: replaced with _
"ascii_ _controls": dropped (this example uses \t)
nonascii_א_characters: dropped
effrot_expeñded_tò_preserve_accented_latin_letters: moderate (via unicode NFKD)
```
then
```bash
yq -o=shell sample.yml
```
will output
```sh
ascii___symbols='replaced with _'
ascii__controls='dropped (this example uses \t)'
nonascii__characters=dropped
effrot_expended_to_preserve_accented_latin_letters='moderate (via unicode NFKD)'
```
## Encode shell variables: empty values, arrays and maps
Empty values are encoded to empty variables, but empty arrays and maps are skipped.
Given a sample.yml file of:
```yaml
empty:
value:
array: []
map: {}
```
then
```bash
yq -o=shell sample.yml
```
will output
```sh
empty_value=
```
## Encode shell variables: single quotes in values
Single quotes in values are encoded as '"'"' (close single quote, double-quoted single quote, open single quote).
Given a sample.yml file of:
```yaml
name: Miles O'Brien
```
then
```bash
yq -o=shell sample.yml
```
will output
```sh
name='Miles O'"'"'Brien'
```

View File

@ -0,0 +1,153 @@
package yqlib
import (
"fmt"
"io"
"strings"
"unicode/utf8"
"golang.org/x/text/unicode/norm"
yaml "gopkg.in/yaml.v3"
)
// shellVariablesEncoder writes a YAML node tree as shell variable assignments,
// one "name=value" line per scalar. It carries no state.
type shellVariablesEncoder struct{}
// NewShellVariablesEncoder returns an Encoder that renders nodes as shell
// variable assignments.
func NewShellVariablesEncoder() Encoder {
	return new(shellVariablesEncoder)
}
// CanHandleAliases always reports false.
// NOTE(review): presumably this makes the caller resolve aliases before
// handing nodes to this encoder - confirm against the Encoder contract.
func (*shellVariablesEncoder) CanHandleAliases() bool {
	return false
}
// PrintDocumentSeparator is a no-op for this format: it ignores the writer,
// writes nothing and always returns nil.
func (*shellVariablesEncoder) PrintDocumentSeparator(_ io.Writer) error {
	return nil
}
// PrintLeadingContent is a no-op for this format: both arguments are ignored,
// nothing is written and nil is always returned.
func (*shellVariablesEncoder) PrintLeadingContent(_ io.Writer, _ string) error {
	return nil
}
// Encode writes node to writer as shell variable assignments, one
// "name=value" line per scalar found in the tree.
//
// The node is first passed through mapKeysToStrings (presumably normalising
// mapping keys to strings - confirm against that helper), then walked from the
// root with an empty variable-name prefix. The error returned is whatever the
// walk reports: a write failure or an unsupported node kind.
func (pe *shellVariablesEncoder) Encode(writer io.Writer, node *yaml.Node) error {
	mapKeysToStrings(node)
	return pe.doEncode(&writer, node, "")
}
// doEncode recursively writes node to *w as "name=value" lines.
//
// path is the variable name accumulated so far from the enclosing mapping keys
// and sequence indices (already sanitised by appendPath); it is empty at the
// root. Scalars emit exactly one line; sequences and mappings recurse into
// their children, so empty ones emit nothing; document and alias nodes are
// unwrapped. Comments attached to nodes are never written.
//
// A nil node, a document without content and a trailing mapping key without a
// value are skipped rather than dereferenced, so malformed trees cannot panic.
//
// It returns the first write error encountered, or an error for a node kind
// it does not recognise.
func (pe *shellVariablesEncoder) doEncode(w *io.Writer, node *yaml.Node, path string) error {
	if node == nil {
		// Nothing to emit (for instance an alias whose target is unset);
		// reading node.Kind below would panic.
		return nil
	}

	switch node.Kind {
	case yaml.ScalarNode:
		nonemptyPath := path
		if path == "" {
			// We can't assign an empty variable "=somevalue" because that would error out if sourced in a shell,
			// nor can we use "_" as a variable name ($_ is a special shell variable that can't be assigned)...
			// let's just pick a fallback key to use if we are encoding a single scalar
			nonemptyPath = "value"
		}
		_, err := io.WriteString(*w, nonemptyPath+"="+quoteValue(node.Value)+"\n")
		return err
	case yaml.DocumentNode:
		if len(node.Content) == 0 {
			// A document with no root node has nothing to encode.
			return nil
		}
		return pe.doEncode(w, node.Content[0], path)
	case yaml.SequenceNode:
		for index, child := range node.Content {
			if err := pe.doEncode(w, child, appendPath(path, index)); err != nil {
				return err
			}
		}
		return nil
	case yaml.MappingNode:
		// Content alternates key, value, key, value...; stop before a
		// dangling key so index+1 is always in range.
		for index := 0; index+1 < len(node.Content); index += 2 {
			key := node.Content[index]
			value := node.Content[index+1]
			if err := pe.doEncode(w, value, appendPath(path, key.Value)); err != nil {
				return err
			}
		}
		return nil
	case yaml.AliasNode:
		return pe.doEncode(w, node.Alias, path)
	default:
		return fmt.Errorf("Unsupported node %v", node.Tag)
	}
}
func appendPath(cookedPath string, rawKey interface{}) string {
// Shell variable names must match
// [a-zA-Z_]+[a-zA-Z0-9_]*
//
// While this is not mandated by POSIX, which is quite lenient, it is
// what shells (for example busybox ash *) allow in practice.
//
// Since yaml names can contain basically any character, we will process them according to these steps:
//
// 1. apply unicode compatibility decomposition NFKD (this will convert accented
// letters to letters followed by accents, split ligatures, replace exponents
// with the corresponding digit, etc.
//
// 2. discard non-ASCII characters as well as ASCII control characters (ie. anything
// with code point < 32 or > 126), this will eg. discard accents but keep the base
// unaccented letter because of NFKD above
//
// 3. replace all non-alphanumeric characters with _
//
// Moreover, for the root key only, we will prepend an underscore if what results from the steps above
// does not start with [a-zA-Z_] (ie. if the root key starts with a digit).
//
// Note this is NOT a 1:1 mapping.
//
// (*) see endofname.c from https://git.busybox.net/busybox/tag/?h=1_36_0
// XXX empty strings
key := strings.Map(func(r rune) rune {
if isAlphaNumericOrUnderscore(r) {
return r
} else if r < 32 || 126 < r {
return -1
}
return '_'
}, norm.NFKD.String(fmt.Sprintf("%v", rawKey)))
if cookedPath == "" {
firstRune, _ := utf8.DecodeRuneInString(key)
if !isAlphaOrUnderscore(firstRune) {
return "_" + key
}
return key
}
return cookedPath + "_" + key
}
// quoteValue returns value in a form usable on the right-hand side of a shell
// assignment.
//
// Values made up solely of ASCII letters, digits and underscores (including
// the empty string) are returned unchanged. Anything else is wrapped in single
// quotes, with each embedded single quote rewritten as '"'"' (close the
// quoted string, emit a double-quoted single quote, reopen the quoted string).
func quoteValue(value string) string {
	isUnsafe := func(r rune) bool {
		return !isAlphaNumericOrUnderscore(r)
	}
	if strings.IndexFunc(value, isUnsafe) < 0 {
		return value
	}

	escaped := strings.ReplaceAll(value, "'", "'\"'\"'")
	return "'" + escaped + "'"
}
// isAlphaOrUnderscore reports whether r is an ASCII letter (a-z, A-Z) or an
// underscore, i.e. a character allowed at the start of a shell variable name.
func isAlphaOrUnderscore(r rune) bool {
	switch {
	case r == '_':
		return true
	case r >= 'a' && r <= 'z':
		return true
	case r >= 'A' && r <= 'Z':
		return true
	}
	return false
}
// isAlphaNumericOrUnderscore reports whether r is an ASCII letter, an ASCII
// digit or an underscore, i.e. a character allowed anywhere after the first
// position of a shell variable name.
func isAlphaNumericOrUnderscore(r rune) bool {
	if r >= '0' && r <= '9' {
		return true
	}
	return isAlphaOrUnderscore(r)
}

View File

@ -0,0 +1,93 @@
package yqlib
import (
"bufio"
"bytes"
"strings"
"testing"
"github.com/mikefarah/yq/v4/test"
)
// assertEncodesTo decodes the first document of the given YAML text, encodes
// it with a fresh shell variables encoder and asserts that the output, minus
// one trailing newline, equals shellvars.
//
// Setup problems (unreadable input, no document, encode or flush errors) fail
// the calling test via t.Fatalf instead of panicking, so one bad case does not
// abort the whole test binary.
func assertEncodesTo(t *testing.T, yaml string, shellvars string) {
	t.Helper()

	var output bytes.Buffer
	writer := bufio.NewWriter(&output)

	encoder := NewShellVariablesEncoder()
	inputs, err := readDocuments(strings.NewReader(yaml), "test.yml", 0, NewYamlDecoder(ConfiguredYamlPreferences))
	if err != nil {
		t.Fatalf("reading test document: %v", err)
	}

	first := inputs.Front()
	if first == nil {
		t.Fatalf("no document decoded from %q", yaml)
	}
	node := first.Value.(*CandidateNode).Node

	if err := encoder.Encode(writer, node); err != nil {
		t.Fatalf("encoding %q: %v", yaml, err)
	}
	// The encoder writes through a buffered writer; flush before inspecting.
	if err := writer.Flush(); err != nil {
		t.Fatalf("flushing encoder output: %v", err)
	}

	test.AssertResult(t, shellvars, strings.TrimSuffix(output.String(), "\n"))
}
// TestShellVariablesEncoderNonquoting checks that a purely alphanumeric value
// is emitted without quotes.
func TestShellVariablesEncoderNonquoting(t *testing.T) {
	const (
		input = "a: alice"
		want  = "a=alice"
	)
	assertEncodesTo(t, input, want)
}
// TestShellVariablesEncoderQuoting checks that a value containing a space is
// wrapped in single quotes.
func TestShellVariablesEncoderQuoting(t *testing.T) {
	const (
		input = "a: Lewis Carroll"
		want  = "a='Lewis Carroll'"
	)
	assertEncodesTo(t, input, want)
}
// TestShellVariablesEncoderQuotesQuoting checks that a single quote inside a
// value is escaped as '"'"' within the single-quoted output.
func TestShellVariablesEncoderQuotesQuoting(t *testing.T) {
	const (
		input = "a: Lewis Carroll's Alice"
		want  = "a='Lewis Carroll'\"'\"'s Alice'"
	)
	assertEncodesTo(t, input, want)
}
// TestShellVariablesEncoderStripComments checks that a trailing YAML comment
// does not appear in the output.
func TestShellVariablesEncoderStripComments(t *testing.T) {
	const (
		input = "a: Alice # comment"
		want  = "a=Alice"
	)
	assertEncodesTo(t, input, want)
}
// TestShellVariablesEncoderMap checks that nested mapping keys are joined with
// an underscore to form the variable name.
func TestShellVariablesEncoderMap(t *testing.T) {
	const (
		input = "a:\n b: Lewis\n c: Carroll"
		want  = "a_b=Lewis\na_c=Carroll"
	)
	assertEncodesTo(t, input, want)
}
// TestShellVariablesEncoderArray_Unwrapped checks that sequence elements are
// addressed by their zero-based index in the variable name.
func TestShellVariablesEncoderArray_Unwrapped(t *testing.T) {
	const (
		input = "a: [{n: Alice}, {n: Bob}]"
		want  = "a_0_n=Alice\na_1_n=Bob"
	)
	assertEncodesTo(t, input, want)
}
// TestShellVariablesEncoderKeyNonPrintable checks that a control character in
// a key (here the YAML \a escape) is dropped from the variable name.
func TestShellVariablesEncoderKeyNonPrintable(t *testing.T) {
	const (
		input = `"be\all": ring!`
		want  = "bell='ring!'"
	)
	assertEncodesTo(t, input, want)
}
// TestShellVariablesEncoderKeyPrintableNonAlphaNumeric checks that printable
// ASCII characters that are not letters, digits or underscores are replaced
// with underscores in the variable name.
func TestShellVariablesEncoderKeyPrintableNonAlphaNumeric(t *testing.T) {
	const (
		input = `"b-e l=l": ring!`
		want  = "b_e_l_l='ring!'"
	)
	assertEncodesTo(t, input, want)
}
// TestShellVariablesEncoderKeyPrintableNonAscii checks that an accented letter
// in a key (here the YAML \u00e9 escape) is reduced to its unaccented base
// letter in the variable name.
func TestShellVariablesEncoderKeyPrintableNonAscii(t *testing.T) {
	const (
		input = `"b\u00e9ll": ring!`
		want  = "bell='ring!'"
	)
	assertEncodesTo(t, input, want)
}
// TestShellVariablesEncoderRootKeyStartingWithDigit checks that a root key
// beginning with a digit gets an underscore prepended.
func TestShellVariablesEncoderRootKeyStartingWithDigit(t *testing.T) {
	const (
		input = "1a: onea"
		want  = "_1a=onea"
	)
	assertEncodesTo(t, input, want)
}
// TestShellVariablesEncoderRootKeyStartingWithUnderscore checks that a root
// key already starting with an underscore is left unchanged.
func TestShellVariablesEncoderRootKeyStartingWithUnderscore(t *testing.T) {
	const (
		input = "_key: value"
		want  = "_key=value"
	)
	assertEncodesTo(t, input, want)
}
// TestShellVariablesEncoderChildStartingWithUnderscore checks that a nested
// key keeps its leading underscore in addition to the joining underscore.
func TestShellVariablesEncoderChildStartingWithUnderscore(t *testing.T) {
	const (
		input = "root:\n _child: value"
		want  = "root__child=value"
	)
	assertEncodesTo(t, input, want)
}
// TestShellVariablesEncoderEmptyValue checks that a key without a value is
// emitted as an empty assignment.
func TestShellVariablesEncoderEmptyValue(t *testing.T) {
	const (
		input = "empty:"
		want  = "empty="
	)
	assertEncodesTo(t, input, want)
}
// TestShellVariablesEncoderEmptyArray checks that an empty sequence produces
// no output at all.
func TestShellVariablesEncoderEmptyArray(t *testing.T) {
	const (
		input = "empty: []"
		want  = ""
	)
	assertEncodesTo(t, input, want)
}
// TestShellVariablesEncoderEmptyMap checks that an empty mapping produces no
// output at all.
func TestShellVariablesEncoderEmptyMap(t *testing.T) {
	const (
		input = "empty: {}"
		want  = ""
	)
	assertEncodesTo(t, input, want)
}
// TestShellVariablesEncoderScalarNode checks that a document consisting of a
// single scalar is assigned to the fallback variable name "value".
func TestShellVariablesEncoderScalarNode(t *testing.T) {
	const (
		input = "some string"
		want  = "value='some string'"
	)
	assertEncodesTo(t, input, want)
}

View File

@ -32,6 +32,7 @@ const (
UriOutputFormat UriOutputFormat
ShOutputFormat ShOutputFormat
TomlOutputFormat TomlOutputFormat
ShellVariablesOutputFormat
) )
func OutputFormatFromString(format string) (PrinterOutputFormat, error) { func OutputFormatFromString(format string) (PrinterOutputFormat, error) {
@ -50,8 +51,10 @@ func OutputFormatFromString(format string) (PrinterOutputFormat, error) {
return XMLOutputFormat, nil return XMLOutputFormat, nil
case "toml": case "toml":
return TomlOutputFormat, nil return TomlOutputFormat, nil
case "shell", "s", "sh":
return ShellVariablesOutputFormat, nil
default: default:
return 0, fmt.Errorf("unknown format '%v' please use [yaml|json|props|csv|tsv|xml]", format) return 0, fmt.Errorf("unknown format '%v' please use [yaml|json|props|csv|tsv|xml|toml|shell]", format)
} }
} }

View File

@ -0,0 +1,98 @@
package yqlib
import (
"bufio"
"fmt"
"testing"
"github.com/mikefarah/yq/v4/test"
)
// shellVariablesScenarios lists the input/expected-output pairs for the shell
// variables output format. TestShellVariableScenarios both asserts them and
// feeds them to the documentation generator.
var shellVariablesScenarios = []formatScenario{
	{
		description:    "Encode shell variables",
		subdescription: "Note that comments are dropped and values will be enclosed in single quotes as needed.",
		input: "# comment\n" +
			"name: Mike Wazowski\n" +
			"eyes:\n" +
			" color: turquoise\n" +
			" number: 1\n" +
			"friends:\n" +
			" - James P. Sullivan\n" +
			" - Celia Mae",
		expected: "name='Mike Wazowski'\n" +
			"eyes_color=turquoise\n" +
			"eyes_number=1\n" +
			"friends_0='James P. Sullivan'\n" +
			"friends_1='Celia Mae'\n",
	},
	{
		description:    "Encode shell variables: illegal variable names as key.",
		subdescription: "Keys that would be illegal as variable keys are adapted.",
		// The second key embeds a literal tab and the last two embed non-ASCII
		// characters via escapes, keeping this source file ASCII-only.
		input: "ascii_=_symbols: replaced with _\n" +
			"\"ascii_\t_controls\": dropped (this example uses \\t)\n" +
			"nonascii_\u05d0_characters: dropped\n" +
			"effrot_expe\u00f1ded_t\u00f2_preserve_accented_latin_letters: moderate (via unicode NFKD)\n",
		expected: "ascii___symbols='replaced with _'\n" +
			"ascii__controls='dropped (this example uses \\t)'\n" +
			"nonascii__characters=dropped\n" +
			"effrot_expended_to_preserve_accented_latin_letters='moderate (via unicode NFKD)'\n",
	},
	{
		description:    "Encode shell variables: empty values, arrays and maps",
		subdescription: "Empty values are encoded to empty variables, but empty arrays and maps are skipped.",
		input:          "empty:\n value:\n array: []\n map: {}",
		expected:       "empty_value=\n",
	},
	{
		description:    "Encode shell variables: single quotes in values",
		subdescription: "Single quotes in values are encoded as '\"'\"' (close single quote, double-quoted single quote, open single quote).",
		input:          "name: Miles O'Brien",
		expected:       `name='Miles O'"'"'Brien'` + "\n",
	},
}
// TestShellVariableScenarios runs every entry of shellVariablesScenarios
// through the YAML decoder and the shell variables encoder, asserts the result
// against the scenario's expected output, and then hands all scenarios to
// documentScenarios to produce the "shellvariables" usage document.
func TestShellVariableScenarios(t *testing.T) {
	genericScenarios := make([]interface{}, 0, len(shellVariablesScenarios))
	for _, scenario := range shellVariablesScenarios {
		actual := mustProcessFormatScenario(scenario, NewYamlDecoder(ConfiguredYamlPreferences), NewShellVariablesEncoder())
		test.AssertResultWithContext(t, scenario.expected, actual, scenario.description)
		genericScenarios = append(genericScenarios, scenario)
	}
	documentScenarios(t, "usage", "shellvariables", genericScenarios, documentShellVaraibleScenario)
}
// documentShellVaraibleScenario writes the markdown section for one scenario
// to w: a heading, the optional sub-description, the sample input, the yq
// command line and the encoder's actual output.
//
// i must hold a formatScenario (the type assertion panics otherwise).
// Scenarios flagged with skipDoc produce no output.
func documentShellVaraibleScenario(_ *testing.T, w *bufio.Writer, i interface{}) {
	scenario := i.(formatScenario)
	if scenario.skipDoc {
		return
	}

	writeOrPanic(w, fmt.Sprintf("## %v\n", scenario.description))
	if scenario.subdescription != "" {
		writeOrPanic(w, scenario.subdescription)
		writeOrPanic(w, "\n\n")
	}

	writeOrPanic(w, "Given a sample.yml file of:\n")
	writeOrPanic(w, fmt.Sprintf("```yaml\n%v\n```\n", scenario.input))

	writeOrPanic(w, "then\n")
	// Only mention an expression on the command line when the scenario has one.
	command := "```bash\nyq -o=shell sample.yml\n```\n"
	if scenario.expression != "" {
		command = fmt.Sprintf("```bash\nyq -o=shell '%v' sample.yml\n```\n", scenario.expression)
	}
	writeOrPanic(w, command)

	writeOrPanic(w, "will output\n")
	// The documented output is produced by the real encoder, not copied from
	// scenario.expected.
	output := mustProcessFormatScenario(scenario, NewYamlDecoder(ConfiguredYamlPreferences), NewShellVariablesEncoder())
	writeOrPanic(w, fmt.Sprintf("```sh\n%v```\n\n", output))
}

View File

@ -15,7 +15,7 @@ func readStream(filename string) (io.Reader, error) {
reader = bufio.NewReader(os.Stdin) reader = bufio.NewReader(os.Stdin)
} else { } else {
// ignore CWE-22 gosec issue - that's more targeted for http based apps that run in a public directory, // ignore CWE-22 gosec issue - that's more targeted for http based apps that run in a public directory,
// and ensuring that it's not possible to give a path to a file outside thar directory. // and ensuring that it's not possible to give a path to a file outside that directory.
file, err := os.Open(filename) // #nosec file, err := os.Open(filename) // #nosec
if err != nil { if err != nil {
return nil, err return nil, err