Implement shell output format (#1645)

* fix typo in a comment

* implement shell output format

* fix a typo

* add two test cases, have source uses ascii only

* add integration tests and documentation

* add fixes after code review
This commit is contained in:
Giorgio Gallo 2023-05-04 03:06:56 +02:00 committed by GitHub
parent bbe3055006
commit 80b42b81fd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 438 additions and 3 deletions

View File

@ -12,7 +12,7 @@
# Documentation
The documentation is a bit of a mixed bag (sorry in advanced, I do plan on simplifying it...) - with some parts automatically generated and stiched together and some statically defined.
The documentation is a bit of a mixed bag (sorry in advance, I do plan on simplifying it...) - with some parts automatically generated and stitched together and some statically defined.
Documentation is written in markdown, and is published in the 'gitbook' branch.

View File

@ -195,6 +195,8 @@ func createEncoder(format yqlib.PrinterOutputFormat) (yqlib.Encoder, error) {
return yqlib.NewXMLEncoder(indent, yqlib.ConfiguredXMLPreferences), nil
case yqlib.TomlOutputFormat:
return yqlib.NewTomlEncoder(), nil
case yqlib.ShellVariablesOutputFormat:
return yqlib.NewShellVariablesEncoder(), nil
}
return nil, fmt.Errorf("invalid encoder: %v", format)
}

View File

@ -0,0 +1,86 @@
## Encode shell variables
Note that comments are dropped and values will be enclosed in single quotes as needed.
Given a sample.yml file of:
```yaml
# comment
name: Mike Wazowski
eyes:
color: turquoise
number: 1
friends:
- James P. Sullivan
- Celia Mae
```
then
```bash
yq -o=shell sample.yml
```
will output
```sh
name='Mike Wazowski'
eyes_color=turquoise
eyes_number=1
friends_0='James P. Sullivan'
friends_1='Celia Mae'
```
## Encode shell variables: illegal variable names as key.
Keys that would be illegal as variable keys are adapted.
Given a sample.yml file of:
```yaml
ascii_=_symbols: replaced with _
"ascii_ _controls": dropped (this example uses \t)
nonascii_א_characters: dropped
effrot_expeñded_tò_preserve_accented_latin_letters: moderate (via unicode NFKD)
```
then
```bash
yq -o=shell sample.yml
```
will output
```sh
ascii___symbols='replaced with _'
ascii__controls='dropped (this example uses \t)'
nonascii__characters=dropped
effrot_expended_to_preserve_accented_latin_letters='moderate (via unicode NFKD)'
```
## Encode shell variables: empty values, arrays and maps
Empty values are encoded to empty variables, but empty arrays and maps are skipped.
Given a sample.yml file of:
```yaml
empty:
value:
array: []
map: {}
```
then
```bash
yq -o=shell sample.yml
```
will output
```sh
empty_value=
```
## Encode shell variables: single quotes in values
Single quotes in values are encoded as '"'"' (close single quote, double-quoted single quote, open single quote).
Given a sample.yml file of:
```yaml
name: Miles O'Brien
```
then
```bash
yq -o=shell sample.yml
```
will output
```sh
name='Miles O'"'"'Brien'
```

View File

@ -0,0 +1,153 @@
package yqlib
import (
"fmt"
"io"
"strings"
"unicode/utf8"
"golang.org/x/text/unicode/norm"
yaml "gopkg.in/yaml.v3"
)
// shellVariablesEncoder renders a YAML node tree as shell variable
// assignments, one `name=value` line per scalar. It carries no state.
type shellVariablesEncoder struct{}
// NewShellVariablesEncoder returns an Encoder that writes nodes as shell
// variable assignments.
func NewShellVariablesEncoder() Encoder {
	return new(shellVariablesEncoder)
}
// CanHandleAliases always reports false for the shell variables encoder.
// NOTE(review): presumably this asks the caller to resolve aliases before
// encoding - confirm against the Encoder interface.
func (*shellVariablesEncoder) CanHandleAliases() bool {
	return false
}
// PrintDocumentSeparator is a no-op: it writes nothing to the given writer
// and always returns nil.
func (*shellVariablesEncoder) PrintDocumentSeparator(io.Writer) error {
	return nil
}
// PrintLeadingContent is a no-op: both arguments are ignored, nothing is
// written, and the returned error is always nil.
func (*shellVariablesEncoder) PrintLeadingContent(io.Writer, string) error {
	return nil
}
// Encode writes node to writer as shell variable assignments, starting with
// an empty variable-name prefix. mapKeysToStrings is applied to node first
// (defined elsewhere; presumably it normalises non-string map keys). Any
// error produced while walking or writing is returned unchanged.
func (pe *shellVariablesEncoder) Encode(writer io.Writer, node *yaml.Node) error {
	mapKeysToStrings(node)
	return pe.doEncode(&writer, node, "")
}
// doEncode recursively walks node and writes one `name=value` line to *w for
// every scalar it reaches. path is the variable name accumulated so far and
// is "" at the root. Comments attached to nodes are never emitted.
//
// Malformed trees are reported as errors instead of panicking: a nil node
// (including an alias without a target) and a mapping whose key has no
// matching value. A document node without content produces no output.
//
// It returns the first error from writing or from a nested node, or an error
// for a node kind that is not scalar, document, sequence, mapping or alias.
func (pe *shellVariablesEncoder) doEncode(w *io.Writer, node *yaml.Node, path string) error {
	if node == nil {
		return fmt.Errorf("cannot encode nil node at path %q", path)
	}

	switch node.Kind {
	case yaml.ScalarNode:
		nonemptyPath := path
		if path == "" {
			// An empty name ("=somevalue") would fail when sourced by a shell,
			// and "_" cannot be used because $_ is a special shell variable
			// that cannot be assigned. Fall back to a fixed name when the whole
			// document is a single scalar.
			nonemptyPath = "value"
		}
		_, err := io.WriteString(*w, nonemptyPath+"="+quoteValue(node.Value)+"\n")
		return err
	case yaml.DocumentNode:
		if len(node.Content) == 0 {
			// Nothing inside the document, so there is nothing to emit.
			return nil
		}
		return pe.doEncode(w, node.Content[0], path)
	case yaml.SequenceNode:
		for index, child := range node.Content {
			if err := pe.doEncode(w, child, appendPath(path, index)); err != nil {
				return err
			}
		}
		return nil
	case yaml.MappingNode:
		// Content holds alternating key and value nodes, so it must be even.
		if len(node.Content)%2 != 0 {
			return fmt.Errorf("mapping node at path %q has a key without a value", path)
		}
		for index := 0; index < len(node.Content); index += 2 {
			key := node.Content[index]
			value := node.Content[index+1]
			if err := pe.doEncode(w, value, appendPath(path, key.Value)); err != nil {
				return err
			}
		}
		return nil
	case yaml.AliasNode:
		// The alias target is encoded in place under the current path.
		return pe.doEncode(w, node.Alias, path)
	default:
		return fmt.Errorf("Unsupported node %v", node.Tag)
	}
}
// appendPath extends cookedPath, an already sanitised variable-name prefix,
// with rawKey and returns the resulting variable name.
//
// Shell variable names have to match
//
//	[a-zA-Z_]+[a-zA-Z0-9_]*
//
// POSIX itself is more lenient, but this is what shells (busybox ash, for
// example - see endofname.c from
// https://git.busybox.net/busybox/tag/?h=1_36_0) accept in practice.
//
// YAML keys may contain almost any character, so rawKey is processed as
// follows:
//
//  1. It is formatted with %v and put through Unicode compatibility
//     decomposition (NFKD): accented letters become a base letter followed
//     by combining accents, ligatures are split, exponents become plain
//     digits, and so on.
//  2. Every rune outside printable ASCII (code point < 32 or > 126) is
//     dropped. Thanks to step 1 this removes accents but keeps the base
//     letter.
//  3. Every remaining rune that is not a letter, digit or underscore is
//     replaced with '_'.
//
// With an empty cookedPath (the root key), the result is additionally
// prefixed with '_' when it does not start with [a-zA-Z_], e.g. when the key
// starts with a digit. Otherwise the sanitised key is joined to cookedPath
// with '_'.
//
// The mapping is NOT one-to-one: distinct keys can yield the same name.
// TODO: empty keys receive no special treatment.
func appendPath(cookedPath string, rawKey interface{}) string {
	decomposed := norm.NFKD.String(fmt.Sprintf("%v", rawKey))

	sanitise := func(r rune) rune {
		switch {
		case isAlphaNumericOrUnderscore(r):
			return r
		case r < 32 || r > 126:
			// A negative rune tells strings.Map to drop the character.
			return -1
		default:
			return '_'
		}
	}
	key := strings.Map(sanitise, decomposed)

	if cookedPath != "" {
		return cookedPath + "_" + key
	}
	if leading, _ := utf8.DecodeRuneInString(key); !isAlphaOrUnderscore(leading) {
		return "_" + key
	}
	return key
}
// quoteValue returns value formatted for the right-hand side of a shell
// assignment. A value made only of ASCII letters, digits and underscores
// (including the empty string) is returned unchanged. Any other value is
// wrapped in single quotes, with each embedded single quote rewritten as
// '"'"' (close quote, double-quoted single quote, reopen quote).
func quoteValue(value string) string {
	needsQuote := func(r rune) bool { return !isAlphaNumericOrUnderscore(r) }
	if strings.IndexFunc(value, needsQuote) < 0 {
		return value
	}
	return "'" + strings.ReplaceAll(value, "'", "'\"'\"'") + "'"
}
// isAlphaOrUnderscore reports whether r is an ASCII letter (a-z, A-Z) or an
// underscore.
func isAlphaOrUnderscore(r rune) bool {
	switch {
	case r == '_':
		return true
	case 'a' <= r && r <= 'z':
		return true
	case 'A' <= r && r <= 'Z':
		return true
	default:
		return false
	}
}
// isAlphaNumericOrUnderscore reports whether r is an ASCII letter, an ASCII
// digit (0-9) or an underscore.
func isAlphaNumericOrUnderscore(r rune) bool {
	if '0' <= r && r <= '9' {
		return true
	}
	return isAlphaOrUnderscore(r)
}

View File

@ -0,0 +1,93 @@
package yqlib
import (
"bufio"
"bytes"
"strings"
"testing"
"github.com/mikefarah/yq/v4/test"
)
// assertEncodesTo decodes the first document of the given YAML text, encodes
// it with the shell variables encoder and asserts that the output, with one
// trailing newline removed, equals shellvars.
//
// Setup problems (decode error, no document, encode or flush error) fail the
// calling test through t.Fatalf rather than panicking, so the failure is
// attributed to the right test.
func assertEncodesTo(t *testing.T, yaml string, shellvars string) {
	t.Helper()

	var output bytes.Buffer
	writer := bufio.NewWriter(&output)
	encoder := NewShellVariablesEncoder()

	inputs, err := readDocuments(strings.NewReader(yaml), "test.yml", 0, NewYamlDecoder(ConfiguredYamlPreferences))
	if err != nil {
		t.Fatalf("reading documents: %v", err)
	}
	first := inputs.Front()
	if first == nil {
		t.Fatal("no document was decoded from the input")
	}
	node := first.Value.(*CandidateNode).Node

	if err := encoder.Encode(writer, node); err != nil {
		t.Fatalf("encoding shell variables: %v", err)
	}
	// The encoder writes into a buffered writer; flush before reading output.
	if err := writer.Flush(); err != nil {
		t.Fatalf("flushing output: %v", err)
	}

	test.AssertResult(t, shellvars, strings.TrimSuffix(output.String(), "\n"))
}
// TestShellVariablesEncoderNonquoting checks that a purely alphabetic value
// is emitted without quotes.
func TestShellVariablesEncoderNonquoting(t *testing.T) {
	const (
		input    = "a: alice"
		expected = "a=alice"
	)
	assertEncodesTo(t, input, expected)
}
// TestShellVariablesEncoderQuoting checks that a value containing a space is
// wrapped in single quotes.
func TestShellVariablesEncoderQuoting(t *testing.T) {
	const (
		input    = "a: Lewis Carroll"
		expected = "a='Lewis Carroll'"
	)
	assertEncodesTo(t, input, expected)
}
// TestShellVariablesEncoderQuotesQuoting checks that a single quote inside a
// value is emitted as '"'"' within the surrounding single quotes.
func TestShellVariablesEncoderQuotesQuoting(t *testing.T) {
	const (
		input    = "a: Lewis Carroll's Alice"
		expected = "a='Lewis Carroll'\"'\"'s Alice'"
	)
	assertEncodesTo(t, input, expected)
}
// TestShellVariablesEncoderStripComments checks that a trailing YAML comment
// does not appear in the output.
func TestShellVariablesEncoderStripComments(t *testing.T) {
	const (
		input    = "a: Alice # comment"
		expected = "a=Alice"
	)
	assertEncodesTo(t, input, expected)
}
// TestShellVariablesEncoderMap checks that nested map keys are joined to the
// parent key with an underscore, one output line per leaf.
func TestShellVariablesEncoderMap(t *testing.T) {
	const (
		input    = "a:\n b: Lewis\n c: Carroll"
		expected = "a_b=Lewis\na_c=Carroll"
	)
	assertEncodesTo(t, input, expected)
}
// TestShellVariablesEncoderArray_Unwrapped checks that sequence elements
// contribute their zero-based index to the variable name.
func TestShellVariablesEncoderArray_Unwrapped(t *testing.T) {
	const (
		input    = "a: [{n: Alice}, {n: Bob}]"
		expected = "a_0_n=Alice\na_1_n=Bob"
	)
	assertEncodesTo(t, input, expected)
}
// TestShellVariablesEncoderKeyNonPrintable checks that a control character in
// a key (written here with the YAML \a escape) is dropped from the name.
func TestShellVariablesEncoderKeyNonPrintable(t *testing.T) {
	const (
		input    = `"be\all": ring!`
		expected = "bell='ring!'"
	)
	assertEncodesTo(t, input, expected)
}
// TestShellVariablesEncoderKeyPrintableNonAlphaNumeric checks that printable
// ASCII symbols in a key ('-', space, '=') are each replaced with '_'.
func TestShellVariablesEncoderKeyPrintableNonAlphaNumeric(t *testing.T) {
	const (
		input    = `"b-e l=l": ring!`
		expected = "b_e_l_l='ring!'"
	)
	assertEncodesTo(t, input, expected)
}
// TestShellVariablesEncoderKeyPrintableNonAscii checks that an accented
// letter in a key (U+00E9) is reduced to its unaccented base letter.
func TestShellVariablesEncoderKeyPrintableNonAscii(t *testing.T) {
	const (
		input    = `"b\u00e9ll": ring!`
		expected = "bell='ring!'"
	)
	assertEncodesTo(t, input, expected)
}
// TestShellVariablesEncoderRootKeyStartingWithDigit checks that a root key
// beginning with a digit gets an underscore prefix.
func TestShellVariablesEncoderRootKeyStartingWithDigit(t *testing.T) {
	const (
		input    = "1a: onea"
		expected = "_1a=onea"
	)
	assertEncodesTo(t, input, expected)
}
// TestShellVariablesEncoderRootKeyStartingWithUnderscore checks that a root
// key already starting with an underscore is left as it is.
func TestShellVariablesEncoderRootKeyStartingWithUnderscore(t *testing.T) {
	const (
		input    = "_key: value"
		expected = "_key=value"
	)
	assertEncodesTo(t, input, expected)
}
// TestShellVariablesEncoderChildStartingWithUnderscore checks that a child
// key starting with an underscore keeps it, giving a double underscore after
// the joining separator.
func TestShellVariablesEncoderChildStartingWithUnderscore(t *testing.T) {
	const (
		input    = "root:\n _child: value"
		expected = "root__child=value"
	)
	assertEncodesTo(t, input, expected)
}
// TestShellVariablesEncoderEmptyValue checks that a key without a value is
// emitted as an empty assignment.
func TestShellVariablesEncoderEmptyValue(t *testing.T) {
	const (
		input    = "empty:"
		expected = "empty="
	)
	assertEncodesTo(t, input, expected)
}
// TestShellVariablesEncoderEmptyArray checks that an empty sequence produces
// no output at all.
func TestShellVariablesEncoderEmptyArray(t *testing.T) {
	const (
		input    = "empty: []"
		expected = ""
	)
	assertEncodesTo(t, input, expected)
}
// TestShellVariablesEncoderEmptyMap checks that an empty mapping produces no
// output at all.
func TestShellVariablesEncoderEmptyMap(t *testing.T) {
	const (
		input    = "empty: {}"
		expected = ""
	)
	assertEncodesTo(t, input, expected)
}
// TestShellVariablesEncoderScalarNode checks that a document consisting of a
// single scalar is emitted under the fallback variable name "value".
func TestShellVariablesEncoderScalarNode(t *testing.T) {
	const (
		input    = "some string"
		expected = "value='some string'"
	)
	assertEncodesTo(t, input, expected)
}

View File

@ -32,6 +32,7 @@ const (
UriOutputFormat
ShOutputFormat
TomlOutputFormat
ShellVariablesOutputFormat
)
func OutputFormatFromString(format string) (PrinterOutputFormat, error) {
@ -50,8 +51,10 @@ func OutputFormatFromString(format string) (PrinterOutputFormat, error) {
return XMLOutputFormat, nil
case "toml":
return TomlOutputFormat, nil
case "shell", "s", "sh":
return ShellVariablesOutputFormat, nil
default:
return 0, fmt.Errorf("unknown format '%v' please use [yaml|json|props|csv|tsv|xml]", format)
return 0, fmt.Errorf("unknown format '%v' please use [yaml|json|props|csv|tsv|xml|toml|shell]", format)
}
}

View File

@ -0,0 +1,98 @@
package yqlib
import (
"bufio"
"fmt"
"testing"
"github.com/mikefarah/yq/v4/test"
)
// shellVariablesScenarios lists the input/expected-output pairs for the shell
// variables encoder. TestShellVariableScenarios compares each expected value
// with the encoder output, and documentShellVaraibleScenario renders the
// description, subdescription and input of each entry as markdown.
var shellVariablesScenarios = []formatScenario{
// Basic case: a top-level scalar, a nested map and a sequence. The leading
// comment in the input does not appear in the expected output.
{
description: "Encode shell variables",
subdescription: "Note that comments are dropped and values will be enclosed in single quotes as needed.",
input: "" +
"# comment" + "\n" +
"name: Mike Wazowski" + "\n" +
"eyes:" + "\n" +
" color: turquoise" + "\n" +
" number: 1" + "\n" +
"friends:" + "\n" +
" - James P. Sullivan" + "\n" +
" - Celia Mae",
expected: "" +
"name='Mike Wazowski'" + "\n" +
"eyes_color=turquoise" + "\n" +
"eyes_number=1" + "\n" +
"friends_0='James P. Sullivan'" + "\n" +
"friends_1='Celia Mae'" + "\n",
},
// Keys containing characters that cannot appear in a variable name. The tab,
// U+05D0 and the accented letters are written as Go escapes, which keeps
// this source file ASCII-only.
{
description: "Encode shell variables: illegal variable names as key.",
subdescription: "Keys that would be illegal as variable keys are adapted.",
input: "" +
"ascii_=_symbols: replaced with _" + "\n" +
"\"ascii_\t_controls\": dropped (this example uses \\t)" + "\n" +
"nonascii_\u05d0_characters: dropped" + "\n" +
"effrot_expe\u00f1ded_t\u00f2_preserve_accented_latin_letters: moderate (via unicode NFKD)" + "\n",
expected: "" +
"ascii___symbols='replaced with _'" + "\n" +
"ascii__controls='dropped (this example uses \\t)'" + "\n" +
"nonascii__characters=dropped" + "\n" +
"effrot_expended_to_preserve_accented_latin_letters='moderate (via unicode NFKD)'" + "\n",
},
// An empty scalar yields an empty assignment; the empty array and the empty
// map contribute no line to the expected output.
{
description: "Encode shell variables: empty values, arrays and maps",
subdescription: "Empty values are encoded to empty variables, but empty arrays and maps are skipped.",
input: "empty:\n value:\n array: []\n map: {}",
expected: "empty_value=" + "\n",
},
// A single quote inside a value. The expected output uses a raw string so
// the '"'"' sequence can be read literally.
{
description: "Encode shell variables: single quotes in values",
subdescription: "Single quotes in values are encoded as '\"'\"' (close single quote, double-quoted single quote, open single quote).",
input: "name: Miles O'Brien",
expected: `name='Miles O'"'"'Brien'` + "\n",
},
}
// TestShellVariableScenarios runs every entry of shellVariablesScenarios
// through a fresh YAML decoder and shell variables encoder and compares the
// result with the entry's expected text. It then passes all scenarios to
// documentScenarios together with documentShellVaraibleScenario.
func TestShellVariableScenarios(t *testing.T) {
	for idx := range shellVariablesScenarios {
		scenario := shellVariablesScenarios[idx]
		actual := mustProcessFormatScenario(scenario, NewYamlDecoder(ConfiguredYamlPreferences), NewShellVariablesEncoder())
		test.AssertResultWithContext(t, scenario.expected, actual, scenario.description)
	}

	// documentScenarios takes a slice of empty interfaces, so box each entry.
	docInputs := make([]interface{}, 0, len(shellVariablesScenarios))
	for _, scenario := range shellVariablesScenarios {
		docInputs = append(docInputs, scenario)
	}
	documentScenarios(t, "usage", "shellvariables", docInputs, documentShellVaraibleScenario)
}
// documentShellVaraibleScenario writes the markdown documentation for one
// scenario to w: a heading, the optional sub-description, the sample input,
// the yq command line (with the expression when one is set) and the output
// produced by the shell variables encoder. Scenarios with skipDoc set are
// omitted. i must hold a formatScenario; any other type panics on the type
// assertion.
func documentShellVaraibleScenario(_ *testing.T, w *bufio.Writer, i interface{}) {
	scenario := i.(formatScenario)
	if scenario.skipDoc {
		return
	}

	writeOrPanic(w, fmt.Sprintf("## %v\n", scenario.description))
	if scenario.subdescription != "" {
		writeOrPanic(w, scenario.subdescription)
		writeOrPanic(w, "\n\n")
	}

	writeOrPanic(w, "Given a sample.yml file of:\n")
	writeOrPanic(w, fmt.Sprintf("```yaml\n%v\n```\n", scenario.input))

	writeOrPanic(w, "then\n")
	command := "```bash\nyq -o=shell sample.yml\n```\n"
	if scenario.expression != "" {
		command = fmt.Sprintf("```bash\nyq -o=shell '%v' sample.yml\n```\n", scenario.expression)
	}
	writeOrPanic(w, command)

	writeOrPanic(w, "will output\n")
	// The documented output is generated by actually running the scenario.
	output := mustProcessFormatScenario(scenario, NewYamlDecoder(ConfiguredYamlPreferences), NewShellVariablesEncoder())
	writeOrPanic(w, fmt.Sprintf("```sh\n%v```\n\n", output))
}

View File

@ -15,7 +15,7 @@ func readStream(filename string) (io.Reader, error) {
reader = bufio.NewReader(os.Stdin)
} else {
// ignore CWE-22 gosec issue - that's more targeted for http based apps that run in a public directory,
// and ensuring that it's not possible to give a path to a file outside thar directory.
// and ensuring that it's not possible to give a path to a file outside that directory.
file, err := os.Open(filename) // #nosec
if err != nil {
return nil, err