From 80b42b81fdf9228967d9df1720354c70284b8940 Mon Sep 17 00:00:00 2001 From: Giorgio Gallo Date: Thu, 4 May 2023 03:06:56 +0200 Subject: [PATCH] Implement shell output format (#1645) * fix typo in a comment * implement shell output format * fix a typo * add two test cases, have source use ascii only * add integration tests and documentation * add fixes after code review --- CONTRIBUTING.md | 2 +- cmd/utils.go | 2 + pkg/yqlib/doc/usage/shellvariables.md | 86 +++++++++++++ pkg/yqlib/encoder_shellvariables.go | 153 +++++++++++++++++++++++ pkg/yqlib/encoder_shellvariables_test.go | 93 ++++++++++++++ pkg/yqlib/printer.go | 5 +- pkg/yqlib/shellvariables_test.go | 98 +++++++++++++++ pkg/yqlib/utils.go | 2 +- 8 files changed, 438 insertions(+), 3 deletions(-) create mode 100644 pkg/yqlib/doc/usage/shellvariables.md create mode 100644 pkg/yqlib/encoder_shellvariables.go create mode 100644 pkg/yqlib/encoder_shellvariables_test.go create mode 100644 pkg/yqlib/shellvariables_test.go diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4efe20d9..17fab63e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -12,7 +12,7 @@ # Documentation -The documentation is a bit of a mixed bag (sorry in advanced, I do plan on simplifying it...) - with some parts automatically generated and stiched together and some statically defined. +The documentation is a bit of a mixed bag (sorry in advance, I do plan on simplifying it...) - with some parts automatically generated and stiched together and some statically defined. Documentation is written in markdown, and is published in the 'gitbook' branch. 
diff --git a/cmd/utils.go b/cmd/utils.go index e54cfb79..7d34c723 100644 --- a/cmd/utils.go +++ b/cmd/utils.go @@ -195,6 +195,8 @@ func createEncoder(format yqlib.PrinterOutputFormat) (yqlib.Encoder, error) { return yqlib.NewXMLEncoder(indent, yqlib.ConfiguredXMLPreferences), nil case yqlib.TomlOutputFormat: return yqlib.NewTomlEncoder(), nil + case yqlib.ShellVariablesOutputFormat: + return yqlib.NewShellVariablesEncoder(), nil } return nil, fmt.Errorf("invalid encoder: %v", format) } diff --git a/pkg/yqlib/doc/usage/shellvariables.md b/pkg/yqlib/doc/usage/shellvariables.md new file mode 100644 index 00000000..a2919715 --- /dev/null +++ b/pkg/yqlib/doc/usage/shellvariables.md @@ -0,0 +1,86 @@ + +## Encode shell variables +Note that comments are dropped and values will be enclosed in single quotes as needed. + +Given a sample.yml file of: +```yaml +# comment +name: Mike Wazowski +eyes: + color: turquoise + number: 1 +friends: + - James P. Sullivan + - Celia Mae +``` +then +```bash +yq -o=shell sample.yml +``` +will output +```sh +name='Mike Wazowski' +eyes_color=turquoise +eyes_number=1 +friends_0='James P. Sullivan' +friends_1='Celia Mae' +``` + +## Encode shell variables: illegal variable names as key. +Keys that would be illegal as variable keys are adapted. + +Given a sample.yml file of: +```yaml +ascii_=_symbols: replaced with _ +"ascii_ _controls": dropped (this example uses \t) +nonascii_א_characters: dropped +effrot_expeñded_tò_preserve_accented_latin_letters: moderate (via unicode NFKD) + +``` +then +```bash +yq -o=shell sample.yml +``` +will output +```sh +ascii___symbols='replaced with _' +ascii__controls='dropped (this example uses \t)' +nonascii__characters=dropped +effrot_expended_to_preserve_accented_latin_letters='moderate (via unicode NFKD)' +``` + +## Encode shell variables: empty values, arrays and maps +Empty values are encoded to empty variables, but empty arrays and maps are skipped. 
+ +Given a sample.yml file of: +```yaml +empty: + value: + array: [] + map: {} +``` +then +```bash +yq -o=shell sample.yml +``` +will output +```sh +empty_value= +``` + +## Encode shell variables: single quotes in values +Single quotes in values are encoded as '"'"' (close single quote, double-quoted single quote, open single quote). + +Given a sample.yml file of: +```yaml +name: Miles O'Brien +``` +then +```bash +yq -o=shell sample.yml +``` +will output +```sh +name='Miles O'"'"'Brien' +``` + diff --git a/pkg/yqlib/encoder_shellvariables.go b/pkg/yqlib/encoder_shellvariables.go new file mode 100644 index 00000000..62d6ac8f --- /dev/null +++ b/pkg/yqlib/encoder_shellvariables.go @@ -0,0 +1,153 @@ +package yqlib + +import ( + "fmt" + "io" + "strings" + "unicode/utf8" + + "golang.org/x/text/unicode/norm" + yaml "gopkg.in/yaml.v3" +) + +type shellVariablesEncoder struct { +} + +func NewShellVariablesEncoder() Encoder { + return &shellVariablesEncoder{} +} + +func (pe *shellVariablesEncoder) CanHandleAliases() bool { + return false +} + +func (pe *shellVariablesEncoder) PrintDocumentSeparator(_ io.Writer) error { + return nil +} + +func (pe *shellVariablesEncoder) PrintLeadingContent(_ io.Writer, _ string) error { + return nil +} + +func (pe *shellVariablesEncoder) Encode(writer io.Writer, node *yaml.Node) error { + + mapKeysToStrings(node) + err := pe.doEncode(&writer, node, "") + if err != nil { + return err + } + + return err +} + +func (pe *shellVariablesEncoder) doEncode(w *io.Writer, node *yaml.Node, path string) error { + + // Note this drops all comments. + + switch node.Kind { + case yaml.ScalarNode: + nonemptyPath := path + if path == "" { + // We can't assign an empty variable "=somevalue" because that would error out if sourced in a shell, + // nor can we use "_" as a variable name ($_ is a special shell variable that can't be assigned)... 
+ // let's just pick a fallback key to use if we are encoding a single scalar + nonemptyPath = "value" + } + _, err := io.WriteString(*w, nonemptyPath+"="+quoteValue(node.Value)+"\n") + return err + case yaml.DocumentNode: + return pe.doEncode(w, node.Content[0], path) + case yaml.SequenceNode: + for index, child := range node.Content { + err := pe.doEncode(w, child, appendPath(path, index)) + if err != nil { + return err + } + } + return nil + case yaml.MappingNode: + for index := 0; index < len(node.Content); index = index + 2 { + key := node.Content[index] + value := node.Content[index+1] + err := pe.doEncode(w, value, appendPath(path, key.Value)) + if err != nil { + return err + } + } + return nil + case yaml.AliasNode: + return pe.doEncode(w, node.Alias, path) + default: + return fmt.Errorf("Unsupported node %v", node.Tag) + } +} + +func appendPath(cookedPath string, rawKey interface{}) string { + + // Shell variable names must match + // [a-zA-Z_]+[a-zA-Z0-9_]* + // + // While this is not mandated by POSIX, which is quite lenient, it is + // what shells (for example busybox ash *) allow in practice. + // + // Since yaml names can contain basically any character, we will process them according to these steps: + // + // 1. apply unicode compatibility decomposition NFKD (this will convert accented + // letters to letters followed by accents, split ligatures, replace exponents + // with the corresponding digit, etc. + // + // 2. discard non-ASCII characters as well as ASCII control characters (ie. anything + // with code point < 32 or > 126), this will eg. discard accents but keep the base + // unaccented letter because of NFKD above + // + // 3. replace all non-alphanumeric characters with _ + // + // Moreover, for the root key only, we will prepend an underscore if what results from the steps above + // does not start with [a-zA-Z_] (ie. if the root key starts with a digit). + // + // Note this is NOT a 1:1 mapping. 
+ // + // (*) see endofname.c from https://git.busybox.net/busybox/tag/?h=1_36_0 + + // XXX empty strings + + key := strings.Map(func(r rune) rune { + if isAlphaNumericOrUnderscore(r) { + return r + } else if r < 32 || 126 < r { + return -1 + } + return '_' + }, norm.NFKD.String(fmt.Sprintf("%v", rawKey))) + + if cookedPath == "" { + firstRune, _ := utf8.DecodeRuneInString(key) + if !isAlphaOrUnderscore(firstRune) { + return "_" + key + } + return key + } + return cookedPath + "_" + key +} + +func quoteValue(value string) string { + needsQuoting := false + for _, r := range value { + if !isAlphaNumericOrUnderscore(r) { + needsQuoting = true + break + } + } + if needsQuoting { + return "'" + strings.ReplaceAll(value, "'", "'\"'\"'") + "'" + } + return value +} + +func isAlphaOrUnderscore(r rune) bool { + return ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z') || r == '_' +} + +func isAlphaNumericOrUnderscore(r rune) bool { + return isAlphaOrUnderscore(r) || ('0' <= r && r <= '9') +} diff --git a/pkg/yqlib/encoder_shellvariables_test.go b/pkg/yqlib/encoder_shellvariables_test.go new file mode 100644 index 00000000..b3259088 --- /dev/null +++ b/pkg/yqlib/encoder_shellvariables_test.go @@ -0,0 +1,93 @@ +package yqlib + +import ( + "bufio" + "bytes" + "strings" + "testing" + + "github.com/mikefarah/yq/v4/test" +) + +func assertEncodesTo(t *testing.T, yaml string, shellvars string) { + var output bytes.Buffer + writer := bufio.NewWriter(&output) + + var encoder = NewShellVariablesEncoder() + inputs, err := readDocuments(strings.NewReader(yaml), "test.yml", 0, NewYamlDecoder(ConfiguredYamlPreferences)) + if err != nil { + panic(err) + } + node := inputs.Front().Value.(*CandidateNode).Node + err = encoder.Encode(writer, node) + if err != nil { + panic(err) + } + writer.Flush() + + test.AssertResult(t, shellvars, strings.TrimSuffix(output.String(), "\n")) +} + +func TestShellVariablesEncoderNonquoting(t *testing.T) { + assertEncodesTo(t, "a: alice", "a=alice") +} + +func 
TestShellVariablesEncoderQuoting(t *testing.T) { + assertEncodesTo(t, "a: Lewis Carroll", "a='Lewis Carroll'") +} + +func TestShellVariablesEncoderQuotesQuoting(t *testing.T) { + assertEncodesTo(t, "a: Lewis Carroll's Alice", "a='Lewis Carroll'\"'\"'s Alice'") +} + +func TestShellVariablesEncoderStripComments(t *testing.T) { + assertEncodesTo(t, "a: Alice # comment", "a=Alice") +} + +func TestShellVariablesEncoderMap(t *testing.T) { + assertEncodesTo(t, "a:\n b: Lewis\n c: Carroll", "a_b=Lewis\na_c=Carroll") +} + +func TestShellVariablesEncoderArray_Unwrapped(t *testing.T) { + assertEncodesTo(t, "a: [{n: Alice}, {n: Bob}]", "a_0_n=Alice\na_1_n=Bob") +} + +func TestShellVariablesEncoderKeyNonPrintable(t *testing.T) { + assertEncodesTo(t, `"be\all": ring!`, "bell='ring!'") +} + +func TestShellVariablesEncoderKeyPrintableNonAlphaNumeric(t *testing.T) { + assertEncodesTo(t, `"b-e l=l": ring!`, "b_e_l_l='ring!'") +} + +func TestShellVariablesEncoderKeyPrintableNonAscii(t *testing.T) { + assertEncodesTo(t, `"b\u00e9ll": ring!`, "bell='ring!'") +} + +func TestShellVariablesEncoderRootKeyStartingWithDigit(t *testing.T) { + assertEncodesTo(t, "1a: onea", "_1a=onea") +} + +func TestShellVariablesEncoderRootKeyStartingWithUnderscore(t *testing.T) { + assertEncodesTo(t, "_key: value", "_key=value") +} + +func TestShellVariablesEncoderChildStartingWithUnderscore(t *testing.T) { + assertEncodesTo(t, "root:\n _child: value", "root__child=value") +} + +func TestShellVariablesEncoderEmptyValue(t *testing.T) { + assertEncodesTo(t, "empty:", "empty=") +} + +func TestShellVariablesEncoderEmptyArray(t *testing.T) { + assertEncodesTo(t, "empty: []", "") +} + +func TestShellVariablesEncoderEmptyMap(t *testing.T) { + assertEncodesTo(t, "empty: {}", "") +} + +func TestShellVariablesEncoderScalarNode(t *testing.T) { + assertEncodesTo(t, "some string", "value='some string'") +} diff --git a/pkg/yqlib/printer.go b/pkg/yqlib/printer.go index a2d9f4ed..9cd65753 100644 --- a/pkg/yqlib/printer.go 
+++ b/pkg/yqlib/printer.go @@ -32,6 +32,7 @@ const ( UriOutputFormat ShOutputFormat TomlOutputFormat + ShellVariablesOutputFormat ) func OutputFormatFromString(format string) (PrinterOutputFormat, error) { @@ -50,8 +51,10 @@ func OutputFormatFromString(format string) (PrinterOutputFormat, error) { return XMLOutputFormat, nil case "toml": return TomlOutputFormat, nil + case "shell", "s", "sh": + return ShellVariablesOutputFormat, nil default: - return 0, fmt.Errorf("unknown format '%v' please use [yaml|json|props|csv|tsv|xml]", format) + return 0, fmt.Errorf("unknown format '%v' please use [yaml|json|props|csv|tsv|xml|toml|shell]", format) } } diff --git a/pkg/yqlib/shellvariables_test.go b/pkg/yqlib/shellvariables_test.go new file mode 100644 index 00000000..a922734e --- /dev/null +++ b/pkg/yqlib/shellvariables_test.go @@ -0,0 +1,98 @@ +package yqlib + +import ( + "bufio" + "fmt" + "testing" + + "github.com/mikefarah/yq/v4/test" +) + +var shellVariablesScenarios = []formatScenario{ + { + description: "Encode shell variables", + subdescription: "Note that comments are dropped and values will be enclosed in single quotes as needed.", + input: "" + + "# comment" + "\n" + + "name: Mike Wazowski" + "\n" + + "eyes:" + "\n" + + " color: turquoise" + "\n" + + " number: 1" + "\n" + + "friends:" + "\n" + + " - James P. Sullivan" + "\n" + + " - Celia Mae", + expected: "" + + "name='Mike Wazowski'" + "\n" + + "eyes_color=turquoise" + "\n" + + "eyes_number=1" + "\n" + + "friends_0='James P. 
Sullivan'" + "\n" + + "friends_1='Celia Mae'" + "\n", + }, + { + description: "Encode shell variables: illegal variable names as key.", + subdescription: "Keys that would be illegal as variable keys are adapted.", + input: "" + + "ascii_=_symbols: replaced with _" + "\n" + + "\"ascii_\t_controls\": dropped (this example uses \\t)" + "\n" + + "nonascii_\u05d0_characters: dropped" + "\n" + + "effrot_expe\u00f1ded_t\u00f2_preserve_accented_latin_letters: moderate (via unicode NFKD)" + "\n", + expected: "" + + "ascii___symbols='replaced with _'" + "\n" + + "ascii__controls='dropped (this example uses \\t)'" + "\n" + + "nonascii__characters=dropped" + "\n" + + "effrot_expended_to_preserve_accented_latin_letters='moderate (via unicode NFKD)'" + "\n", + }, + { + description: "Encode shell variables: empty values, arrays and maps", + subdescription: "Empty values are encoded to empty variables, but empty arrays and maps are skipped.", + input: "empty:\n value:\n array: []\n map: {}", + expected: "empty_value=" + "\n", + }, + { + description: "Encode shell variables: single quotes in values", + subdescription: "Single quotes in values are encoded as '\"'\"' (close single quote, double-quoted single quote, open single quote).", + input: "name: Miles O'Brien", + expected: `name='Miles O'"'"'Brien'` + "\n", + }, +} + +func TestShellVariableScenarios(t *testing.T) { + for _, s := range shellVariablesScenarios { + //fmt.Printf("\t<%s> <%s>\n", s.expected, mustProcessFormatScenario(s, NewYamlDecoder(ConfiguredYamlPreferences), NewShellVariablesEncoder())) + test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewYamlDecoder(ConfiguredYamlPreferences), NewShellVariablesEncoder()), s.description) + } + genericScenarios := make([]interface{}, len(shellVariablesScenarios)) + for i, s := range shellVariablesScenarios { + genericScenarios[i] = s + } + documentScenarios(t, "usage", "shellvariables", genericScenarios, documentShellVaraibleScenario) +} + +func 
documentShellVaraibleScenario(_ *testing.T, w *bufio.Writer, i interface{}) { + s := i.(formatScenario) + if s.skipDoc { + return + } + writeOrPanic(w, fmt.Sprintf("## %v\n", s.description)) + + if s.subdescription != "" { + writeOrPanic(w, s.subdescription) + writeOrPanic(w, "\n\n") + } + + writeOrPanic(w, "Given a sample.yml file of:\n") + writeOrPanic(w, fmt.Sprintf("```yaml\n%v\n```\n", s.input)) + + writeOrPanic(w, "then\n") + + expression := s.expression + + if expression != "" { + writeOrPanic(w, fmt.Sprintf("```bash\nyq -o=shell '%v' sample.yml\n```\n", expression)) + } else { + writeOrPanic(w, "```bash\nyq -o=shell sample.yml\n```\n") + } + writeOrPanic(w, "will output\n") + + writeOrPanic(w, fmt.Sprintf("```sh\n%v```\n\n", mustProcessFormatScenario(s, NewYamlDecoder(ConfiguredYamlPreferences), NewShellVariablesEncoder()))) +} diff --git a/pkg/yqlib/utils.go b/pkg/yqlib/utils.go index a1afff27..d800e3b1 100644 --- a/pkg/yqlib/utils.go +++ b/pkg/yqlib/utils.go @@ -15,7 +15,7 @@ func readStream(filename string) (io.Reader, error) { reader = bufio.NewReader(os.Stdin) } else { // ignore CWE-22 gosec issue - that's more targeted for http based apps that run in a public directory, - // and ensuring that it's not possible to give a path to a file outside thar directory. + // and ensuring that it's not possible to give a path to a file outside that directory. file, err := os.Open(filename) // #nosec if err != nil { return nil, err