mirror of
synced 2025-03-11 20:05:35 +00:00
Added CSV roundtrip
This commit is contained in:
@ -3,6 +3,8 @@
setUp() {
rm test*.yml 2>/dev/null || true
rm test*.properties 2>/dev/null || true
rm test*.csv 2>/dev/null || true
rm test*.tsv 2>/dev/null || true
rm test*.xml 2>/dev/null || true
@ -40,6 +42,51 @@ EOM
assertEquals "$expected" "$X"
testInputCSV() {
cat >test.csv <<EOL
read -r -d '' expected << EOM
- fruit: apple
yumLevel: 5
- fruit: banana
yumLevel: 4
X=$(./yq e -p=csv test.csv)
assertEquals "$expected" "$X"
X=$(./yq ea -p=csv test.csv)
assertEquals "$expected" "$X"
testInputTSV() {
cat >test.tsv <<EOL
fruit yumLevel
apple 5
banana 4
read -r -d '' expected << EOM
- fruit: apple
yumLevel: 5
- fruit: banana
yumLevel: 4
X=$(./yq e -p=t test.tsv)
assertEquals "$expected" "$X"
X=$(./yq ea -p=t test.tsv)
assertEquals "$expected" "$X"
testInputXml() {
cat >test.yml <<EOL
<cat legs="4">BiBi</cat>
@ -102,6 +102,48 @@ EOM
assertEquals "$expected" "$X"
testOutputCSV() {
cat >test.yml <<EOL
- fruit: apple
yumLevel: 5
- fruit: banana
yumLevel: 4
read -r -d '' expected << EOM
X=$(./yq -o=c test.yml)
assertEquals "$expected" "$X"
X=$(./yq ea -o=csv test.yml)
assertEquals "$expected" "$X"
testOutputTSV() {
cat >test.yml <<EOL
- fruit: apple
yumLevel: 5
- fruit: banana
yumLevel: 4
read -r -d '' expected << EOM
fruit yumLevel
apple 5
banana 4
X=$(./yq -o=t test.yml)
assertEquals "$expected" "$X"
X=$(./yq ea -o=tsv test.yml)
assertEquals "$expected" "$X"
testOutputXml() {
cat >test.yml <<EOL
a: {b: {c: ["cat"]}}
@ -13,6 +13,11 @@ Gary,1,true,168.8
Samantha's Rabbit,2,false,-188.8
const expectedUpdatedSimpleCsv = `name,numberOfCats,likesApples,height
Samantha's Rabbit,2,false,-188.8
const csvSimpleShort = `Name,Number of Cats
Samantha's Rabbit,2
@ -33,10 +38,23 @@ const expectedYamlFromCSV = `- name: Gary
height: -188.8
const expectedYamlFromCSVMissingData = `- name: Gary
numberOfCats: 1
height: 168.8
- name: Samantha's Rabbit
height: -188.8
likesApples: false
const csvSimpleMissingData = `name,numberOfCats,height
Samantha's Rabbit,,-188.8
const csvTestSimpleYaml = `- [i, like, csv]
- [because, excel, is, cool]`
const csvTestExpectedSimpleCsv = `i,like,csv
const expectedSimpleCsv = `i,like,csv
@ -48,7 +66,7 @@ var csvScenarios = []formatScenario{
description: "Encode CSV simple",
input: csvTestSimpleYaml,
expected: csvTestExpectedSimpleCsv,
expected: expectedSimpleCsv,
scenarioType: "encode-csv",
@ -58,19 +76,38 @@ var csvScenarios = []formatScenario{
scenarioType: "encode-tsv",
description: "Encode array of objects to csv",
subdescription: "Add the header row manually, then the we convert each object into an array of values - resulting in an array of arrays. Nice thing about this method is you can pick the columns and call the header whatever you like.",
description: "Encode Empty",
skipDoc: true,
input: `[]`,
expected: "",
scenarioType: "encode-csv",
description: "Comma in value",
skipDoc: true,
input: `["comma, in, value", things]`,
expected: "\"comma, in, value\",things\n",
scenarioType: "encode-csv",
description: "Encode array of objects to csv",
input: expectedYamlFromCSV,
expected: csvSimple,
scenarioType: "encode-csv",
description: "Encode array of objects to custom csv format",
subdescription: "Add the header row manually, then the we convert each object into an array of values - resulting in an array of arrays. Pick the columns and call the header whatever you like.",
input: expectedYamlFromCSV,
expected: csvSimpleShort,
expression: `[["Name", "Number of Cats"]] + [.[] | [.name, .numberOfCats ]]`,
scenarioType: "encode-csv",
description: "Encode array of objects to csv - generic",
subdescription: "This is a little trickier than the previous example - we dynamically work out the $header, and use that to automatically create the value arrays.",
input: expectedYamlFromCSV,
expected: csvSimple,
expression: `(.[0] | keys | .[] ) as $header | [[$header]] + [.[] | [ .[$header] ]]`,
description: "Encode array of objects to csv - missing fields behaviour",
subdescription: "First entry is used to determine the headers, and it it missing 'likesApples', so it is not included in the csv. Second entry does not have 'numberOfCats' so that is blank",
input: expectedYamlFromCSVMissingData,
expected: csvSimpleMissingData,
scenarioType: "encode-csv",
@ -87,6 +124,13 @@ var csvScenarios = []formatScenario{
expected: expectedYamlFromCSV,
scenarioType: "decode-tsv-object",
description: "Round trip",
input: csvSimple,
expected: expectedUpdatedSimpleCsv,
expression: `(.[] | select(.name == "Gary") | .numberOfCats) = 3`,
scenarioType: "roundtrip-csv",
func testCSVScenario(t *testing.T, s formatScenario) {
@ -99,6 +143,8 @@ func testCSVScenario(t *testing.T, s formatScenario) {
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewCSVObjectDecoder(','), NewYamlEncoder(2, false, true, true)), s.description)
case "decode-tsv-object":
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewCSVObjectDecoder('\t'), NewYamlEncoder(2, false, true, true)), s.description)
case "roundtrip-csv":
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewCSVObjectDecoder(','), NewCsvEncoder(',')), s.description)
panic(fmt.Sprintf("unhandled scenario type %q", s.scenarioType))
@ -161,6 +207,38 @@ func documentCSVEncodeScenario(w *bufio.Writer, s formatScenario, formatType str
func documentCSVRoundTripScenario(w *bufio.Writer, s formatScenario, formatType string) {
writeOrPanic(w, fmt.Sprintf("## %v\n", s.description))
if s.subdescription != "" {
writeOrPanic(w, s.subdescription)
writeOrPanic(w, "\n\n")
writeOrPanic(w, fmt.Sprintf("Given a sample.%v file of:\n", formatType))
writeOrPanic(w, fmt.Sprintf("```%v\n%v\n```\n", formatType, s.input))
writeOrPanic(w, "then\n")
expression := s.expression
if expression != "" {
writeOrPanic(w, fmt.Sprintf("```bash\nyq -p=%v -o=%v '%v' sample.%v\n```\n", formatType, formatType, expression, formatType))
} else {
writeOrPanic(w, fmt.Sprintf("```bash\nyq -p=%v -o=%v sample.%v\n```\n", formatType, formatType, formatType))
writeOrPanic(w, "will output\n")
separator := ','
if formatType == "tsv" {
separator = '\t'
writeOrPanic(w, fmt.Sprintf("```%v\n%v```\n\n", formatType,
processFormatScenario(s, NewCSVObjectDecoder(separator), NewCsvEncoder(separator))),
func documentCSVScenario(t *testing.T, w *bufio.Writer, i interface{}) {
s := i.(formatScenario)
if s.skipDoc {
@ -175,6 +253,8 @@ func documentCSVScenario(t *testing.T, w *bufio.Writer, i interface{}) {
documentCSVDecodeObjectScenario(t, w, s, "csv")
case "decode-tsv-object":
documentCSVDecodeObjectScenario(t, w, s, "tsv")
case "roundtrip-csv":
documentCSVRoundTripScenario(w, s, "csv")
panic(fmt.Sprintf("unhandled scenario type %q", s.scenarioType))
@ -11,14 +11,14 @@ These operators are useful to process yaml documents that have stringified embed
| --- | -- | --|
| Yaml | from_yaml | to_yaml(i)/@yaml |
| JSON | from_json | to_json(i)/@json |
| Properties | from_props | to_props/@props |
| CSV | | to_csv/@csv |
| TSV | | to_tsv/@tsv |
| Properties | from_props/@propsd | to_props/@props |
| CSV | from_csv/@csvd | to_csv/@csv |
| TSV | from_tsv/@tsvd | to_tsv/@tsv |
| XML | from_xml | to_xml(i)/@xml |
| Base64 | @base64d | @base64 |
CSV and TSV format both accept either a single array or scalars (representing a single row), or an array of array of scalars (representing multiple rows).
See CSV and TSV [documentation](https://mikefarah.gitbook.io/yq/usage/csv-tsv) for accepted formats.
XML uses the `--xml-attribute-prefix` and `--xml-content-name` flags to identify attributes and content fields.
@ -132,7 +132,7 @@ a: |-
yq '.a |= from_props' sample.yml
yq '.a |= @propsd' sample.yml
will output
@ -141,6 +141,42 @@ a:
dogs: cool as well
## Decode csv encoded string
Given a sample.yml file of:
a: |-
great,cool as well
yq '.a |= @csvd' sample.yml
will output
- cats: great
dogs: cool as well
## Decode tsv encoded string
Given a sample.yml file of:
a: |-
cats dogs
great cool as well
yq '.a |= @tsvd' sample.yml
will output
- cats: great
dogs: cool as well
## Encode value as yaml string
Indent defaults to 2
@ -11,14 +11,14 @@ These operators are useful to process yaml documents that have stringified embed
| --- | -- | --|
| Yaml | from_yaml | to_yaml(i)/@yaml |
| JSON | from_json | to_json(i)/@json |
| Properties | from_props | to_props/@props |
| CSV | | to_csv/@csv |
| TSV | | to_tsv/@tsv |
| Properties | from_props/@propsd | to_props/@props |
| CSV | from_csv/@csvd | to_csv/@csv |
| TSV | from_tsv/@tsvd | to_tsv/@tsv |
| XML | from_xml | to_xml(i)/@xml |
| Base64 | @base64d | @base64 |
CSV and TSV format both accept either a single array or scalars (representing a single row), or an array of array of scalars (representing multiple rows).
See CSV and TSV [documentation](https://mikefarah.gitbook.io/yq/usage/csv-tsv) for accepted formats.
XML uses the `--xml-attribute-prefix` and `--xml-content-name` flags to identify attributes and content fields.
@ -1,5 +1,32 @@
Encode (arrays of arrays) data structures to CSV or TSV, Decode CSV, TSV into an array of objects.
Encode/Decode to CSV or TSV.
## Encode
Currently supports arrays of homogeneous flat objects, that is: no nesting and it assumes the _first_ object has all the keys required:
- name: Bobo
type: dog
- name: Fifi
type: cat
As well as arrays of arrays of scalars (strings/numbers/booleans):
- [Bobo, dog]
- [Fifi, cat]
## Decode
Decode assumes the first CSV/TSV row is the header row, and all rows beneath are the entries.
The data will be decoded into an array of objects, using the header row as keys.
{% hint style="warning" %}
@ -41,7 +68,31 @@ because excel is cool
## Encode array of objects to csv
Add the header row manually, then the we convert each object into an array of values - resulting in an array of arrays. Nice thing about this method is you can pick the columns and call the header whatever you like.
Given a sample.yml file of:
- name: Gary
numberOfCats: 1
likesApples: true
height: 168.8
- name: Samantha's Rabbit
numberOfCats: 2
likesApples: false
height: -188.8
yq -o=csv sample.yml
will output
Samantha's Rabbit,2,false,-188.8
## Encode array of objects to custom csv format
Add the header row manually, then we convert each object into an array of values - resulting in an array of arrays. Pick the columns and call the header whatever you like.
Given a sample.yml file of:
@ -66,30 +117,28 @@ Gary,1
Samantha's Rabbit,2
## Encode array of objects to csv - generic
This is a little trickier than the previous example - we dynamically work out the $header, and use that to automatically create the value arrays.
## Encode array of objects to csv - missing fields behaviour
First entry is used to determine the headers, and it is missing 'likesApples', so it is not included in the csv. Second entry does not have 'numberOfCats' so that is blank
Given a sample.yml file of:
- name: Gary
numberOfCats: 1
likesApples: true
height: 168.8
- name: Samantha's Rabbit
numberOfCats: 2
likesApples: false
height: -188.8
likesApples: false
yq -o=csv '(.[0] | keys | .[] ) as $header | [[$header]] + [.[] | [ .[$header] ]]' sample.yml
yq -o=csv sample.yml
will output
Samantha's Rabbit,2,false,-188.8
Samantha's Rabbit,,-188.8
## Parse CSV into an array of objects
@ -144,3 +193,22 @@ will output
height: -188.8
## Round trip
Given a sample.csv file of:
Samantha's Rabbit,2,false,-188.8
yq -p=csv -o=csv '(.[] | select(.name == "Gary") | .numberOfCats) = 3' sample.csv
will output
Samantha's Rabbit,2,false,-188.8
@ -1,3 +1,30 @@
Encode (arrays of arrays) data structures to CSV or TSV, Decode CSV, TSV into an array of objects.
Encode/Decode to CSV or TSV.
## Encode
Currently supports arrays of homogeneous flat objects, that is: no nesting and it assumes the _first_ object has all the keys required:
- name: Bobo
type: dog
- name: Fifi
type: cat
As well as arrays of arrays of scalars (strings/numbers/booleans):
- [Bobo, dog]
- [Fifi, cat]
## Decode
Decode assumes the first CSV/TSV row is the header row, and all rows beneath are the entries.
The data will be decoded into an array of objects, using the header row as keys.
@ -13,7 +13,7 @@ type csvEncoder struct {
func NewCsvEncoder(separator rune) Encoder {
return &csvEncoder{separator}
return &csvEncoder{separator: separator}
func (e *csvEncoder) CanHandleAliases() bool {
@ -41,6 +41,67 @@ func (e *csvEncoder) encodeRow(csvWriter *csv.Writer, contents []*yaml.Node) err
return csvWriter.Write(stringValues)
func (e *csvEncoder) encodeArrays(csvWriter *csv.Writer, content []*yaml.Node) error {
for i, child := range content {
if child.Kind != yaml.SequenceNode {
return fmt.Errorf("csv encoding only works for arrays of scalars (string/numbers/booleans), child[%v] is a %v", i, child.Tag)
err := e.encodeRow(csvWriter, child.Content)
if err != nil {
return err
return nil
func (e *csvEncoder) extractHeader(child *yaml.Node) ([]*yaml.Node, error) {
if child.Kind != yaml.MappingNode {
return nil, fmt.Errorf("csv object encoding only works for arrays of flat objects (string key => string/numbers/boolean value), child[0] is a %v", child.Tag)
mapKeys := getMapKeys(child)
return mapKeys.Content, nil
func (e *csvEncoder) createChildRow(child *yaml.Node, headers []*yaml.Node) []*yaml.Node {
childRow := make([]*yaml.Node, 0)
for _, header := range headers {
keyIndex := findKeyInMap(child, header)
value := createScalarNode(nil, "")
if keyIndex != -1 {
value = child.Content[keyIndex+1]
childRow = append(childRow, value)
return childRow
func (e *csvEncoder) encodeObjects(csvWriter *csv.Writer, content []*yaml.Node) error {
headers, err := e.extractHeader(content[0])
if err != nil {
return nil
err = e.encodeRow(csvWriter, headers)
if err != nil {
return nil
for i, child := range content {
if child.Kind != yaml.MappingNode {
return fmt.Errorf("csv object encoding only works for arrays of flat objects (string key => string/numbers/boolean value), child[%v] is a %v", i, child.Tag)
row := e.createChildRow(child, headers)
err = e.encodeRow(csvWriter, row)
if err != nil {
return err
return nil
func (e *csvEncoder) Encode(writer io.Writer, originalNode *yaml.Node) error {
csvWriter := csv.NewWriter(writer)
csvWriter.Comma = e.separator
@ -56,15 +117,10 @@ func (e *csvEncoder) Encode(writer io.Writer, originalNode *yaml.Node) error {
return e.encodeRow(csvWriter, node.Content)
for i, child := range node.Content {
if child.Kind != yaml.SequenceNode {
return fmt.Errorf("csv encoding only works for arrays of scalars (string/numbers/booleans), child[%v] is a %v", i, child.Tag)
err := e.encodeRow(csvWriter, child.Content)
if err != nil {
return err
if node.Content[0].Kind == yaml.MappingNode {
return e.encodeObjects(csvWriter, node.Content)
return nil
return e.encodeArrays(csvWriter, node.Content)
@ -1,60 +0,0 @@
package yqlib
import (
func yamlToCsv(sampleYaml string, separator rune) string {
var output bytes.Buffer
writer := bufio.NewWriter(&output)
var jsonEncoder = NewCsvEncoder(separator)
inputs, err := readDocuments(strings.NewReader(sampleYaml), "sample.yml", 0, NewYamlDecoder())
if err != nil {
node := inputs.Front().Value.(*CandidateNode).Node
err = jsonEncoder.Encode(writer, node)
if err != nil {
return strings.TrimSuffix(output.String(), "\n")
var sampleYaml = `["apple", apple2, "comma, in, value", "new
line", 3, 3.40, true, "tab here"]`
var sampleYamlArray = "[" + sampleYaml + ", [bob, cat, meow, puss]]"
func TestCsvEncoderEmptyArray(t *testing.T) {
var actualCsv = yamlToCsv(`[]`, ',')
test.AssertResult(t, "", actualCsv)
func TestCsvEncoder(t *testing.T) {
var expectedCsv = `apple,apple2,"comma, in, value",new line,3,3.40,true,tab here`
var actualCsv = yamlToCsv(sampleYaml, ',')
test.AssertResult(t, expectedCsv, actualCsv)
func TestCsvEncoderArrayOfArrays(t *testing.T) {
var actualCsv = yamlToCsv(sampleYamlArray, ',')
var expectedCsv = "apple,apple2,\"comma, in, value\",new line,3,3.40,true,tab here\nbob,cat,meow,puss"
test.AssertResult(t, expectedCsv, actualCsv)
func TestTsvEncoder(t *testing.T) {
var expectedCsv = `apple apple2 comma, in, value new line 3 3.40 true "tab here"`
var actualCsv = yamlToCsv(sampleYaml, '\t')
test.AssertResult(t, expectedCsv, actualCsv)
@ -67,7 +67,10 @@ var participleYqRules = []*participleYqRule{
{"XMLEncode", `to_?xml`, encodeWithIndent(XMLOutputFormat, 2), 0},
{"XMLEncodeNoIndent", `@xml`, encodeWithIndent(XMLOutputFormat, 0), 0},
{"CSVDecode", `from_?csv|@csvd`, decodeOp(CSVObjectInputFormat), 0},
{"CSVEncode", `to_?csv|@csv`, encodeWithIndent(CSVOutputFormat, 0), 0},
{"TSVDecode", `from_?tsv|@tsvd`, decodeOp(TSVObjectInputFormat), 0},
{"TSVEncode", `to_?tsv|@tsv`, encodeWithIndent(TSVOutputFormat, 0), 0},
{"Base64d", `@base64d`, decodeOp(Base64InputFormat), 0},
@ -205,10 +205,10 @@ func findInArray(array *yaml.Node, item *yaml.Node) int {
return -1
func findKeyInMap(array *yaml.Node, item *yaml.Node) int {
func findKeyInMap(dataMap *yaml.Node, item *yaml.Node) int {
for index := 0; index < len(array.Content); index = index + 2 {
if recursiveNodeEqual(array.Content[index], item) {
for index := 0; index < len(dataMap.Content); index = index + 2 {
if recursiveNodeEqual(dataMap.Content[index], item) {
return index
@ -114,6 +114,10 @@ func decodeOperator(d *dataTreeNavigator, context Context, expressionNode *Expre
decoder = NewBase64Decoder()
case PropertiesInputFormat:
decoder = NewPropertiesDecoder()
case CSVObjectInputFormat:
decoder = NewCSVObjectDecoder(',')
case TSVObjectInputFormat:
decoder = NewCSVObjectDecoder('\t')
var results = list.New()
@ -66,11 +66,27 @@ var encoderDecoderOperatorScenarios = []expressionScenario{
description: "Decode props encoded string",
document: `a: "cats=great\ndogs=cool as well"`,
expression: `.a |= from_props`,
expression: `.a |= @propsd`,
expected: []string{
"D0, P[], (doc)::a:\n cats: great\n dogs: cool as well\n",
description: "Decode csv encoded string",
document: `a: "cats,dogs\ngreat,cool as well"`,
expression: `.a |= @csvd`,
expected: []string{
"D0, P[], (doc)::a:\n - cats: great\n dogs: cool as well\n",
description: "Decode tsv encoded string",
document: `a: "cats dogs\ngreat cool as well"`,
expression: `.a |= @tsvd`,
expected: []string{
"D0, P[], (doc)::a:\n - cats: great\n dogs: cool as well\n",
skipDoc: true,
document: "a:\n cool:\n bob: dylan",
Reference in New Issue
Block a user