Added CSV roundtrip

This commit is contained in:
Mike Farah 2022-08-01 10:05:26 +10:00
parent cb0bae128f
commit 9628aaf8c5
13 changed files with 424 additions and 105 deletions

View File

@ -3,6 +3,8 @@
# setUp removes fixture files left in the working directory by earlier tests.
# Missing files are not an error: rm's diagnostics are discarded and the
# function always succeeds.
setUp() {
  rm test*.yml test*.properties test*.csv test*.tsv test*.xml 2>/dev/null || true
}
@ -40,6 +42,51 @@ EOM
assertEquals "$expected" "$X"
}
# testInputCSV checks that a CSV file with a header row and two data rows is
# printed as a YAML array of objects when read with -p=csv, using both the
# `e` and `ea` subcommands.
testInputCSV() {
# Fixture: header row followed by two entries.
cat >test.csv <<EOL
fruit,yumLevel
apple,5
banana,4
EOL
# Expected YAML: one object per data row, keyed by the header names.
read -r -d '' expected << EOM
- fruit: apple
yumLevel: 5
- fruit: banana
yumLevel: 4
EOM
X=$(./yq e -p=csv test.csv)
assertEquals "$expected" "$X"
# Same expectation for the `ea` subcommand.
X=$(./yq ea -p=csv test.csv)
assertEquals "$expected" "$X"
}
# testInputTSV checks that a TSV file with a header row and two data rows is
# printed as a YAML array of objects when read with -p=t (presumably the short
# form of -p=tsv; confirm against the CLI flags), using both the `e` and `ea`
# subcommands.
testInputTSV() {
# Fixture: header row followed by two entries.
cat >test.tsv <<EOL
fruit yumLevel
apple 5
banana 4
EOL
# Expected YAML: one object per data row, keyed by the header names.
read -r -d '' expected << EOM
- fruit: apple
yumLevel: 5
- fruit: banana
yumLevel: 4
EOM
X=$(./yq e -p=t test.tsv)
assertEquals "$expected" "$X"
# Same expectation for the `ea` subcommand.
X=$(./yq ea -p=t test.tsv)
assertEquals "$expected" "$X"
}
testInputXml() {
cat >test.yml <<EOL
<cat legs="4">BiBi</cat>

View File

@ -102,6 +102,48 @@ EOM
assertEquals "$expected" "$X"
}
# testOutputCSV checks that a YAML array of flat objects is written as CSV
# (header row taken from the object keys, then one row per object), both with
# no subcommand and -o=c (presumably the short form of -o=csv; confirm against
# the CLI flags) and with the `ea` subcommand and -o=csv.
testOutputCSV() {
# Fixture: two objects sharing the same keys.
cat >test.yml <<EOL
- fruit: apple
yumLevel: 5
- fruit: banana
yumLevel: 4
EOL
# Expected CSV: header row followed by one row per object.
read -r -d '' expected << EOM
fruit,yumLevel
apple,5
banana,4
EOM
X=$(./yq -o=c test.yml)
assertEquals "$expected" "$X"
X=$(./yq ea -o=csv test.yml)
assertEquals "$expected" "$X"
}
# testOutputTSV checks that a YAML array of flat objects is written as TSV
# (header row taken from the object keys, then one row per object), both with
# no subcommand and -o=t (presumably the short form of -o=tsv; confirm against
# the CLI flags) and with the `ea` subcommand and -o=tsv.
testOutputTSV() {
# Fixture: two objects sharing the same keys.
cat >test.yml <<EOL
- fruit: apple
yumLevel: 5
- fruit: banana
yumLevel: 4
EOL
# Expected TSV: header row followed by one row per object.
read -r -d '' expected << EOM
fruit yumLevel
apple 5
banana 4
EOM
X=$(./yq -o=t test.yml)
assertEquals "$expected" "$X"
X=$(./yq ea -o=tsv test.yml)
assertEquals "$expected" "$X"
}
testOutputXml() {
cat >test.yml <<EOL
a: {b: {c: ["cat"]}}

View File

@ -13,6 +13,11 @@ Gary,1,true,168.8
Samantha's Rabbit,2,false,-188.8
`
// expectedUpdatedSimpleCsv is the expected output of the "Round trip"
// scenario: the same header and rows as its CSV input, with Gary's
// numberOfCats set to 3.
const expectedUpdatedSimpleCsv = `name,numberOfCats,likesApples,height
Gary,3,true,168.8
Samantha's Rabbit,2,false,-188.8
`
const csvSimpleShort = `Name,Number of Cats
Gary,1
Samantha's Rabbit,2
@ -33,10 +38,23 @@ const expectedYamlFromCSV = `- name: Gary
height: -188.8
`
// expectedYamlFromCSVMissingData is the YAML input of the "missing fields
// behaviour" encode scenario. The first object has no likesApples key and the
// second object has no numberOfCats key.
const expectedYamlFromCSVMissingData = `- name: Gary
numberOfCats: 1
height: 168.8
- name: Samantha's Rabbit
height: -188.8
likesApples: false
`
// csvSimpleMissingData is the expected CSV of the "missing fields behaviour"
// encode scenario: there is no likesApples column, and the numberOfCats cell
// of the second row is empty.
const csvSimpleMissingData = `name,numberOfCats,height
Gary,1,168.8
Samantha's Rabbit,,-188.8
`
// csvTestSimpleYaml is the YAML input of the "Encode CSV simple" scenario: an
// array of two scalar arrays of different lengths (three and four items).
const csvTestSimpleYaml = `- [i, like, csv]
- [because, excel, is, cool]`
const csvTestExpectedSimpleCsv = `i,like,csv
const expectedSimpleCsv = `i,like,csv
because,excel,is,cool
`
@ -48,7 +66,7 @@ var csvScenarios = []formatScenario{
{
description: "Encode CSV simple",
input: csvTestSimpleYaml,
expected: csvTestExpectedSimpleCsv,
expected: expectedSimpleCsv,
scenarioType: "encode-csv",
},
{
@ -57,20 +75,39 @@ var csvScenarios = []formatScenario{
expected: tsvTestExpectedSimpleCsv,
scenarioType: "encode-tsv",
},
{
description: "Encode Empty",
skipDoc: true,
input: `[]`,
expected: "",
scenarioType: "encode-csv",
},
{
description: "Comma in value",
skipDoc: true,
input: `["comma, in, value", things]`,
expected: "\"comma, in, value\",things\n",
scenarioType: "encode-csv",
},
{
description: "Encode array of objects to csv",
subdescription: "Add the header row manually, then the we convert each object into an array of values - resulting in an array of arrays. Nice thing about this method is you can pick the columns and call the header whatever you like.",
input: expectedYamlFromCSV,
expected: csvSimple,
scenarioType: "encode-csv",
},
{
description: "Encode array of objects to custom csv format",
subdescription: "Add the header row manually, then the we convert each object into an array of values - resulting in an array of arrays. Pick the columns and call the header whatever you like.",
input: expectedYamlFromCSV,
expected: csvSimpleShort,
expression: `[["Name", "Number of Cats"]] + [.[] | [.name, .numberOfCats ]]`,
scenarioType: "encode-csv",
},
{
description: "Encode array of objects to csv - generic",
subdescription: "This is a little trickier than the previous example - we dynamically work out the $header, and use that to automatically create the value arrays.",
input: expectedYamlFromCSV,
expected: csvSimple,
expression: `(.[0] | keys | .[] ) as $header | [[$header]] + [.[] | [ .[$header] ]]`,
description: "Encode array of objects to csv - missing fields behaviour",
subdescription: "First entry is used to determine the headers, and it it missing 'likesApples', so it is not included in the csv. Second entry does not have 'numberOfCats' so that is blank",
input: expectedYamlFromCSVMissingData,
expected: csvSimpleMissingData,
scenarioType: "encode-csv",
},
{
@ -87,6 +124,13 @@ var csvScenarios = []formatScenario{
expected: expectedYamlFromCSV,
scenarioType: "decode-tsv-object",
},
{
description: "Round trip",
input: csvSimple,
expected: expectedUpdatedSimpleCsv,
expression: `(.[] | select(.name == "Gary") | .numberOfCats) = 3`,
scenarioType: "roundtrip-csv",
},
}
func testCSVScenario(t *testing.T, s formatScenario) {
@ -99,6 +143,8 @@ func testCSVScenario(t *testing.T, s formatScenario) {
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewCSVObjectDecoder(','), NewYamlEncoder(2, false, true, true)), s.description)
case "decode-tsv-object":
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewCSVObjectDecoder('\t'), NewYamlEncoder(2, false, true, true)), s.description)
case "roundtrip-csv":
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewCSVObjectDecoder(','), NewCsvEncoder(',')), s.description)
default:
panic(fmt.Sprintf("unhandled scenario type %q", s.scenarioType))
}
@ -161,6 +207,38 @@ func documentCSVEncodeScenario(w *bufio.Writer, s formatScenario, formatType str
)
}
// documentCSVRoundTripScenario writes the markdown section for a round-trip
// scenario to w: a heading, the optional subdescription, the sample input
// file, the yq command that reads and writes formatType, and the output
// produced by decoding and re-encoding the scenario with that format.
//
// A formatType of "tsv" uses a tab separator; any other value uses a comma.
func documentCSVRoundTripScenario(w *bufio.Writer, s formatScenario, formatType string) {
	writeOrPanic(w, fmt.Sprintf("## %v\n", s.description))

	if s.subdescription != "" {
		writeOrPanic(w, s.subdescription)
		writeOrPanic(w, "\n\n")
	}

	writeOrPanic(w, fmt.Sprintf("Given a sample.%v file of:\n", formatType))
	writeOrPanic(w, fmt.Sprintf("```%v\n%v\n```\n", formatType, s.input))

	writeOrPanic(w, "then\n")

	// The expression is only quoted into the command when the scenario has one.
	command := fmt.Sprintf("```bash\nyq -p=%v -o=%v sample.%v\n```\n", formatType, formatType, formatType)
	if s.expression != "" {
		command = fmt.Sprintf("```bash\nyq -p=%v -o=%v '%v' sample.%v\n```\n", formatType, formatType, s.expression, formatType)
	}
	writeOrPanic(w, command)

	writeOrPanic(w, "will output\n")

	var separator rune
	switch formatType {
	case "tsv":
		separator = '\t'
	default:
		separator = ','
	}

	output := processFormatScenario(s, NewCSVObjectDecoder(separator), NewCsvEncoder(separator))
	writeOrPanic(w, fmt.Sprintf("```%v\n%v```\n\n", formatType, output))
}
func documentCSVScenario(t *testing.T, w *bufio.Writer, i interface{}) {
s := i.(formatScenario)
if s.skipDoc {
@ -175,6 +253,8 @@ func documentCSVScenario(t *testing.T, w *bufio.Writer, i interface{}) {
documentCSVDecodeObjectScenario(t, w, s, "csv")
case "decode-tsv-object":
documentCSVDecodeObjectScenario(t, w, s, "tsv")
case "roundtrip-csv":
documentCSVRoundTripScenario(w, s, "csv")
default:
panic(fmt.Sprintf("unhandled scenario type %q", s.scenarioType))

View File

@ -11,14 +11,14 @@ These operators are useful to process yaml documents that have stringified embed
| --- | -- | --|
| Yaml | from_yaml | to_yaml(i)/@yaml |
| JSON | from_json | to_json(i)/@json |
| Properties | from_props | to_props/@props |
| CSV | | to_csv/@csv |
| TSV | | to_tsv/@tsv |
| Properties | from_props/@propsd | to_props/@props |
| CSV | from_csv/@csvd | to_csv/@csv |
| TSV | from_tsv/@tsvd | to_tsv/@tsv |
| XML | from_xml | to_xml(i)/@xml |
| Base64 | @base64d | @base64 |
CSV and TSV format both accept either a single array or scalars (representing a single row), or an array of array of scalars (representing multiple rows).
See CSV and TSV [documentation](https://mikefarah.gitbook.io/yq/usage/csv-tsv) for accepted formats.
XML uses the `--xml-attribute-prefix` and `--xml-content-name` flags to identify attributes and content fields.
@ -132,7 +132,7 @@ a: |-
```
then
```bash
yq '.a |= from_props' sample.yml
yq '.a |= @propsd' sample.yml
```
will output
```yaml
@ -141,6 +141,42 @@ a:
dogs: cool as well
```
## Decode csv encoded string
Given a sample.yml file of:
```yaml
a: |-
cats,dogs
great,cool as well
```
then
```bash
yq '.a |= @csvd' sample.yml
```
will output
```yaml
a:
- cats: great
dogs: cool as well
```
## Decode tsv encoded string
Given a sample.yml file of:
```yaml
a: |-
cats dogs
great cool as well
```
then
```bash
yq '.a |= @tsvd' sample.yml
```
will output
```yaml
a:
- cats: great
dogs: cool as well
```
## Encode value as yaml string
Indent defaults to 2

View File

@ -11,14 +11,14 @@ These operators are useful to process yaml documents that have stringified embed
| --- | -- | --|
| Yaml | from_yaml | to_yaml(i)/@yaml |
| JSON | from_json | to_json(i)/@json |
| Properties | from_props | to_props/@props |
| CSV | | to_csv/@csv |
| TSV | | to_tsv/@tsv |
| Properties | from_props/@propsd | to_props/@props |
| CSV | from_csv/@csvd | to_csv/@csv |
| TSV | from_tsv/@tsvd | to_tsv/@tsv |
| XML | from_xml | to_xml(i)/@xml |
| Base64 | @base64d | @base64 |
CSV and TSV format both accept either a single array or scalars (representing a single row), or an array of array of scalars (representing multiple rows).
See CSV and TSV [documentation](https://mikefarah.gitbook.io/yq/usage/csv-tsv) for accepted formats.
XML uses the `--xml-attribute-prefix` and `--xml-content-name` flags to identify attributes and content fields.

View File

@ -1,5 +1,32 @@
# CSV
Encode (arrays of arrays) data structures to CSV or TSV, Decode CSV, TSV into an array of objects.
Encode/Decode to CSV or TSV.
## Encode
Currently supports arrays of homogeneous flat objects; that is, there is no nesting, and the _first_ object is assumed to have all the required keys:
```yaml
- name: Bobo
type: dog
- name: Fifi
type: cat
```
As well as arrays of arrays of scalars (strings/numbers/booleans):
```yaml
- [Bobo, dog]
- [Fifi, cat]
```
## Decode
Decode assumes the first CSV/TSV row is the header row, and all rows beneath are the entries.
The data will be decoded into an array of objects, using the values of the header row as keys.
```csv
name,type
Bobo,dog
Fifi,cat
```
{% hint style="warning" %}
@ -41,7 +68,31 @@ because excel is cool
```
## Encode array of objects to csv
Add the header row manually, then the we convert each object into an array of values - resulting in an array of arrays. Nice thing about this method is you can pick the columns and call the header whatever you like.
Given a sample.yml file of:
```yaml
- name: Gary
numberOfCats: 1
likesApples: true
height: 168.8
- name: Samantha's Rabbit
numberOfCats: 2
likesApples: false
height: -188.8
```
then
```bash
yq -o=csv sample.yml
```
will output
```csv
name,numberOfCats,likesApples,height
Gary,1,true,168.8
Samantha's Rabbit,2,false,-188.8
```
## Encode array of objects to custom csv format
Add the header row manually, then convert each object into an array of values - resulting in an array of arrays. Pick the columns and call the header whatever you like.
Given a sample.yml file of:
```yaml
@ -66,30 +117,28 @@ Gary,1
Samantha's Rabbit,2
```
## Encode array of objects to csv - generic
This is a little trickier than the previous example - we dynamically work out the $header, and use that to automatically create the value arrays.
## Encode array of objects to csv - missing fields behaviour
The first entry is used to determine the headers, and it is missing 'likesApples', so that column is not included in the CSV. The second entry does not have 'numberOfCats', so that cell is blank.
Given a sample.yml file of:
```yaml
- name: Gary
numberOfCats: 1
likesApples: true
height: 168.8
- name: Samantha's Rabbit
numberOfCats: 2
likesApples: false
height: -188.8
likesApples: false
```
then
```bash
yq -o=csv '(.[0] | keys | .[] ) as $header | [[$header]] + [.[] | [ .[$header] ]]' sample.yml
yq -o=csv sample.yml
```
will output
```csv
name,numberOfCats,likesApples,height
Gary,1,true,168.8
Samantha's Rabbit,2,false,-188.8
name,numberOfCats,height
Gary,1,168.8
Samantha's Rabbit,,-188.8
```
## Parse CSV into an array of objects
@ -144,3 +193,22 @@ will output
height: -188.8
```
## Round trip
Given a sample.csv file of:
```csv
name,numberOfCats,likesApples,height
Gary,1,true,168.8
Samantha's Rabbit,2,false,-188.8
```
then
```bash
yq -p=csv -o=csv '(.[] | select(.name == "Gary") | .numberOfCats) = 3' sample.csv
```
will output
```csv
name,numberOfCats,likesApples,height
Gary,3,true,168.8
Samantha's Rabbit,2,false,-188.8
```

View File

@ -1,3 +1,30 @@
# CSV
Encode (arrays of arrays) data structures to CSV or TSV, Decode CSV, TSV into an array of objects.
Encode/Decode to CSV or TSV.
## Encode
Currently supports arrays of homogeneous flat objects; that is, there is no nesting, and the _first_ object is assumed to have all the required keys:
```yaml
- name: Bobo
type: dog
- name: Fifi
type: cat
```
As well as arrays of arrays of scalars (strings/numbers/booleans):
```yaml
- [Bobo, dog]
- [Fifi, cat]
```
## Decode
Decode assumes the first CSV/TSV row is the header row, and all rows beneath are the entries.
The data will be decoded into an array of objects, using the values of the header row as keys.
```csv
name,type
Bobo,dog
Fifi,cat
```

View File

@ -13,7 +13,7 @@ type csvEncoder struct {
}
func NewCsvEncoder(separator rune) Encoder {
return &csvEncoder{separator}
return &csvEncoder{separator: separator}
}
func (e *csvEncoder) CanHandleAliases() bool {
@ -41,6 +41,67 @@ func (e *csvEncoder) encodeRow(csvWriter *csv.Writer, contents []*yaml.Node) err
return csvWriter.Write(stringValues)
}
// encodeArrays writes each element of content as one CSV row.
//
// Every element must be a sequence node; the first element that is not stops
// the encoding with an error naming its index and tag. Errors from encodeRow
// are returned unchanged.
func (e *csvEncoder) encodeArrays(csvWriter *csv.Writer, content []*yaml.Node) error {
	for rowIndex := 0; rowIndex < len(content); rowIndex++ {
		row := content[rowIndex]
		if row.Kind != yaml.SequenceNode {
			return fmt.Errorf("csv encoding only works for arrays of scalars (string/numbers/booleans), child[%v] is a %v", rowIndex, row.Tag)
		}
		if err := e.encodeRow(csvWriter, row.Content); err != nil {
			return err
		}
	}
	return nil
}
// extractHeader returns the key nodes of child, in the order getMapKeys
// yields them, for use as the CSV header row.
//
// It returns an error when child is not a mapping node.
func (e *csvEncoder) extractHeader(child *yaml.Node) ([]*yaml.Node, error) {
	if isMapping := child.Kind == yaml.MappingNode; !isMapping {
		return nil, fmt.Errorf("csv object encoding only works for arrays of flat objects (string key => string/numbers/boolean value), child[0] is a %v", child.Tag)
	}
	return getMapKeys(child).Content, nil
}
// createChildRow builds the CSV row for child, with exactly one value node
// per header, in header order.
//
// For each header the matching key is looked up in child; when child has no
// such key, an empty scalar is used so the row stays aligned with the header
// row. Keys of child that are not in headers are ignored.
func (e *csvEncoder) createChildRow(child *yaml.Node, headers []*yaml.Node) []*yaml.Node {
	// The row always ends up with len(headers) cells, so size it once.
	childRow := make([]*yaml.Node, 0, len(headers))
	for _, header := range headers {
		keyIndex := findKeyInMap(child, header)
		if keyIndex == -1 {
			// Only allocate the blank placeholder when it is actually needed.
			childRow = append(childRow, createScalarNode(nil, ""))
			continue
		}
		// findKeyInMap returns the index of the key; its value is the next entry.
		childRow = append(childRow, child.Content[keyIndex+1])
	}
	return childRow
}
// encodeObjects writes an array of flat objects (mapping nodes) as CSV.
//
// The keys of the first object become the header row. Every object, the
// first included, then contributes one row built by createChildRow, so the
// values appear in header order.
//
// An empty content slice writes nothing. An error is returned if the header
// cannot be extracted, if any element is not a mapping node, or if encodeRow
// fails for the header or any row.
func (e *csvEncoder) encodeObjects(csvWriter *csv.Writer, content []*yaml.Node) error {
	// Guard the content[0] access below.
	if len(content) == 0 {
		return nil
	}

	headers, err := e.extractHeader(content[0])
	if err != nil {
		// Report the failure; returning nil here would make a failed encode
		// look like a success with empty output.
		return err
	}

	if err := e.encodeRow(csvWriter, headers); err != nil {
		return err
	}

	for i, child := range content {
		if child.Kind != yaml.MappingNode {
			return fmt.Errorf("csv object encoding only works for arrays of flat objects (string key => string/numbers/boolean value), child[%v] is a %v", i, child.Tag)
		}
		if err := e.encodeRow(csvWriter, e.createChildRow(child, headers)); err != nil {
			return err
		}
	}
	return nil
}
func (e *csvEncoder) Encode(writer io.Writer, originalNode *yaml.Node) error {
csvWriter := csv.NewWriter(writer)
csvWriter.Comma = e.separator
@ -56,15 +117,10 @@ func (e *csvEncoder) Encode(writer io.Writer, originalNode *yaml.Node) error {
return e.encodeRow(csvWriter, node.Content)
}
for i, child := range node.Content {
if node.Content[0].Kind == yaml.MappingNode {
return e.encodeObjects(csvWriter, node.Content)
}
return e.encodeArrays(csvWriter, node.Content)
if child.Kind != yaml.SequenceNode {
return fmt.Errorf("csv encoding only works for arrays of scalars (string/numbers/booleans), child[%v] is a %v", i, child.Tag)
}
err := e.encodeRow(csvWriter, child.Content)
if err != nil {
return err
}
}
return nil
}

View File

@ -1,60 +0,0 @@
package yqlib
import (
"bufio"
"bytes"
"strings"
"testing"
"github.com/mikefarah/yq/v4/test"
)
func yamlToCsv(sampleYaml string, separator rune) string {
var output bytes.Buffer
writer := bufio.NewWriter(&output)
var jsonEncoder = NewCsvEncoder(separator)
inputs, err := readDocuments(strings.NewReader(sampleYaml), "sample.yml", 0, NewYamlDecoder())
if err != nil {
panic(err)
}
node := inputs.Front().Value.(*CandidateNode).Node
err = jsonEncoder.Encode(writer, node)
if err != nil {
panic(err)
}
writer.Flush()
return strings.TrimSuffix(output.String(), "\n")
}
// sampleYaml is a single flow-style YAML array of scalars used as one CSV
// row. It includes a value containing commas, a quoted value that spans two
// lines, numbers and a boolean.
var sampleYaml = `["apple", apple2, "comma, in, value", "new
line", 3, 3.40, true, "tab here"]`
// sampleYamlArray nests sampleYaml and a second four-item row inside an outer
// array, giving an array of arrays.
var sampleYamlArray = "[" + sampleYaml + ", [bob, cat, meow, puss]]"
func TestCsvEncoderEmptyArray(t *testing.T) {
var actualCsv = yamlToCsv(`[]`, ',')
test.AssertResult(t, "", actualCsv)
}
func TestCsvEncoder(t *testing.T) {
var expectedCsv = `apple,apple2,"comma, in, value",new line,3,3.40,true,tab here`
var actualCsv = yamlToCsv(sampleYaml, ',')
test.AssertResult(t, expectedCsv, actualCsv)
}
func TestCsvEncoderArrayOfArrays(t *testing.T) {
var actualCsv = yamlToCsv(sampleYamlArray, ',')
var expectedCsv = "apple,apple2,\"comma, in, value\",new line,3,3.40,true,tab here\nbob,cat,meow,puss"
test.AssertResult(t, expectedCsv, actualCsv)
}
func TestTsvEncoder(t *testing.T) {
var expectedCsv = `apple apple2 comma, in, value new line 3 3.40 true "tab here"`
var actualCsv = yamlToCsv(sampleYaml, '\t')
test.AssertResult(t, expectedCsv, actualCsv)
}

View File

@ -67,7 +67,10 @@ var participleYqRules = []*participleYqRule{
{"XMLEncode", `to_?xml`, encodeWithIndent(XMLOutputFormat, 2), 0},
{"XMLEncodeNoIndent", `@xml`, encodeWithIndent(XMLOutputFormat, 0), 0},
{"CSVDecode", `from_?csv|@csvd`, decodeOp(CSVObjectInputFormat), 0},
{"CSVEncode", `to_?csv|@csv`, encodeWithIndent(CSVOutputFormat, 0), 0},
{"TSVDecode", `from_?tsv|@tsvd`, decodeOp(TSVObjectInputFormat), 0},
{"TSVEncode", `to_?tsv|@tsv`, encodeWithIndent(TSVOutputFormat, 0), 0},
{"Base64d", `@base64d`, decodeOp(Base64InputFormat), 0},

View File

@ -205,10 +205,10 @@ func findInArray(array *yaml.Node, item *yaml.Node) int {
return -1
}
func findKeyInMap(array *yaml.Node, item *yaml.Node) int {
func findKeyInMap(dataMap *yaml.Node, item *yaml.Node) int {
for index := 0; index < len(array.Content); index = index + 2 {
if recursiveNodeEqual(array.Content[index], item) {
for index := 0; index < len(dataMap.Content); index = index + 2 {
if recursiveNodeEqual(dataMap.Content[index], item) {
return index
}
}

View File

@ -114,6 +114,10 @@ func decodeOperator(d *dataTreeNavigator, context Context, expressionNode *Expre
decoder = NewBase64Decoder()
case PropertiesInputFormat:
decoder = NewPropertiesDecoder()
case CSVObjectInputFormat:
decoder = NewCSVObjectDecoder(',')
case TSVObjectInputFormat:
decoder = NewCSVObjectDecoder('\t')
}
var results = list.New()

View File

@ -66,11 +66,27 @@ var encoderDecoderOperatorScenarios = []expressionScenario{
{
description: "Decode props encoded string",
document: `a: "cats=great\ndogs=cool as well"`,
expression: `.a |= from_props`,
expression: `.a |= @propsd`,
expected: []string{
"D0, P[], (doc)::a:\n cats: great\n dogs: cool as well\n",
},
},
{
description: "Decode csv encoded string",
document: `a: "cats,dogs\ngreat,cool as well"`,
expression: `.a |= @csvd`,
expected: []string{
"D0, P[], (doc)::a:\n - cats: great\n dogs: cool as well\n",
},
},
{
description: "Decode tsv encoded string",
document: `a: "cats dogs\ngreat cool as well"`,
expression: `.a |= @tsvd`,
expected: []string{
"D0, P[], (doc)::a:\n - cats: great\n dogs: cool as well\n",
},
},
{
skipDoc: true,
document: "a:\n cool:\n bob: dylan",