Csv decoder (#1290)

* WIP: adding CSV decoder

* Adding CSV decoder

* Added CSV roundtrip

* Fixing from review
This commit is contained in:
Mike Farah 2022-08-01 10:28:34 +10:00 committed by GitHub
parent 3c222d8707
commit c8815f5ab9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 837 additions and 87 deletions

View File

@ -3,6 +3,8 @@
setUp() { setUp() {
rm test*.yml 2>/dev/null || true rm test*.yml 2>/dev/null || true
rm test*.properties 2>/dev/null || true rm test*.properties 2>/dev/null || true
rm test*.csv 2>/dev/null || true
rm test*.tsv 2>/dev/null || true
rm test*.xml 2>/dev/null || true rm test*.xml 2>/dev/null || true
} }
@ -40,6 +42,51 @@ EOM
assertEquals "$expected" "$X" assertEquals "$expected" "$X"
} }
# Acceptance test: reading a CSV file with -p=csv yields a YAML array of
# objects keyed by the CSV header row. Checked with both the `e` and `ea`
# commands, which must give the same output.
testInputCSV() {
cat >test.csv <<EOL
fruit,yumLevel
apple,5
banana,4
EOL
read -r -d '' expected << EOM
- fruit: apple
yumLevel: 5
- fruit: banana
yumLevel: 4
EOM
# Run with the `e` command.
X=$(./yq e -p=csv test.csv)
assertEquals "$expected" "$X"
# Run with the `ea` command; expected output is unchanged.
X=$(./yq ea -p=csv test.csv)
assertEquals "$expected" "$X"
}
# Acceptance test: reading a TSV file using the short input-format alias
# (-p=t) yields a YAML array of objects keyed by the header row. Checked with
# both the `e` and `ea` commands.
testInputTSV() {
cat >test.tsv <<EOL
fruit yumLevel
apple 5
banana 4
EOL
read -r -d '' expected << EOM
- fruit: apple
yumLevel: 5
- fruit: banana
yumLevel: 4
EOM
# Run with the `e` command.
X=$(./yq e -p=t test.tsv)
assertEquals "$expected" "$X"
# Run with the `ea` command; expected output is unchanged.
X=$(./yq ea -p=t test.tsv)
assertEquals "$expected" "$X"
}
testInputXml() { testInputXml() {
cat >test.yml <<EOL cat >test.yml <<EOL
<cat legs="4">BiBi</cat> <cat legs="4">BiBi</cat>

View File

@ -102,6 +102,48 @@ EOM
assertEquals "$expected" "$X" assertEquals "$expected" "$X"
} }
# Acceptance test: a YAML array of flat objects is written as CSV, with the
# header row taken from the object keys. Exercises both the short (-o=c) and
# long (-o=csv) output-format names.
testOutputCSV() {
cat >test.yml <<EOL
- fruit: apple
yumLevel: 5
- fruit: banana
yumLevel: 4
EOL
read -r -d '' expected << EOM
fruit,yumLevel
apple,5
banana,4
EOM
# No explicit command, short format alias.
X=$(./yq -o=c test.yml)
assertEquals "$expected" "$X"
# `ea` command, long format name.
X=$(./yq ea -o=csv test.yml)
assertEquals "$expected" "$X"
}
# Acceptance test: a YAML array of flat objects is written as TSV, with the
# header row taken from the object keys. Exercises both the short (-o=t) and
# long (-o=tsv) output-format names.
testOutputTSV() {
cat >test.yml <<EOL
- fruit: apple
yumLevel: 5
- fruit: banana
yumLevel: 4
EOL
read -r -d '' expected << EOM
fruit yumLevel
apple 5
banana 4
EOM
# No explicit command, short format alias.
X=$(./yq -o=t test.yml)
assertEquals "$expected" "$X"
# `ea` command, long format name.
X=$(./yq ea -o=tsv test.yml)
assertEquals "$expected" "$X"
}
testOutputXml() { testOutputXml() {
cat >test.yml <<EOL cat >test.yml <<EOL
a: {b: {c: ["cat"]}} a: {b: {c: ["cat"]}}

View File

@ -68,6 +68,10 @@ func configureDecoder() (yqlib.Decoder, error) {
return yqlib.NewPropertiesDecoder(), nil return yqlib.NewPropertiesDecoder(), nil
case yqlib.JsonInputFormat: case yqlib.JsonInputFormat:
return yqlib.NewJSONDecoder(), nil return yqlib.NewJSONDecoder(), nil
case yqlib.CSVObjectInputFormat:
return yqlib.NewCSVObjectDecoder(','), nil
case yqlib.TSVObjectInputFormat:
return yqlib.NewCSVObjectDecoder('\t'), nil
} }
return yqlib.NewYamlDecoder(), nil return yqlib.NewYamlDecoder(), nil

View File

@ -0,0 +1,3 @@
name,numberOfCats,likesApples,height
Gary,1,true,168.8
Samantha's Rabbit,2,false,-188.8
1 name numberOfCats likesApples height
2 Gary 1 true 168.8
3 Samantha's Rabbit 2 false -188.8

273
pkg/yqlib/csv_test.go Normal file
View File

@ -0,0 +1,273 @@
package yqlib
import (
"bufio"
"fmt"
"testing"
"github.com/mikefarah/yq/v4/test"
)
// csvSimple is the baseline CSV fixture: a header row followed by two records.
const csvSimple = `name,numberOfCats,likesApples,height
Gary,1,true,168.8
Samantha's Rabbit,2,false,-188.8
`
// expectedUpdatedSimpleCsv is csvSimple with Gary's numberOfCats set to 3;
// it is the expected output of the "Round trip" scenario.
const expectedUpdatedSimpleCsv = `name,numberOfCats,likesApples,height
Gary,3,true,168.8
Samantha's Rabbit,2,false,-188.8
`
// csvSimpleShort is the expected output of the custom-header scenario, which
// keeps only the name and numberOfCats columns under renamed headers.
const csvSimpleShort = `Name,Number of Cats
Gary,1
Samantha's Rabbit,2
`
// tsvSimple holds the same records as csvSimple in TSV form.
const tsvSimple = `name numberOfCats likesApples height
Gary 1 true 168.8
Samantha's Rabbit 2 false -188.8
`
// expectedYamlFromCSV is the YAML expected when decoding csvSimple/tsvSimple;
// it is also used as the input of the object-encoding scenarios.
const expectedYamlFromCSV = `- name: Gary
numberOfCats: 1
likesApples: true
height: 168.8
- name: Samantha's Rabbit
numberOfCats: 2
likesApples: false
height: -188.8
`
// expectedYamlFromCSVMissingData is the input of the missing-fields scenario:
// the first entry has no likesApples and the second has no numberOfCats.
const expectedYamlFromCSVMissingData = `- name: Gary
numberOfCats: 1
height: 168.8
- name: Samantha's Rabbit
height: -188.8
likesApples: false
`
// csvSimpleMissingData is the CSV expected from expectedYamlFromCSVMissingData:
// headers come from the first entry and the absent numberOfCats cell is blank.
const csvSimpleMissingData = `name,numberOfCats,height
Gary,1,168.8
Samantha's Rabbit,,-188.8
`
// csvTestSimpleYaml is an array of arrays of scalars; the rows differ in length.
const csvTestSimpleYaml = `- [i, like, csv]
- [because, excel, is, cool]`
// expectedSimpleCsv is csvTestSimpleYaml encoded as CSV.
const expectedSimpleCsv = `i,like,csv
because,excel,is,cool
`
// tsvTestExpectedSimpleCsv is csvTestSimpleYaml encoded as TSV.
const tsvTestExpectedSimpleCsv = `i like csv
because excel is cool
`
// csvScenarios lists every CSV/TSV encode, decode and round-trip case.
// Each entry is executed by testCSVScenario and, unless skipDoc is set,
// rendered into the generated usage documentation by documentCSVScenario.
// scenarioType selects the decoder/encoder pair used for the entry.
var csvScenarios = []formatScenario{
	{
		description:  "Encode CSV simple",
		input:        csvTestSimpleYaml,
		expected:     expectedSimpleCsv,
		scenarioType: "encode-csv",
	},
	{
		description:  "Encode TSV simple",
		input:        csvTestSimpleYaml,
		expected:     tsvTestExpectedSimpleCsv,
		scenarioType: "encode-tsv",
	},
	{
		description:  "Encode Empty",
		skipDoc:      true,
		input:        `[]`,
		expected:     "",
		scenarioType: "encode-csv",
	},
	{
		description:  "Comma in value",
		skipDoc:      true,
		input:        `["comma, in, value", things]`,
		expected:     "\"comma, in, value\",things\n",
		scenarioType: "encode-csv",
	},
	{
		description:  "Encode array of objects to csv",
		input:        expectedYamlFromCSV,
		expected:     csvSimple,
		scenarioType: "encode-csv",
	},
	{
		description: "Encode array of objects to custom csv format",
		// This text is copied verbatim into the generated docs.
		subdescription: "Add the header row manually, then we convert each object into an array of values - resulting in an array of arrays. Pick the columns and call the header whatever you like.",
		input:          expectedYamlFromCSV,
		expected:       csvSimpleShort,
		expression:     `[["Name", "Number of Cats"]] + [.[] | [.name, .numberOfCats ]]`,
		scenarioType:   "encode-csv",
	},
	{
		description:    "Encode array of objects to csv - missing fields behaviour",
		subdescription: "First entry is used to determine the headers, and it is missing 'likesApples', so it is not included in the csv. Second entry does not have 'numberOfCats' so that is blank",
		input:          expectedYamlFromCSVMissingData,
		expected:       csvSimpleMissingData,
		scenarioType:   "encode-csv",
	},
	{
		description:    "Parse CSV into an array of objects",
		subdescription: "First row is assumed to be the header row.",
		input:          csvSimple,
		expected:       expectedYamlFromCSV,
		scenarioType:   "decode-csv-object",
	},
	{
		description:    "Parse TSV into an array of objects",
		subdescription: "First row is assumed to be the header row.",
		input:          tsvSimple,
		expected:       expectedYamlFromCSV,
		scenarioType:   "decode-tsv-object",
	},
	{
		description:  "Round trip",
		input:        csvSimple,
		expected:     expectedUpdatedSimpleCsv,
		expression:   `(.[] | select(.name == "Gary") | .numberOfCats) = 3`,
		scenarioType: "roundtrip-csv",
	},
}
// testCSVScenario runs a single scenario through the decoder/encoder pair
// selected by its scenarioType and asserts that the result equals s.expected.
// An unrecognised scenarioType panics.
func testCSVScenario(t *testing.T, s formatScenario) {
	var (
		decoder Decoder
		encoder Encoder
	)

	switch s.scenarioType {
	case "encode-csv":
		decoder, encoder = NewYamlDecoder(), NewCsvEncoder(',')
	case "encode-tsv":
		decoder, encoder = NewYamlDecoder(), NewCsvEncoder('\t')
	case "decode-csv-object":
		decoder, encoder = NewCSVObjectDecoder(','), NewYamlEncoder(2, false, true, true)
	case "decode-tsv-object":
		decoder, encoder = NewCSVObjectDecoder('\t'), NewYamlEncoder(2, false, true, true)
	case "roundtrip-csv":
		decoder, encoder = NewCSVObjectDecoder(','), NewCsvEncoder(',')
	default:
		panic(fmt.Sprintf("unhandled scenario type %q", s.scenarioType))
	}

	test.AssertResultWithContext(t, s.expected, processFormatScenario(s, decoder, encoder), s.description)
}
// documentCSVDecodeObjectScenario writes the documentation section for a
// decode scenario: heading, optional subdescription, the sample input, the yq
// command line, and the YAML produced by actually decoding s.input.
// formatType is "csv" or "tsv"; "tsv" selects a tab separator and anything
// else a comma.
func documentCSVDecodeObjectScenario(t *testing.T, w *bufio.Writer, s formatScenario, formatType string) {
	writeOrPanic(w, fmt.Sprintf("## %v\n", s.description))

	if s.subdescription != "" {
		writeOrPanic(w, s.subdescription)
		writeOrPanic(w, "\n\n")
	}

	writeOrPanic(w, fmt.Sprintf("Given a sample.%v file of:\n", formatType))
	writeOrPanic(w, fmt.Sprintf("```%v\n%v\n```\n", formatType, s.input))

	writeOrPanic(w, "then\n")
	writeOrPanic(w, fmt.Sprintf("```bash\nyq -p=%v sample.%v\n```\n", formatType, formatType))
	writeOrPanic(w, "will output\n")

	var delimiter rune
	switch formatType {
	case "tsv":
		delimiter = '\t'
	default:
		delimiter = ','
	}

	// The sample output is produced by running the real decoder.
	decoded := processFormatScenario(s, NewCSVObjectDecoder(delimiter), NewYamlEncoder(s.indent, false, true, true))
	writeOrPanic(w, fmt.Sprintf("```yaml\n%v```\n\n", decoded))
}
// documentCSVEncodeScenario writes the documentation section for an encode
// scenario: heading, optional subdescription, the YAML sample, the yq command
// line (with the expression when one is set), and the output produced by
// actually encoding s.input. formatType is "csv" or "tsv"; "tsv" selects a
// tab separator and anything else a comma.
func documentCSVEncodeScenario(w *bufio.Writer, s formatScenario, formatType string) {
	writeOrPanic(w, fmt.Sprintf("## %v\n", s.description))

	if s.subdescription != "" {
		writeOrPanic(w, s.subdescription)
		writeOrPanic(w, "\n\n")
	}

	writeOrPanic(w, "Given a sample.yml file of:\n")
	writeOrPanic(w, fmt.Sprintf("```yaml\n%v\n```\n", s.input))

	writeOrPanic(w, "then\n")

	if s.expression == "" {
		writeOrPanic(w, fmt.Sprintf("```bash\nyq -o=%v sample.yml\n```\n", formatType))
	} else {
		writeOrPanic(w, fmt.Sprintf("```bash\nyq -o=%v '%v' sample.yml\n```\n", formatType, s.expression))
	}
	writeOrPanic(w, "will output\n")

	var delimiter rune
	switch formatType {
	case "tsv":
		delimiter = '\t'
	default:
		delimiter = ','
	}

	// The sample output is produced by running the real encoder.
	encoded := processFormatScenario(s, NewYamlDecoder(), NewCsvEncoder(delimiter))
	writeOrPanic(w, fmt.Sprintf("```%v\n%v```\n\n", formatType, encoded))
}
// documentCSVRoundTripScenario writes the documentation section for a
// round-trip scenario, where the same format is used for input and output:
// heading, optional subdescription, the sample input, the yq command line
// (with the expression when one is set), and the output of decoding then
// re-encoding s.input. formatType is "csv" or "tsv"; "tsv" selects a tab
// separator and anything else a comma.
func documentCSVRoundTripScenario(w *bufio.Writer, s formatScenario, formatType string) {
	writeOrPanic(w, fmt.Sprintf("## %v\n", s.description))

	if s.subdescription != "" {
		writeOrPanic(w, s.subdescription)
		writeOrPanic(w, "\n\n")
	}

	writeOrPanic(w, fmt.Sprintf("Given a sample.%v file of:\n", formatType))
	writeOrPanic(w, fmt.Sprintf("```%v\n%v\n```\n", formatType, s.input))

	writeOrPanic(w, "then\n")

	if s.expression == "" {
		writeOrPanic(w, fmt.Sprintf("```bash\nyq -p=%v -o=%v sample.%v\n```\n", formatType, formatType, formatType))
	} else {
		writeOrPanic(w, fmt.Sprintf("```bash\nyq -p=%v -o=%v '%v' sample.%v\n```\n", formatType, formatType, s.expression, formatType))
	}
	writeOrPanic(w, "will output\n")

	var delimiter rune
	switch formatType {
	case "tsv":
		delimiter = '\t'
	default:
		delimiter = ','
	}

	// The same delimiter is used on both sides of the round trip.
	roundTripped := processFormatScenario(s, NewCSVObjectDecoder(delimiter), NewCsvEncoder(delimiter))
	writeOrPanic(w, fmt.Sprintf("```%v\n%v```\n\n", formatType, roundTripped))
}
// documentCSVScenario dispatches a scenario to the documentation writer that
// matches its scenarioType. Scenarios flagged skipDoc are ignored. The value i
// must be a formatScenario; an unrecognised scenarioType panics.
func documentCSVScenario(t *testing.T, w *bufio.Writer, i interface{}) {
	scenario := i.(formatScenario)
	if scenario.skipDoc {
		return
	}

	kind := scenario.scenarioType
	switch {
	case kind == "encode-csv":
		documentCSVEncodeScenario(w, scenario, "csv")
	case kind == "encode-tsv":
		documentCSVEncodeScenario(w, scenario, "tsv")
	case kind == "decode-csv-object":
		documentCSVDecodeObjectScenario(t, w, scenario, "csv")
	case kind == "decode-tsv-object":
		documentCSVDecodeObjectScenario(t, w, scenario, "tsv")
	case kind == "roundtrip-csv":
		documentCSVRoundTripScenario(w, scenario, "csv")
	default:
		panic(fmt.Sprintf("unhandled scenario type %q", scenario.scenarioType))
	}
}
// TestCSVScenarios runs every entry of csvScenarios and then passes the same
// entries to documentScenarios for the "usage"/"csv-tsv" document.
func TestCSVScenarios(t *testing.T) {
	// documentScenarios takes []interface{}, so box each scenario while testing it.
	boxed := make([]interface{}, len(csvScenarios))
	for idx := range csvScenarios {
		scenario := csvScenarios[idx]
		testCSVScenario(t, scenario)
		boxed[idx] = scenario
	}
	documentScenarios(t, "usage", "csv-tsv", boxed, documentCSVScenario)
}

View File

@ -15,6 +15,8 @@ const (
PropertiesInputFormat PropertiesInputFormat
Base64InputFormat Base64InputFormat
JsonInputFormat JsonInputFormat
CSVObjectInputFormat
TSVObjectInputFormat
) )
type Decoder interface { type Decoder interface {
@ -32,6 +34,10 @@ func InputFormatFromString(format string) (InputFormat, error) {
return PropertiesInputFormat, nil return PropertiesInputFormat, nil
case "json", "ndjson", "j": case "json", "ndjson", "j":
return JsonInputFormat, nil return JsonInputFormat, nil
case "csv", "c":
return CSVObjectInputFormat, nil
case "tsv", "t":
return TSVObjectInputFormat, nil
default: default:
return 0, fmt.Errorf("unknown format '%v' please use [yaml|xml|props]", format) return 0, fmt.Errorf("unknown format '%v' please use [yaml|xml|props]", format)
} }

View File

@ -0,0 +1,77 @@
package yqlib
import (
"encoding/csv"
"errors"
"io"
yaml "gopkg.in/yaml.v3"
)
// csvObjectDecoder decodes delimiter-separated input into a single YAML
// document containing a sequence of maps: one map per data row, keyed by the
// values of the first (header) row.
type csvObjectDecoder struct {
// separator is the field delimiter assigned to the csv reader's Comma in Init.
separator rune
// reader is the csv reader over the current input; Init replaces it.
reader csv.Reader
// finished is checked at the top of Decode, which returns io.EOF when it is set; Init resets it to false.
finished bool
}
// NewCSVObjectDecoder returns a Decoder that splits fields on separator and
// treats the first row as the header. Call Init before Decode.
func NewCSVObjectDecoder(separator rune) Decoder {
	decoder := new(csvObjectDecoder)
	decoder.separator = separator
	return decoder
}
// Init points the decoder at a new input stream, configures the csv reader
// with the decoder's separator and clears the finished flag.
func (dec *csvObjectDecoder) Init(reader io.Reader) {
	csvReader := csv.NewReader(reader)
	csvReader.Comma = dec.separator

	dec.finished = false
	dec.reader = *csvReader
}
// convertToYamlNode turns a CSV cell into a YAML node. The cell is first
// handed to parseSnippet; if that fails, the raw text is wrapped in a scalar
// node instead.
func (dec *csvObjectDecoder) convertToYamlNode(content string) *yaml.Node {
	if parsed, err := parseSnippet(content); err == nil {
		return parsed
	}
	return createScalarNode(content, content)
}
// createObject builds a YAML mapping node for one data row: each header
// becomes a key and the cell at the same column becomes its value.
// contentRow is indexed by header position, so it must have at least as many
// cells as headerRow.
func (dec *csvObjectDecoder) createObject(headerRow []string, contentRow []string) *yaml.Node {
	var pairs []*yaml.Node
	for column := range headerRow {
		key := headerRow[column]
		pairs = append(pairs, createScalarNode(key, key), dec.convertToYamlNode(contentRow[column]))
	}
	return &yaml.Node{Kind: yaml.MappingNode, Tag: "!!map", Content: pairs}
}
// Decode reads the whole CSV input and stores it in rootYamlNode as a document
// node wrapping a sequence with one mapping per data row, keyed by the header
// row.
//
// The entire input becomes a single document, so once a document has been
// produced the decoder is marked finished and later calls return io.EOF
// until Init is called again.
//
// It returns io.EOF when there is no header row to read (empty input) or when
// the decoder has already finished, and any other reader error unchanged.
func (dec *csvObjectDecoder) Decode(rootYamlNode *yaml.Node) error {
	if dec.finished {
		return io.EOF
	}

	headerRow, err := dec.reader.Read()
	log.Debugf(": headerRow%v", headerRow)
	if err != nil {
		return err
	}

	rootArray := &yaml.Node{Kind: yaml.SequenceNode, Tag: "!!seq"}

	contentRow, err := dec.reader.Read()
	for err == nil && len(contentRow) > 0 {
		log.Debugf("Adding contentRow: %v", contentRow)
		rootArray.Content = append(rootArray.Content, dec.createObject(headerRow, contentRow))
		contentRow, err = dec.reader.Read()
		log.Debugf("Read next contentRow: %v, %v", contentRow, err)
	}
	// io.EOF is the normal end of input; anything else is a genuine read/parse failure.
	if !errors.Is(err, io.EOF) {
		return err
	}
	log.Debugf("finished, contentRow%v", contentRow)
	log.Debugf("err: %v", err)

	// Record completion explicitly rather than relying on the exhausted reader
	// to report io.EOF on the next call.
	dec.finished = true

	rootYamlNode.Kind = yaml.DocumentNode
	rootYamlNode.Content = []*yaml.Node{rootArray}
	return nil
}

View File

@ -11,14 +11,14 @@ These operators are useful to process yaml documents that have stringified embed
| --- | -- | --| | --- | -- | --|
| Yaml | from_yaml | to_yaml(i)/@yaml | | Yaml | from_yaml | to_yaml(i)/@yaml |
| JSON | from_json | to_json(i)/@json | | JSON | from_json | to_json(i)/@json |
| Properties | from_props | to_props/@props | | Properties | from_props/@propsd | to_props/@props |
| CSV | | to_csv/@csv | | CSV | from_csv/@csvd | to_csv/@csv |
| TSV | | to_tsv/@tsv | | TSV | from_tsv/@tsvd | to_tsv/@tsv |
| XML | from_xml | to_xml(i)/@xml | | XML | from_xml | to_xml(i)/@xml |
| Base64 | @base64d | @base64 | | Base64 | @base64d | @base64 |
CSV and TSV formats both accept either a single array of scalars (representing a single row), or an array of arrays of scalars (representing multiple rows). See CSV and TSV [documentation](https://mikefarah.gitbook.io/yq/usage/csv-tsv) for accepted formats.
XML uses the `--xml-attribute-prefix` and `xml-content-name` flags to identify attributes and content fields. XML uses the `--xml-attribute-prefix` and `xml-content-name` flags to identify attributes and content fields.
@ -132,7 +132,7 @@ a: |-
``` ```
then then
```bash ```bash
yq '.a |= from_props' sample.yml yq '.a |= @propsd' sample.yml
``` ```
will output will output
```yaml ```yaml
@ -141,6 +141,42 @@ a:
dogs: cool as well dogs: cool as well
``` ```
## Decode csv encoded string
Given a sample.yml file of:
```yaml
a: |-
cats,dogs
great,cool as well
```
then
```bash
yq '.a |= @csvd' sample.yml
```
will output
```yaml
a:
- cats: great
dogs: cool as well
```
## Decode tsv encoded string
Given a sample.yml file of:
```yaml
a: |-
cats dogs
great cool as well
```
then
```bash
yq '.a |= @tsvd' sample.yml
```
will output
```yaml
a:
- cats: great
dogs: cool as well
```
## Encode value as yaml string ## Encode value as yaml string
Indent defaults to 2 Indent defaults to 2

View File

@ -11,14 +11,14 @@ These operators are useful to process yaml documents that have stringified embed
| --- | -- | --| | --- | -- | --|
| Yaml | from_yaml | to_yaml(i)/@yaml | | Yaml | from_yaml | to_yaml(i)/@yaml |
| JSON | from_json | to_json(i)/@json | | JSON | from_json | to_json(i)/@json |
| Properties | from_props | to_props/@props | | Properties | from_props/@propsd | to_props/@props |
| CSV | | to_csv/@csv | | CSV | from_csv/@csvd | to_csv/@csv |
| TSV | | to_tsv/@tsv | | TSV | from_tsv/@tsvd | to_tsv/@tsv |
| XML | from_xml | to_xml(i)/@xml | | XML | from_xml | to_xml(i)/@xml |
| Base64 | @base64d | @base64 | | Base64 | @base64d | @base64 |
CSV and TSV formats both accept either a single array of scalars (representing a single row), or an array of arrays of scalars (representing multiple rows). See CSV and TSV [documentation](https://mikefarah.gitbook.io/yq/usage/csv-tsv) for accepted formats.
XML uses the `--xml-attribute-prefix` and `xml-content-name` flags to identify attributes and content fields. XML uses the `--xml-attribute-prefix` and `xml-content-name` flags to identify attributes and content fields.

View File

@ -0,0 +1,214 @@
# CSV
Encode/Decode/Roundtrip CSV and TSV files.
## Encode
Currently supports arrays of homogeneous flat objects, that is: no nesting, and it assumes the _first_ object has all the keys required:
```yaml
- name: Bobo
type: dog
- name: Fifi
type: cat
```
As well as arrays of arrays of scalars (strings/numbers/booleans):
```yaml
- [Bobo, dog]
- [Fifi, cat]
```
## Decode
Decode assumes the first CSV/TSV row is the header row, and all rows beneath are the entries.
The data will be decoded into an array of objects, using the header row values as keys.
```csv
name,type
Bobo,dog
Fifi,cat
```
{% hint style="warning" %}
Note that versions prior to 4.18 require the 'eval/e' command to be specified.&#x20;
`yq e <exp> <file>`
{% endhint %}
## Encode CSV simple
Given a sample.yml file of:
```yaml
- [i, like, csv]
- [because, excel, is, cool]
```
then
```bash
yq -o=csv sample.yml
```
will output
```csv
i,like,csv
because,excel,is,cool
```
## Encode TSV simple
Given a sample.yml file of:
```yaml
- [i, like, csv]
- [because, excel, is, cool]
```
then
```bash
yq -o=tsv sample.yml
```
will output
```tsv
i like csv
because excel is cool
```
## Encode array of objects to csv
Given a sample.yml file of:
```yaml
- name: Gary
numberOfCats: 1
likesApples: true
height: 168.8
- name: Samantha's Rabbit
numberOfCats: 2
likesApples: false
height: -188.8
```
then
```bash
yq -o=csv sample.yml
```
will output
```csv
name,numberOfCats,likesApples,height
Gary,1,true,168.8
Samantha's Rabbit,2,false,-188.8
```
## Encode array of objects to custom csv format
Add the header row manually, then we convert each object into an array of values - resulting in an array of arrays. Pick the columns and call the header whatever you like.
Given a sample.yml file of:
```yaml
- name: Gary
numberOfCats: 1
likesApples: true
height: 168.8
- name: Samantha's Rabbit
numberOfCats: 2
likesApples: false
height: -188.8
```
then
```bash
yq -o=csv '[["Name", "Number of Cats"]] + [.[] | [.name, .numberOfCats ]]' sample.yml
```
will output
```csv
Name,Number of Cats
Gary,1
Samantha's Rabbit,2
```
## Encode array of objects to csv - missing fields behaviour
First entry is used to determine the headers, and it is missing 'likesApples', so it is not included in the csv. Second entry does not have 'numberOfCats' so that is blank
Given a sample.yml file of:
```yaml
- name: Gary
numberOfCats: 1
height: 168.8
- name: Samantha's Rabbit
height: -188.8
likesApples: false
```
then
```bash
yq -o=csv sample.yml
```
will output
```csv
name,numberOfCats,height
Gary,1,168.8
Samantha's Rabbit,,-188.8
```
## Parse CSV into an array of objects
First row is assumed to be the header row.
Given a sample.csv file of:
```csv
name,numberOfCats,likesApples,height
Gary,1,true,168.8
Samantha's Rabbit,2,false,-188.8
```
then
```bash
yq -p=csv sample.csv
```
will output
```yaml
- name: Gary
numberOfCats: 1
likesApples: true
height: 168.8
- name: Samantha's Rabbit
numberOfCats: 2
likesApples: false
height: -188.8
```
## Parse TSV into an array of objects
First row is assumed to be the header row.
Given a sample.tsv file of:
```tsv
name numberOfCats likesApples height
Gary 1 true 168.8
Samantha's Rabbit 2 false -188.8
```
then
```bash
yq -p=tsv sample.tsv
```
will output
```yaml
- name: Gary
numberOfCats: 1
likesApples: true
height: 168.8
- name: Samantha's Rabbit
numberOfCats: 2
likesApples: false
height: -188.8
```
## Round trip
Given a sample.csv file of:
```csv
name,numberOfCats,likesApples,height
Gary,1,true,168.8
Samantha's Rabbit,2,false,-188.8
```
then
```bash
yq -p=csv -o=csv '(.[] | select(.name == "Gary") | .numberOfCats) = 3' sample.csv
```
will output
```csv
name,numberOfCats,likesApples,height
Gary,3,true,168.8
Samantha's Rabbit,2,false,-188.8
```

View File

@ -0,0 +1,30 @@
# CSV
Encode/Decode/Roundtrip CSV and TSV files.
## Encode
Currently supports arrays of homogeneous flat objects, that is: no nesting, and it assumes the _first_ object has all the keys required:
```yaml
- name: Bobo
type: dog
- name: Fifi
type: cat
```
As well as arrays of arrays of scalars (strings/numbers/booleans):
```yaml
- [Bobo, dog]
- [Fifi, cat]
```
## Decode
Decode assumes the first CSV/TSV row is the header row, and all rows beneath are the entries.
The data will be decoded into an array of objects, using the header row values as keys.
```csv
name,type
Bobo,dog
Fifi,cat
```

View File

@ -1,5 +1,5 @@
# Properties # Properties
Encode to a property file (decode not yet supported). Line comments on value nodes will be copied across. Encode/Decode/Roundtrip to/from a property file. Line comments on value nodes will be copied across.
By default, empty maps and arrays are not encoded - see below for an example on how to encode a value for these. By default, empty maps and arrays are not encoded - see below for an example on how to encode a value for these.

View File

@ -1,6 +1,6 @@
# Properties # Properties
Encode to a property file (decode not yet supported). Line comments on value nodes will be copied across. Encode/Decode/Roundtrip to/from a property file. Line comments on value nodes will be copied across.
By default, empty maps and arrays are not encoded - see below for an example on how to encode a value for these. By default, empty maps and arrays are not encoded - see below for an example on how to encode a value for these.

View File

@ -13,7 +13,7 @@ type csvEncoder struct {
} }
func NewCsvEncoder(separator rune) Encoder { func NewCsvEncoder(separator rune) Encoder {
return &csvEncoder{separator} return &csvEncoder{separator: separator}
} }
func (e *csvEncoder) CanHandleAliases() bool { func (e *csvEncoder) CanHandleAliases() bool {
@ -41,6 +41,67 @@ func (e *csvEncoder) encodeRow(csvWriter *csv.Writer, contents []*yaml.Node) err
return csvWriter.Write(stringValues) return csvWriter.Write(stringValues)
} }
// encodeArrays writes one CSV row per element of content. Every element must
// be a sequence node; the first one that is not stops the encoding with an
// error naming its index and tag.
func (e *csvEncoder) encodeArrays(csvWriter *csv.Writer, content []*yaml.Node) error {
	for index := range content {
		row := content[index]
		if row.Kind == yaml.SequenceNode {
			if err := e.encodeRow(csvWriter, row.Content); err != nil {
				return err
			}
			continue
		}
		return fmt.Errorf("csv encoding only works for arrays of scalars (string/numbers/booleans), child[%v] is a %v", index, row.Tag)
	}
	return nil
}
// extractHeader returns the key nodes of child for use as the CSV header row.
// child must be a mapping node, otherwise an error is returned.
func (e *csvEncoder) extractHeader(child *yaml.Node) ([]*yaml.Node, error) {
	if child.Kind == yaml.MappingNode {
		return getMapKeys(child).Content, nil
	}
	return nil, fmt.Errorf("csv object encoding only works for arrays of flat objects (string key => string/numbers/boolean value), child[0] is a %v", child.Tag)
}
// createChildRow lines up the values of child with headers. For each header
// the matching value node of child is used; when child lacks that key an
// empty scalar takes its place, so the row always has len(headers) cells.
func (e *csvEncoder) createChildRow(child *yaml.Node, headers []*yaml.Node) []*yaml.Node {
	row := make([]*yaml.Node, 0, len(headers))
	for idx := range headers {
		// In a mapping node the value sits directly after its key.
		if keyAt := findKeyInMap(child, headers[idx]); keyAt != -1 {
			row = append(row, child.Content[keyAt+1])
			continue
		}
		row = append(row, createScalarNode(nil, ""))
	}
	return row
}
// encodeObjects writes an array of flat objects as CSV. The keys of the first
// object become the header row; each object then becomes one row, with blank
// cells for keys it does not have. Keys that appear only in later objects are
// not emitted.
//
// It returns an error if the header cannot be extracted or written, if any
// element is not a mapping node, or if writing a row fails.
func (e *csvEncoder) encodeObjects(csvWriter *csv.Writer, content []*yaml.Node) error {
	// Nothing to encode; also protects the content[0] access below.
	if len(content) == 0 {
		return nil
	}

	headers, err := e.extractHeader(content[0])
	if err != nil {
		// Propagate the failure instead of reporting success with no output.
		return err
	}

	if err = e.encodeRow(csvWriter, headers); err != nil {
		return err
	}

	for i, child := range content {
		if child.Kind != yaml.MappingNode {
			return fmt.Errorf("csv object encoding only works for arrays of flat objects (string key => string/numbers/boolean value), child[%v] is a %v", i, child.Tag)
		}
		row := e.createChildRow(child, headers)
		if err = e.encodeRow(csvWriter, row); err != nil {
			return err
		}
	}
	return nil
}
func (e *csvEncoder) Encode(writer io.Writer, originalNode *yaml.Node) error { func (e *csvEncoder) Encode(writer io.Writer, originalNode *yaml.Node) error {
csvWriter := csv.NewWriter(writer) csvWriter := csv.NewWriter(writer)
csvWriter.Comma = e.separator csvWriter.Comma = e.separator
@ -56,15 +117,10 @@ func (e *csvEncoder) Encode(writer io.Writer, originalNode *yaml.Node) error {
return e.encodeRow(csvWriter, node.Content) return e.encodeRow(csvWriter, node.Content)
} }
for i, child := range node.Content { if node.Content[0].Kind == yaml.MappingNode {
return e.encodeObjects(csvWriter, node.Content)
if child.Kind != yaml.SequenceNode {
return fmt.Errorf("csv encoding only works for arrays of scalars (string/numbers/booleans), child[%v] is a %v", i, child.Tag)
}
err := e.encodeRow(csvWriter, child.Content)
if err != nil {
return err
}
} }
return nil
return e.encodeArrays(csvWriter, node.Content)
} }

View File

@ -1,60 +0,0 @@
package yqlib
import (
"bufio"
"bytes"
"strings"
"testing"
"github.com/mikefarah/yq/v4/test"
)
func yamlToCsv(sampleYaml string, separator rune) string {
var output bytes.Buffer
writer := bufio.NewWriter(&output)
var jsonEncoder = NewCsvEncoder(separator)
inputs, err := readDocuments(strings.NewReader(sampleYaml), "sample.yml", 0, NewYamlDecoder())
if err != nil {
panic(err)
}
node := inputs.Front().Value.(*CandidateNode).Node
err = jsonEncoder.Encode(writer, node)
if err != nil {
panic(err)
}
writer.Flush()
return strings.TrimSuffix(output.String(), "\n")
}
// sampleYaml is a single flow sequence of scalars used as one CSV/TSV row. It
// mixes quoted and plain strings, a value containing commas, a value that
// spans a line break, numbers and a boolean.
var sampleYaml = `["apple", apple2, "comma, in, value", "new
line", 3, 3.40, true, "tab here"]`
// sampleYamlArray wraps sampleYaml and a second row in an outer sequence,
// giving an array of arrays.
var sampleYamlArray = "[" + sampleYaml + ", [bob, cat, meow, puss]]"
func TestCsvEncoderEmptyArray(t *testing.T) {
var actualCsv = yamlToCsv(`[]`, ',')
test.AssertResult(t, "", actualCsv)
}
// TestCsvEncoder checks that a flat sequence encodes to one CSV row, with
// only the comma-containing value quoted.
func TestCsvEncoder(t *testing.T) {
	got := yamlToCsv(sampleYaml, ',')
	want := `apple,apple2,"comma, in, value",new line,3,3.40,true,tab here`
	test.AssertResult(t, want, got)
}
// TestCsvEncoderArrayOfArrays checks that a sequence of sequences encodes to
// one CSV row per inner sequence.
func TestCsvEncoderArrayOfArrays(t *testing.T) {
	want := "apple,apple2,\"comma, in, value\",new line,3,3.40,true,tab here\nbob,cat,meow,puss"
	got := yamlToCsv(sampleYamlArray, ',')
	test.AssertResult(t, want, got)
}
// TestTsvEncoder checks the TSV form of sampleYaml: the comma-containing
// value is left unquoted while the last value is quoted.
func TestTsvEncoder(t *testing.T) {
	got := yamlToCsv(sampleYaml, '\t')
	want := `apple apple2 comma, in, value new line 3 3.40 true "tab here"`
	test.AssertResult(t, want, got)
}

View File

@ -67,7 +67,10 @@ var participleYqRules = []*participleYqRule{
{"XMLEncode", `to_?xml`, encodeWithIndent(XMLOutputFormat, 2), 0}, {"XMLEncode", `to_?xml`, encodeWithIndent(XMLOutputFormat, 2), 0},
{"XMLEncodeNoIndent", `@xml`, encodeWithIndent(XMLOutputFormat, 0), 0}, {"XMLEncodeNoIndent", `@xml`, encodeWithIndent(XMLOutputFormat, 0), 0},
{"CSVDecode", `from_?csv|@csvd`, decodeOp(CSVObjectInputFormat), 0},
{"CSVEncode", `to_?csv|@csv`, encodeWithIndent(CSVOutputFormat, 0), 0}, {"CSVEncode", `to_?csv|@csv`, encodeWithIndent(CSVOutputFormat, 0), 0},
{"TSVDecode", `from_?tsv|@tsvd`, decodeOp(TSVObjectInputFormat), 0},
{"TSVEncode", `to_?tsv|@tsv`, encodeWithIndent(TSVOutputFormat, 0), 0}, {"TSVEncode", `to_?tsv|@tsv`, encodeWithIndent(TSVOutputFormat, 0), 0},
{"Base64d", `@base64d`, decodeOp(Base64InputFormat), 0}, {"Base64d", `@base64d`, decodeOp(Base64InputFormat), 0},

View File

@ -523,7 +523,6 @@ var participleLexerScenarios = []participleLexerScenario{
} }
func TestParticipleLexer(t *testing.T) { func TestParticipleLexer(t *testing.T) {
log.Errorf("TestParticiple")
lexer := newParticipleLexer() lexer := newParticipleLexer()
for _, scenario := range participleLexerScenarios { for _, scenario := range participleLexerScenarios {

View File

@ -205,10 +205,10 @@ func findInArray(array *yaml.Node, item *yaml.Node) int {
return -1 return -1
} }
func findKeyInMap(array *yaml.Node, item *yaml.Node) int { func findKeyInMap(dataMap *yaml.Node, item *yaml.Node) int {
for index := 0; index < len(array.Content); index = index + 2 { for index := 0; index < len(dataMap.Content); index = index + 2 {
if recursiveNodeEqual(array.Content[index], item) { if recursiveNodeEqual(dataMap.Content[index], item) {
return index return index
} }
} }

View File

@ -114,6 +114,10 @@ func decodeOperator(d *dataTreeNavigator, context Context, expressionNode *Expre
decoder = NewBase64Decoder() decoder = NewBase64Decoder()
case PropertiesInputFormat: case PropertiesInputFormat:
decoder = NewPropertiesDecoder() decoder = NewPropertiesDecoder()
case CSVObjectInputFormat:
decoder = NewCSVObjectDecoder(',')
case TSVObjectInputFormat:
decoder = NewCSVObjectDecoder('\t')
} }
var results = list.New() var results = list.New()

View File

@ -66,11 +66,27 @@ var encoderDecoderOperatorScenarios = []expressionScenario{
{ {
description: "Decode props encoded string", description: "Decode props encoded string",
document: `a: "cats=great\ndogs=cool as well"`, document: `a: "cats=great\ndogs=cool as well"`,
expression: `.a |= from_props`, expression: `.a |= @propsd`,
expected: []string{ expected: []string{
"D0, P[], (doc)::a:\n cats: great\n dogs: cool as well\n", "D0, P[], (doc)::a:\n cats: great\n dogs: cool as well\n",
}, },
}, },
{
description: "Decode csv encoded string",
document: `a: "cats,dogs\ngreat,cool as well"`,
expression: `.a |= @csvd`,
expected: []string{
"D0, P[], (doc)::a:\n - cats: great\n dogs: cool as well\n",
},
},
{
description: "Decode tsv encoded string",
document: `a: "cats dogs\ngreat cool as well"`,
expression: `.a |= @tsvd`,
expected: []string{
"D0, P[], (doc)::a:\n - cats: great\n dogs: cool as well\n",
},
},
{ {
skipDoc: true, skipDoc: true,
document: "a:\n cool:\n bob: dylan", document: "a:\n cool:\n bob: dylan",