mirror of
https://github.com/mikefarah/yq.git
synced 2024-12-19 20:19:04 +00:00
Csv decoder (#1290)
* WIP: adding CSV decoder * Adding CSV decoder * Added CSV roundtrip * Fixing from review
This commit is contained in:
parent
3c222d8707
commit
c8815f5ab9
@ -3,6 +3,8 @@
|
||||
setUp() {
|
||||
rm test*.yml 2>/dev/null || true
|
||||
rm test*.properties 2>/dev/null || true
|
||||
rm test*.csv 2>/dev/null || true
|
||||
rm test*.tsv 2>/dev/null || true
|
||||
rm test*.xml 2>/dev/null || true
|
||||
}
|
||||
|
||||
@ -40,6 +42,51 @@ EOM
|
||||
assertEquals "$expected" "$X"
|
||||
}
|
||||
|
||||
testInputCSV() {
|
||||
cat >test.csv <<EOL
|
||||
fruit,yumLevel
|
||||
apple,5
|
||||
banana,4
|
||||
EOL
|
||||
|
||||
read -r -d '' expected << EOM
|
||||
- fruit: apple
|
||||
yumLevel: 5
|
||||
- fruit: banana
|
||||
yumLevel: 4
|
||||
EOM
|
||||
|
||||
X=$(./yq e -p=csv test.csv)
|
||||
assertEquals "$expected" "$X"
|
||||
|
||||
X=$(./yq ea -p=csv test.csv)
|
||||
assertEquals "$expected" "$X"
|
||||
}
|
||||
|
||||
testInputTSV() {
|
||||
cat >test.tsv <<EOL
|
||||
fruit yumLevel
|
||||
apple 5
|
||||
banana 4
|
||||
EOL
|
||||
|
||||
read -r -d '' expected << EOM
|
||||
- fruit: apple
|
||||
yumLevel: 5
|
||||
- fruit: banana
|
||||
yumLevel: 4
|
||||
EOM
|
||||
|
||||
X=$(./yq e -p=t test.tsv)
|
||||
assertEquals "$expected" "$X"
|
||||
|
||||
X=$(./yq ea -p=t test.tsv)
|
||||
assertEquals "$expected" "$X"
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
testInputXml() {
|
||||
cat >test.yml <<EOL
|
||||
<cat legs="4">BiBi</cat>
|
||||
|
@ -102,6 +102,48 @@ EOM
|
||||
assertEquals "$expected" "$X"
|
||||
}
|
||||
|
||||
testOutputCSV() {
|
||||
cat >test.yml <<EOL
|
||||
- fruit: apple
|
||||
yumLevel: 5
|
||||
- fruit: banana
|
||||
yumLevel: 4
|
||||
EOL
|
||||
|
||||
read -r -d '' expected << EOM
|
||||
fruit,yumLevel
|
||||
apple,5
|
||||
banana,4
|
||||
EOM
|
||||
|
||||
X=$(./yq -o=c test.yml)
|
||||
assertEquals "$expected" "$X"
|
||||
|
||||
X=$(./yq ea -o=csv test.yml)
|
||||
assertEquals "$expected" "$X"
|
||||
}
|
||||
|
||||
testOutputTSV() {
|
||||
cat >test.yml <<EOL
|
||||
- fruit: apple
|
||||
yumLevel: 5
|
||||
- fruit: banana
|
||||
yumLevel: 4
|
||||
EOL
|
||||
|
||||
read -r -d '' expected << EOM
|
||||
fruit yumLevel
|
||||
apple 5
|
||||
banana 4
|
||||
EOM
|
||||
|
||||
X=$(./yq -o=t test.yml)
|
||||
assertEquals "$expected" "$X"
|
||||
|
||||
X=$(./yq ea -o=tsv test.yml)
|
||||
assertEquals "$expected" "$X"
|
||||
}
|
||||
|
||||
testOutputXml() {
|
||||
cat >test.yml <<EOL
|
||||
a: {b: {c: ["cat"]}}
|
||||
|
@ -68,6 +68,10 @@ func configureDecoder() (yqlib.Decoder, error) {
|
||||
return yqlib.NewPropertiesDecoder(), nil
|
||||
case yqlib.JsonInputFormat:
|
||||
return yqlib.NewJSONDecoder(), nil
|
||||
case yqlib.CSVObjectInputFormat:
|
||||
return yqlib.NewCSVObjectDecoder(','), nil
|
||||
case yqlib.TSVObjectInputFormat:
|
||||
return yqlib.NewCSVObjectDecoder('\t'), nil
|
||||
}
|
||||
|
||||
return yqlib.NewYamlDecoder(), nil
|
||||
|
3
examples/sample_objects.csv
Normal file
3
examples/sample_objects.csv
Normal file
@ -0,0 +1,3 @@
|
||||
name,numberOfCats,likesApples,height
|
||||
Gary,1,true,168.8
|
||||
Samantha's Rabbit,2,false,-188.8
|
|
273
pkg/yqlib/csv_test.go
Normal file
273
pkg/yqlib/csv_test.go
Normal file
@ -0,0 +1,273 @@
|
||||
package yqlib
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/mikefarah/yq/v4/test"
|
||||
)
|
||||
|
||||
const csvSimple = `name,numberOfCats,likesApples,height
|
||||
Gary,1,true,168.8
|
||||
Samantha's Rabbit,2,false,-188.8
|
||||
`
|
||||
|
||||
const expectedUpdatedSimpleCsv = `name,numberOfCats,likesApples,height
|
||||
Gary,3,true,168.8
|
||||
Samantha's Rabbit,2,false,-188.8
|
||||
`
|
||||
|
||||
const csvSimpleShort = `Name,Number of Cats
|
||||
Gary,1
|
||||
Samantha's Rabbit,2
|
||||
`
|
||||
|
||||
const tsvSimple = `name numberOfCats likesApples height
|
||||
Gary 1 true 168.8
|
||||
Samantha's Rabbit 2 false -188.8
|
||||
`
|
||||
|
||||
const expectedYamlFromCSV = `- name: Gary
|
||||
numberOfCats: 1
|
||||
likesApples: true
|
||||
height: 168.8
|
||||
- name: Samantha's Rabbit
|
||||
numberOfCats: 2
|
||||
likesApples: false
|
||||
height: -188.8
|
||||
`
|
||||
|
||||
const expectedYamlFromCSVMissingData = `- name: Gary
|
||||
numberOfCats: 1
|
||||
height: 168.8
|
||||
- name: Samantha's Rabbit
|
||||
height: -188.8
|
||||
likesApples: false
|
||||
`
|
||||
|
||||
const csvSimpleMissingData = `name,numberOfCats,height
|
||||
Gary,1,168.8
|
||||
Samantha's Rabbit,,-188.8
|
||||
`
|
||||
|
||||
const csvTestSimpleYaml = `- [i, like, csv]
|
||||
- [because, excel, is, cool]`
|
||||
|
||||
const expectedSimpleCsv = `i,like,csv
|
||||
because,excel,is,cool
|
||||
`
|
||||
|
||||
const tsvTestExpectedSimpleCsv = `i like csv
|
||||
because excel is cool
|
||||
`
|
||||
|
||||
var csvScenarios = []formatScenario{
|
||||
{
|
||||
description: "Encode CSV simple",
|
||||
input: csvTestSimpleYaml,
|
||||
expected: expectedSimpleCsv,
|
||||
scenarioType: "encode-csv",
|
||||
},
|
||||
{
|
||||
description: "Encode TSV simple",
|
||||
input: csvTestSimpleYaml,
|
||||
expected: tsvTestExpectedSimpleCsv,
|
||||
scenarioType: "encode-tsv",
|
||||
},
|
||||
{
|
||||
description: "Encode Empty",
|
||||
skipDoc: true,
|
||||
input: `[]`,
|
||||
expected: "",
|
||||
scenarioType: "encode-csv",
|
||||
},
|
||||
{
|
||||
description: "Comma in value",
|
||||
skipDoc: true,
|
||||
input: `["comma, in, value", things]`,
|
||||
expected: "\"comma, in, value\",things\n",
|
||||
scenarioType: "encode-csv",
|
||||
},
|
||||
{
|
||||
description: "Encode array of objects to csv",
|
||||
input: expectedYamlFromCSV,
|
||||
expected: csvSimple,
|
||||
scenarioType: "encode-csv",
|
||||
},
|
||||
{
|
||||
description: "Encode array of objects to custom csv format",
|
||||
subdescription: "Add the header row manually, then the we convert each object into an array of values - resulting in an array of arrays. Pick the columns and call the header whatever you like.",
|
||||
input: expectedYamlFromCSV,
|
||||
expected: csvSimpleShort,
|
||||
expression: `[["Name", "Number of Cats"]] + [.[] | [.name, .numberOfCats ]]`,
|
||||
scenarioType: "encode-csv",
|
||||
},
|
||||
{
|
||||
description: "Encode array of objects to csv - missing fields behaviour",
|
||||
subdescription: "First entry is used to determine the headers, and it is missing 'likesApples', so it is not included in the csv. Second entry does not have 'numberOfCats' so that is blank",
|
||||
input: expectedYamlFromCSVMissingData,
|
||||
expected: csvSimpleMissingData,
|
||||
scenarioType: "encode-csv",
|
||||
},
|
||||
{
|
||||
description: "Parse CSV into an array of objects",
|
||||
subdescription: "First row is assumed to be the header row.",
|
||||
input: csvSimple,
|
||||
expected: expectedYamlFromCSV,
|
||||
scenarioType: "decode-csv-object",
|
||||
},
|
||||
{
|
||||
description: "Parse TSV into an array of objects",
|
||||
subdescription: "First row is assumed to be the header row.",
|
||||
input: tsvSimple,
|
||||
expected: expectedYamlFromCSV,
|
||||
scenarioType: "decode-tsv-object",
|
||||
},
|
||||
{
|
||||
description: "Round trip",
|
||||
input: csvSimple,
|
||||
expected: expectedUpdatedSimpleCsv,
|
||||
expression: `(.[] | select(.name == "Gary") | .numberOfCats) = 3`,
|
||||
scenarioType: "roundtrip-csv",
|
||||
},
|
||||
}
|
||||
|
||||
func testCSVScenario(t *testing.T, s formatScenario) {
|
||||
switch s.scenarioType {
|
||||
case "encode-csv":
|
||||
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewYamlDecoder(), NewCsvEncoder(',')), s.description)
|
||||
case "encode-tsv":
|
||||
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewYamlDecoder(), NewCsvEncoder('\t')), s.description)
|
||||
case "decode-csv-object":
|
||||
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewCSVObjectDecoder(','), NewYamlEncoder(2, false, true, true)), s.description)
|
||||
case "decode-tsv-object":
|
||||
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewCSVObjectDecoder('\t'), NewYamlEncoder(2, false, true, true)), s.description)
|
||||
case "roundtrip-csv":
|
||||
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewCSVObjectDecoder(','), NewCsvEncoder(',')), s.description)
|
||||
default:
|
||||
panic(fmt.Sprintf("unhandled scenario type %q", s.scenarioType))
|
||||
}
|
||||
}
|
||||
|
||||
func documentCSVDecodeObjectScenario(t *testing.T, w *bufio.Writer, s formatScenario, formatType string) {
|
||||
writeOrPanic(w, fmt.Sprintf("## %v\n", s.description))
|
||||
|
||||
if s.subdescription != "" {
|
||||
writeOrPanic(w, s.subdescription)
|
||||
writeOrPanic(w, "\n\n")
|
||||
}
|
||||
|
||||
writeOrPanic(w, fmt.Sprintf("Given a sample.%v file of:\n", formatType))
|
||||
writeOrPanic(w, fmt.Sprintf("```%v\n%v\n```\n", formatType, s.input))
|
||||
|
||||
writeOrPanic(w, "then\n")
|
||||
writeOrPanic(w, fmt.Sprintf("```bash\nyq -p=%v sample.%v\n```\n", formatType, formatType))
|
||||
writeOrPanic(w, "will output\n")
|
||||
|
||||
separator := ','
|
||||
if formatType == "tsv" {
|
||||
separator = '\t'
|
||||
}
|
||||
|
||||
writeOrPanic(w, fmt.Sprintf("```yaml\n%v```\n\n",
|
||||
processFormatScenario(s, NewCSVObjectDecoder(separator), NewYamlEncoder(s.indent, false, true, true))),
|
||||
)
|
||||
}
|
||||
|
||||
func documentCSVEncodeScenario(w *bufio.Writer, s formatScenario, formatType string) {
|
||||
writeOrPanic(w, fmt.Sprintf("## %v\n", s.description))
|
||||
|
||||
if s.subdescription != "" {
|
||||
writeOrPanic(w, s.subdescription)
|
||||
writeOrPanic(w, "\n\n")
|
||||
}
|
||||
|
||||
writeOrPanic(w, "Given a sample.yml file of:\n")
|
||||
writeOrPanic(w, fmt.Sprintf("```yaml\n%v\n```\n", s.input))
|
||||
|
||||
writeOrPanic(w, "then\n")
|
||||
|
||||
expression := s.expression
|
||||
|
||||
if expression != "" {
|
||||
writeOrPanic(w, fmt.Sprintf("```bash\nyq -o=%v '%v' sample.yml\n```\n", formatType, expression))
|
||||
} else {
|
||||
writeOrPanic(w, fmt.Sprintf("```bash\nyq -o=%v sample.yml\n```\n", formatType))
|
||||
}
|
||||
writeOrPanic(w, "will output\n")
|
||||
|
||||
separator := ','
|
||||
if formatType == "tsv" {
|
||||
separator = '\t'
|
||||
}
|
||||
|
||||
writeOrPanic(w, fmt.Sprintf("```%v\n%v```\n\n", formatType,
|
||||
processFormatScenario(s, NewYamlDecoder(), NewCsvEncoder(separator))),
|
||||
)
|
||||
}
|
||||
|
||||
func documentCSVRoundTripScenario(w *bufio.Writer, s formatScenario, formatType string) {
|
||||
writeOrPanic(w, fmt.Sprintf("## %v\n", s.description))
|
||||
|
||||
if s.subdescription != "" {
|
||||
writeOrPanic(w, s.subdescription)
|
||||
writeOrPanic(w, "\n\n")
|
||||
}
|
||||
|
||||
writeOrPanic(w, fmt.Sprintf("Given a sample.%v file of:\n", formatType))
|
||||
writeOrPanic(w, fmt.Sprintf("```%v\n%v\n```\n", formatType, s.input))
|
||||
|
||||
writeOrPanic(w, "then\n")
|
||||
|
||||
expression := s.expression
|
||||
|
||||
if expression != "" {
|
||||
writeOrPanic(w, fmt.Sprintf("```bash\nyq -p=%v -o=%v '%v' sample.%v\n```\n", formatType, formatType, expression, formatType))
|
||||
} else {
|
||||
writeOrPanic(w, fmt.Sprintf("```bash\nyq -p=%v -o=%v sample.%v\n```\n", formatType, formatType, formatType))
|
||||
}
|
||||
writeOrPanic(w, "will output\n")
|
||||
|
||||
separator := ','
|
||||
if formatType == "tsv" {
|
||||
separator = '\t'
|
||||
}
|
||||
|
||||
writeOrPanic(w, fmt.Sprintf("```%v\n%v```\n\n", formatType,
|
||||
processFormatScenario(s, NewCSVObjectDecoder(separator), NewCsvEncoder(separator))),
|
||||
)
|
||||
}
|
||||
|
||||
func documentCSVScenario(t *testing.T, w *bufio.Writer, i interface{}) {
|
||||
s := i.(formatScenario)
|
||||
if s.skipDoc {
|
||||
return
|
||||
}
|
||||
switch s.scenarioType {
|
||||
case "encode-csv":
|
||||
documentCSVEncodeScenario(w, s, "csv")
|
||||
case "encode-tsv":
|
||||
documentCSVEncodeScenario(w, s, "tsv")
|
||||
case "decode-csv-object":
|
||||
documentCSVDecodeObjectScenario(t, w, s, "csv")
|
||||
case "decode-tsv-object":
|
||||
documentCSVDecodeObjectScenario(t, w, s, "tsv")
|
||||
case "roundtrip-csv":
|
||||
documentCSVRoundTripScenario(w, s, "csv")
|
||||
|
||||
default:
|
||||
panic(fmt.Sprintf("unhandled scenario type %q", s.scenarioType))
|
||||
}
|
||||
}
|
||||
|
||||
func TestCSVScenarios(t *testing.T) {
|
||||
for _, tt := range csvScenarios {
|
||||
testCSVScenario(t, tt)
|
||||
}
|
||||
genericScenarios := make([]interface{}, len(csvScenarios))
|
||||
for i, s := range csvScenarios {
|
||||
genericScenarios[i] = s
|
||||
}
|
||||
documentScenarios(t, "usage", "csv-tsv", genericScenarios, documentCSVScenario)
|
||||
}
|
@ -15,6 +15,8 @@ const (
|
||||
PropertiesInputFormat
|
||||
Base64InputFormat
|
||||
JsonInputFormat
|
||||
CSVObjectInputFormat
|
||||
TSVObjectInputFormat
|
||||
)
|
||||
|
||||
type Decoder interface {
|
||||
@ -32,6 +34,10 @@ func InputFormatFromString(format string) (InputFormat, error) {
|
||||
return PropertiesInputFormat, nil
|
||||
case "json", "ndjson", "j":
|
||||
return JsonInputFormat, nil
|
||||
case "csv", "c":
|
||||
return CSVObjectInputFormat, nil
|
||||
case "tsv", "t":
|
||||
return TSVObjectInputFormat, nil
|
||||
default:
|
||||
return 0, fmt.Errorf("unknown format '%v' please use [yaml|xml|props]", format)
|
||||
}
|
||||
|
77
pkg/yqlib/decoder_csv_object.go
Normal file
77
pkg/yqlib/decoder_csv_object.go
Normal file
@ -0,0 +1,77 @@
|
||||
package yqlib
|
||||
|
||||
import (
|
||||
"encoding/csv"
|
||||
"errors"
|
||||
"io"
|
||||
|
||||
yaml "gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
type csvObjectDecoder struct {
|
||||
separator rune
|
||||
reader csv.Reader
|
||||
finished bool
|
||||
}
|
||||
|
||||
func NewCSVObjectDecoder(separator rune) Decoder {
|
||||
return &csvObjectDecoder{separator: separator}
|
||||
}
|
||||
|
||||
func (dec *csvObjectDecoder) Init(reader io.Reader) {
|
||||
dec.reader = *csv.NewReader(reader)
|
||||
dec.reader.Comma = dec.separator
|
||||
dec.finished = false
|
||||
}
|
||||
|
||||
func (dec *csvObjectDecoder) convertToYamlNode(content string) *yaml.Node {
|
||||
node, err := parseSnippet(content)
|
||||
if err != nil {
|
||||
return createScalarNode(content, content)
|
||||
}
|
||||
return node
|
||||
}
|
||||
|
||||
func (dec *csvObjectDecoder) createObject(headerRow []string, contentRow []string) *yaml.Node {
|
||||
objectNode := &yaml.Node{Kind: yaml.MappingNode, Tag: "!!map"}
|
||||
|
||||
for i, header := range headerRow {
|
||||
objectNode.Content = append(
|
||||
objectNode.Content,
|
||||
createScalarNode(header, header),
|
||||
dec.convertToYamlNode(contentRow[i]))
|
||||
}
|
||||
return objectNode
|
||||
}
|
||||
|
||||
func (dec *csvObjectDecoder) Decode(rootYamlNode *yaml.Node) error {
|
||||
if dec.finished {
|
||||
return io.EOF
|
||||
}
|
||||
headerRow, err := dec.reader.Read()
|
||||
log.Debugf(": headerRow%v", headerRow)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
rootArray := &yaml.Node{Kind: yaml.SequenceNode, Tag: "!!seq"}
|
||||
|
||||
contentRow, err := dec.reader.Read()
|
||||
|
||||
for err == nil && len(contentRow) > 0 {
|
||||
log.Debugf("Adding contentRow: %v", contentRow)
|
||||
rootArray.Content = append(rootArray.Content, dec.createObject(headerRow, contentRow))
|
||||
contentRow, err = dec.reader.Read()
|
||||
log.Debugf("Read next contentRow: %v, %v", contentRow, err)
|
||||
}
|
||||
if !errors.Is(err, io.EOF) {
|
||||
return err
|
||||
}
|
||||
|
||||
log.Debugf("finished, contentRow%v", contentRow)
|
||||
log.Debugf("err: %v", err)
|
||||
|
||||
rootYamlNode.Kind = yaml.DocumentNode
|
||||
rootYamlNode.Content = []*yaml.Node{rootArray}
|
||||
return nil
|
||||
}
|
@ -11,14 +11,14 @@ These operators are useful to process yaml documents that have stringified embed
|
||||
| --- | -- | --|
|
||||
| Yaml | from_yaml | to_yaml(i)/@yaml |
|
||||
| JSON | from_json | to_json(i)/@json |
|
||||
| Properties | from_props | to_props/@props |
|
||||
| CSV | | to_csv/@csv |
|
||||
| TSV | | to_tsv/@tsv |
|
||||
| Properties | from_props/@propsd | to_props/@props |
|
||||
| CSV | from_csv/@csvd | to_csv/@csv |
|
||||
| TSV | from_tsv/@tsvd | to_tsv/@tsv |
|
||||
| XML | from_xml | to_xml(i)/@xml |
|
||||
| Base64 | @base64d | @base64 |
|
||||
|
||||
|
||||
CSV and TSV format both accept either a single array or scalars (representing a single row), or an array of array of scalars (representing multiple rows).
|
||||
See CSV and TSV [documentation](https://mikefarah.gitbook.io/yq/usage/csv-tsv) for accepted formats.
|
||||
|
||||
XML uses the `--xml-attribute-prefix` and `xml-content-name` flags to identify attributes and content fields.
|
||||
|
||||
@ -132,7 +132,7 @@ a: |-
|
||||
```
|
||||
then
|
||||
```bash
|
||||
yq '.a |= from_props' sample.yml
|
||||
yq '.a |= @propsd' sample.yml
|
||||
```
|
||||
will output
|
||||
```yaml
|
||||
@ -141,6 +141,42 @@ a:
|
||||
dogs: cool as well
|
||||
```
|
||||
|
||||
## Decode csv encoded string
|
||||
Given a sample.yml file of:
|
||||
```yaml
|
||||
a: |-
|
||||
cats,dogs
|
||||
great,cool as well
|
||||
```
|
||||
then
|
||||
```bash
|
||||
yq '.a |= @csvd' sample.yml
|
||||
```
|
||||
will output
|
||||
```yaml
|
||||
a:
|
||||
- cats: great
|
||||
dogs: cool as well
|
||||
```
|
||||
|
||||
## Decode tsv encoded string
|
||||
Given a sample.yml file of:
|
||||
```yaml
|
||||
a: |-
|
||||
cats dogs
|
||||
great cool as well
|
||||
```
|
||||
then
|
||||
```bash
|
||||
yq '.a |= @tsvd' sample.yml
|
||||
```
|
||||
will output
|
||||
```yaml
|
||||
a:
|
||||
- cats: great
|
||||
dogs: cool as well
|
||||
```
|
||||
|
||||
## Encode value as yaml string
|
||||
Indent defaults to 2
|
||||
|
||||
|
@ -11,14 +11,14 @@ These operators are useful to process yaml documents that have stringified embed
|
||||
| --- | -- | --|
|
||||
| Yaml | from_yaml | to_yaml(i)/@yaml |
|
||||
| JSON | from_json | to_json(i)/@json |
|
||||
| Properties | from_props | to_props/@props |
|
||||
| CSV | | to_csv/@csv |
|
||||
| TSV | | to_tsv/@tsv |
|
||||
| Properties | from_props/@propsd | to_props/@props |
|
||||
| CSV | from_csv/@csvd | to_csv/@csv |
|
||||
| TSV | from_tsv/@tsvd | to_tsv/@tsv |
|
||||
| XML | from_xml | to_xml(i)/@xml |
|
||||
| Base64 | @base64d | @base64 |
|
||||
|
||||
|
||||
CSV and TSV format both accept either a single array or scalars (representing a single row), or an array of array of scalars (representing multiple rows).
|
||||
See CSV and TSV [documentation](https://mikefarah.gitbook.io/yq/usage/csv-tsv) for accepted formats.
|
||||
|
||||
XML uses the `--xml-attribute-prefix` and `xml-content-name` flags to identify attributes and content fields.
|
||||
|
||||
|
214
pkg/yqlib/doc/usage/csv-tsv.md
Normal file
214
pkg/yqlib/doc/usage/csv-tsv.md
Normal file
@ -0,0 +1,214 @@
|
||||
# CSV
|
||||
Encode/Decode/Roundtrip CSV and TSV files.
|
||||
|
||||
## Encode
|
||||
Currently supports arrays of homogeneous flat objects, that is: no nesting, and it assumes the _first_ object has all the keys required:
|
||||
|
||||
```yaml
|
||||
- name: Bobo
|
||||
type: dog
|
||||
- name: Fifi
|
||||
type: cat
|
||||
```
|
||||
|
||||
As well as arrays of arrays of scalars (strings/numbers/booleans):
|
||||
|
||||
```yaml
|
||||
- [Bobo, dog]
|
||||
- [Fifi, cat]
|
||||
```
|
||||
|
||||
## Decode
|
||||
Decode assumes the first CSV/TSV row is the header row, and all rows beneath are the entries.
|
||||
The data will be decoded into an array of objects, using the header row values as keys.
|
||||
|
||||
```csv
|
||||
name,type
|
||||
Bobo,dog
|
||||
Fifi,cat
|
||||
```
|
||||
|
||||
|
||||
{% hint style="warning" %}
|
||||
Note that versions prior to 4.18 require the 'eval/e' command to be specified. 
|
||||
|
||||
`yq e <exp> <file>`
|
||||
{% endhint %}
|
||||
|
||||
## Encode CSV simple
|
||||
Given a sample.yml file of:
|
||||
```yaml
|
||||
- [i, like, csv]
|
||||
- [because, excel, is, cool]
|
||||
```
|
||||
then
|
||||
```bash
|
||||
yq -o=csv sample.yml
|
||||
```
|
||||
will output
|
||||
```csv
|
||||
i,like,csv
|
||||
because,excel,is,cool
|
||||
```
|
||||
|
||||
## Encode TSV simple
|
||||
Given a sample.yml file of:
|
||||
```yaml
|
||||
- [i, like, csv]
|
||||
- [because, excel, is, cool]
|
||||
```
|
||||
then
|
||||
```bash
|
||||
yq -o=tsv sample.yml
|
||||
```
|
||||
will output
|
||||
```tsv
|
||||
i like csv
|
||||
because excel is cool
|
||||
```
|
||||
|
||||
## Encode array of objects to csv
|
||||
Given a sample.yml file of:
|
||||
```yaml
|
||||
- name: Gary
|
||||
numberOfCats: 1
|
||||
likesApples: true
|
||||
height: 168.8
|
||||
- name: Samantha's Rabbit
|
||||
numberOfCats: 2
|
||||
likesApples: false
|
||||
height: -188.8
|
||||
|
||||
```
|
||||
then
|
||||
```bash
|
||||
yq -o=csv sample.yml
|
||||
```
|
||||
will output
|
||||
```csv
|
||||
name,numberOfCats,likesApples,height
|
||||
Gary,1,true,168.8
|
||||
Samantha's Rabbit,2,false,-188.8
|
||||
```
|
||||
|
||||
## Encode array of objects to custom csv format
|
||||
Add the header row manually, then convert each object into an array of values - resulting in an array of arrays. Pick the columns and call the header whatever you like.
|
||||
|
||||
Given a sample.yml file of:
|
||||
```yaml
|
||||
- name: Gary
|
||||
numberOfCats: 1
|
||||
likesApples: true
|
||||
height: 168.8
|
||||
- name: Samantha's Rabbit
|
||||
numberOfCats: 2
|
||||
likesApples: false
|
||||
height: -188.8
|
||||
|
||||
```
|
||||
then
|
||||
```bash
|
||||
yq -o=csv '[["Name", "Number of Cats"]] + [.[] | [.name, .numberOfCats ]]' sample.yml
|
||||
```
|
||||
will output
|
||||
```csv
|
||||
Name,Number of Cats
|
||||
Gary,1
|
||||
Samantha's Rabbit,2
|
||||
```
|
||||
|
||||
## Encode array of objects to csv - missing fields behaviour
|
||||
First entry is used to determine the headers, and it is missing 'likesApples', so it is not included in the csv. Second entry does not have 'numberOfCats' so that is blank
|
||||
|
||||
Given a sample.yml file of:
|
||||
```yaml
|
||||
- name: Gary
|
||||
numberOfCats: 1
|
||||
height: 168.8
|
||||
- name: Samantha's Rabbit
|
||||
height: -188.8
|
||||
likesApples: false
|
||||
|
||||
```
|
||||
then
|
||||
```bash
|
||||
yq -o=csv sample.yml
|
||||
```
|
||||
will output
|
||||
```csv
|
||||
name,numberOfCats,height
|
||||
Gary,1,168.8
|
||||
Samantha's Rabbit,,-188.8
|
||||
```
|
||||
|
||||
## Parse CSV into an array of objects
|
||||
First row is assumed to be the header row.
|
||||
|
||||
Given a sample.csv file of:
|
||||
```csv
|
||||
name,numberOfCats,likesApples,height
|
||||
Gary,1,true,168.8
|
||||
Samantha's Rabbit,2,false,-188.8
|
||||
|
||||
```
|
||||
then
|
||||
```bash
|
||||
yq -p=csv sample.csv
|
||||
```
|
||||
will output
|
||||
```yaml
|
||||
- name: Gary
|
||||
numberOfCats: 1
|
||||
likesApples: true
|
||||
height: 168.8
|
||||
- name: Samantha's Rabbit
|
||||
numberOfCats: 2
|
||||
likesApples: false
|
||||
height: -188.8
|
||||
```
|
||||
|
||||
## Parse TSV into an array of objects
|
||||
First row is assumed to be the header row.
|
||||
|
||||
Given a sample.tsv file of:
|
||||
```tsv
|
||||
name numberOfCats likesApples height
|
||||
Gary 1 true 168.8
|
||||
Samantha's Rabbit 2 false -188.8
|
||||
|
||||
```
|
||||
then
|
||||
```bash
|
||||
yq -p=tsv sample.tsv
|
||||
```
|
||||
will output
|
||||
```yaml
|
||||
- name: Gary
|
||||
numberOfCats: 1
|
||||
likesApples: true
|
||||
height: 168.8
|
||||
- name: Samantha's Rabbit
|
||||
numberOfCats: 2
|
||||
likesApples: false
|
||||
height: -188.8
|
||||
```
|
||||
|
||||
## Round trip
|
||||
Given a sample.csv file of:
|
||||
```csv
|
||||
name,numberOfCats,likesApples,height
|
||||
Gary,1,true,168.8
|
||||
Samantha's Rabbit,2,false,-188.8
|
||||
|
||||
```
|
||||
then
|
||||
```bash
|
||||
yq -p=csv -o=csv '(.[] | select(.name == "Gary") | .numberOfCats) = 3' sample.csv
|
||||
```
|
||||
will output
|
||||
```csv
|
||||
name,numberOfCats,likesApples,height
|
||||
Gary,3,true,168.8
|
||||
Samantha's Rabbit,2,false,-188.8
|
||||
```
|
||||
|
30
pkg/yqlib/doc/usage/headers/csv-tsv.md
Normal file
30
pkg/yqlib/doc/usage/headers/csv-tsv.md
Normal file
@ -0,0 +1,30 @@
|
||||
# CSV
|
||||
Encode/Decode/Roundtrip CSV and TSV files.
|
||||
|
||||
## Encode
|
||||
Currently supports arrays of homogeneous flat objects, that is: no nesting, and it assumes the _first_ object has all the keys required:
|
||||
|
||||
```yaml
|
||||
- name: Bobo
|
||||
type: dog
|
||||
- name: Fifi
|
||||
type: cat
|
||||
```
|
||||
|
||||
As well as arrays of arrays of scalars (strings/numbers/booleans):
|
||||
|
||||
```yaml
|
||||
- [Bobo, dog]
|
||||
- [Fifi, cat]
|
||||
```
|
||||
|
||||
## Decode
|
||||
Decode assumes the first CSV/TSV row is the header row, and all rows beneath are the entries.
|
||||
The data will be decoded into an array of objects, using the header row values as keys.
|
||||
|
||||
```csv
|
||||
name,type
|
||||
Bobo,dog
|
||||
Fifi,cat
|
||||
```
|
||||
|
@ -1,5 +1,5 @@
|
||||
# Properties
|
||||
|
||||
Encode to a property file (decode not yet supported). Line comments on value nodes will be copied across.
|
||||
Encode/Decode/Roundtrip to/from a property file. Line comments on value nodes will be copied across.
|
||||
|
||||
By default, empty maps and arrays are not encoded - see below for an example on how to encode a value for these.
|
||||
|
@ -1,6 +1,6 @@
|
||||
# Properties
|
||||
|
||||
Encode to a property file (decode not yet supported). Line comments on value nodes will be copied across.
|
||||
Encode/Decode/Roundtrip to/from a property file. Line comments on value nodes will be copied across.
|
||||
|
||||
By default, empty maps and arrays are not encoded - see below for an example on how to encode a value for these.
|
||||
|
||||
|
@ -13,7 +13,7 @@ type csvEncoder struct {
|
||||
}
|
||||
|
||||
func NewCsvEncoder(separator rune) Encoder {
|
||||
return &csvEncoder{separator}
|
||||
return &csvEncoder{separator: separator}
|
||||
}
|
||||
|
||||
func (e *csvEncoder) CanHandleAliases() bool {
|
||||
@ -41,6 +41,67 @@ func (e *csvEncoder) encodeRow(csvWriter *csv.Writer, contents []*yaml.Node) err
|
||||
return csvWriter.Write(stringValues)
|
||||
}
|
||||
|
||||
func (e *csvEncoder) encodeArrays(csvWriter *csv.Writer, content []*yaml.Node) error {
|
||||
for i, child := range content {
|
||||
|
||||
if child.Kind != yaml.SequenceNode {
|
||||
return fmt.Errorf("csv encoding only works for arrays of scalars (string/numbers/booleans), child[%v] is a %v", i, child.Tag)
|
||||
}
|
||||
err := e.encodeRow(csvWriter, child.Content)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e *csvEncoder) extractHeader(child *yaml.Node) ([]*yaml.Node, error) {
|
||||
if child.Kind != yaml.MappingNode {
|
||||
return nil, fmt.Errorf("csv object encoding only works for arrays of flat objects (string key => string/numbers/boolean value), child[0] is a %v", child.Tag)
|
||||
}
|
||||
mapKeys := getMapKeys(child)
|
||||
return mapKeys.Content, nil
|
||||
}
|
||||
|
||||
func (e *csvEncoder) createChildRow(child *yaml.Node, headers []*yaml.Node) []*yaml.Node {
|
||||
childRow := make([]*yaml.Node, 0)
|
||||
for _, header := range headers {
|
||||
keyIndex := findKeyInMap(child, header)
|
||||
value := createScalarNode(nil, "")
|
||||
if keyIndex != -1 {
|
||||
value = child.Content[keyIndex+1]
|
||||
}
|
||||
childRow = append(childRow, value)
|
||||
}
|
||||
return childRow
|
||||
|
||||
}
|
||||
|
||||
func (e *csvEncoder) encodeObjects(csvWriter *csv.Writer, content []*yaml.Node) error {
|
||||
headers, err := e.extractHeader(content[0])
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
err = e.encodeRow(csvWriter, headers)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
for i, child := range content {
|
||||
if child.Kind != yaml.MappingNode {
|
||||
return fmt.Errorf("csv object encoding only works for arrays of flat objects (string key => string/numbers/boolean value), child[%v] is a %v", i, child.Tag)
|
||||
}
|
||||
row := e.createChildRow(child, headers)
|
||||
err = e.encodeRow(csvWriter, row)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e *csvEncoder) Encode(writer io.Writer, originalNode *yaml.Node) error {
|
||||
csvWriter := csv.NewWriter(writer)
|
||||
csvWriter.Comma = e.separator
|
||||
@ -56,15 +117,10 @@ func (e *csvEncoder) Encode(writer io.Writer, originalNode *yaml.Node) error {
|
||||
return e.encodeRow(csvWriter, node.Content)
|
||||
}
|
||||
|
||||
for i, child := range node.Content {
|
||||
|
||||
if child.Kind != yaml.SequenceNode {
|
||||
return fmt.Errorf("csv encoding only works for arrays of scalars (string/numbers/booleans), child[%v] is a %v", i, child.Tag)
|
||||
}
|
||||
err := e.encodeRow(csvWriter, child.Content)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if node.Content[0].Kind == yaml.MappingNode {
|
||||
return e.encodeObjects(csvWriter, node.Content)
|
||||
}
|
||||
return nil
|
||||
|
||||
return e.encodeArrays(csvWriter, node.Content)
|
||||
|
||||
}
|
||||
|
@ -1,60 +0,0 @@
|
||||
package yqlib
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/mikefarah/yq/v4/test"
|
||||
)
|
||||
|
||||
func yamlToCsv(sampleYaml string, separator rune) string {
|
||||
var output bytes.Buffer
|
||||
writer := bufio.NewWriter(&output)
|
||||
|
||||
var jsonEncoder = NewCsvEncoder(separator)
|
||||
inputs, err := readDocuments(strings.NewReader(sampleYaml), "sample.yml", 0, NewYamlDecoder())
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
node := inputs.Front().Value.(*CandidateNode).Node
|
||||
err = jsonEncoder.Encode(writer, node)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
writer.Flush()
|
||||
|
||||
return strings.TrimSuffix(output.String(), "\n")
|
||||
}
|
||||
|
||||
var sampleYaml = `["apple", apple2, "comma, in, value", "new
|
||||
line", 3, 3.40, true, "tab here"]`
|
||||
|
||||
var sampleYamlArray = "[" + sampleYaml + ", [bob, cat, meow, puss]]"
|
||||
|
||||
func TestCsvEncoderEmptyArray(t *testing.T) {
|
||||
var actualCsv = yamlToCsv(`[]`, ',')
|
||||
test.AssertResult(t, "", actualCsv)
|
||||
}
|
||||
|
||||
func TestCsvEncoder(t *testing.T) {
|
||||
var expectedCsv = `apple,apple2,"comma, in, value",new line,3,3.40,true,tab here`
|
||||
|
||||
var actualCsv = yamlToCsv(sampleYaml, ',')
|
||||
test.AssertResult(t, expectedCsv, actualCsv)
|
||||
}
|
||||
|
||||
func TestCsvEncoderArrayOfArrays(t *testing.T) {
|
||||
var actualCsv = yamlToCsv(sampleYamlArray, ',')
|
||||
var expectedCsv = "apple,apple2,\"comma, in, value\",new line,3,3.40,true,tab here\nbob,cat,meow,puss"
|
||||
test.AssertResult(t, expectedCsv, actualCsv)
|
||||
}
|
||||
|
||||
func TestTsvEncoder(t *testing.T) {
|
||||
|
||||
var expectedCsv = `apple apple2 comma, in, value new line 3 3.40 true "tab here"`
|
||||
|
||||
var actualCsv = yamlToCsv(sampleYaml, '\t')
|
||||
test.AssertResult(t, expectedCsv, actualCsv)
|
||||
}
|
@ -67,7 +67,10 @@ var participleYqRules = []*participleYqRule{
|
||||
{"XMLEncode", `to_?xml`, encodeWithIndent(XMLOutputFormat, 2), 0},
|
||||
{"XMLEncodeNoIndent", `@xml`, encodeWithIndent(XMLOutputFormat, 0), 0},
|
||||
|
||||
{"CSVDecode", `from_?csv|@csvd`, decodeOp(CSVObjectInputFormat), 0},
|
||||
{"CSVEncode", `to_?csv|@csv`, encodeWithIndent(CSVOutputFormat, 0), 0},
|
||||
|
||||
{"TSVDecode", `from_?tsv|@tsvd`, decodeOp(TSVObjectInputFormat), 0},
|
||||
{"TSVEncode", `to_?tsv|@tsv`, encodeWithIndent(TSVOutputFormat, 0), 0},
|
||||
|
||||
{"Base64d", `@base64d`, decodeOp(Base64InputFormat), 0},
|
||||
|
@ -523,7 +523,6 @@ var participleLexerScenarios = []participleLexerScenario{
|
||||
}
|
||||
|
||||
func TestParticipleLexer(t *testing.T) {
|
||||
log.Errorf("TestParticiple")
|
||||
lexer := newParticipleLexer()
|
||||
|
||||
for _, scenario := range participleLexerScenarios {
|
||||
|
@ -205,10 +205,10 @@ func findInArray(array *yaml.Node, item *yaml.Node) int {
|
||||
return -1
|
||||
}
|
||||
|
||||
func findKeyInMap(array *yaml.Node, item *yaml.Node) int {
|
||||
func findKeyInMap(dataMap *yaml.Node, item *yaml.Node) int {
|
||||
|
||||
for index := 0; index < len(array.Content); index = index + 2 {
|
||||
if recursiveNodeEqual(array.Content[index], item) {
|
||||
for index := 0; index < len(dataMap.Content); index = index + 2 {
|
||||
if recursiveNodeEqual(dataMap.Content[index], item) {
|
||||
return index
|
||||
}
|
||||
}
|
||||
|
@ -114,6 +114,10 @@ func decodeOperator(d *dataTreeNavigator, context Context, expressionNode *Expre
|
||||
decoder = NewBase64Decoder()
|
||||
case PropertiesInputFormat:
|
||||
decoder = NewPropertiesDecoder()
|
||||
case CSVObjectInputFormat:
|
||||
decoder = NewCSVObjectDecoder(',')
|
||||
case TSVObjectInputFormat:
|
||||
decoder = NewCSVObjectDecoder('\t')
|
||||
}
|
||||
|
||||
var results = list.New()
|
||||
|
@ -66,11 +66,27 @@ var encoderDecoderOperatorScenarios = []expressionScenario{
|
||||
{
|
||||
description: "Decode props encoded string",
|
||||
document: `a: "cats=great\ndogs=cool as well"`,
|
||||
expression: `.a |= from_props`,
|
||||
expression: `.a |= @propsd`,
|
||||
expected: []string{
|
||||
"D0, P[], (doc)::a:\n cats: great\n dogs: cool as well\n",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Decode csv encoded string",
|
||||
document: `a: "cats,dogs\ngreat,cool as well"`,
|
||||
expression: `.a |= @csvd`,
|
||||
expected: []string{
|
||||
"D0, P[], (doc)::a:\n - cats: great\n dogs: cool as well\n",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Decode tsv encoded string",
|
||||
document: `a: "cats dogs\ngreat cool as well"`,
|
||||
expression: `.a |= @tsvd`,
|
||||
expected: []string{
|
||||
"D0, P[], (doc)::a:\n - cats: great\n dogs: cool as well\n",
|
||||
},
|
||||
},
|
||||
{
|
||||
skipDoc: true,
|
||||
document: "a:\n cool:\n bob: dylan",
|
||||
|
Loading…
Reference in New Issue
Block a user