mirror of
https://github.com/mikefarah/yq.git
synced 2025-01-12 19:25:37 +00:00
WIP: adding CSV decoder
This commit is contained in:
parent
4508bc2dc2
commit
183a42c249
@ -68,6 +68,10 @@ func configureDecoder() (yqlib.Decoder, error) {
|
|||||||
return yqlib.NewPropertiesDecoder(), nil
|
return yqlib.NewPropertiesDecoder(), nil
|
||||||
case yqlib.JsonInputFormat:
|
case yqlib.JsonInputFormat:
|
||||||
return yqlib.NewJSONDecoder(), nil
|
return yqlib.NewJSONDecoder(), nil
|
||||||
|
case yqlib.CSVObjectInputFormat:
|
||||||
|
return yqlib.NewCSVObjectDecoder(','), nil
|
||||||
|
case yqlib.TSVObjectInputFormat:
|
||||||
|
return yqlib.NewCSVObjectDecoder('\t'), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return yqlib.NewYamlDecoder(), nil
|
return yqlib.NewYamlDecoder(), nil
|
||||||
|
193
pkg/yqlib/csv_test.go
Normal file
193
pkg/yqlib/csv_test.go
Normal file
@ -0,0 +1,193 @@
|
|||||||
|
package yqlib
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"fmt"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/mikefarah/yq/v4/test"
|
||||||
|
)
|
||||||
|
|
||||||
|
const csvSimple = `name,numberOfCats,likesApples,height
|
||||||
|
Gary,1,true,168.8
|
||||||
|
Samantha's Rabbit,2,false,-188.8
|
||||||
|
`
|
||||||
|
|
||||||
|
const csvSimpleShort = `Name,Number of Cats
|
||||||
|
Gary,1
|
||||||
|
Samantha's Rabbit,2
|
||||||
|
`
|
||||||
|
|
||||||
|
const tsvSimple = `name numberOfCats likesApples height
|
||||||
|
Gary 1 true 168.8
|
||||||
|
Samantha's Rabbit 2 false -188.8
|
||||||
|
`
|
||||||
|
|
||||||
|
const expectedYamlFromCSV = `- name: Gary
|
||||||
|
numberOfCats: 1
|
||||||
|
likesApples: true
|
||||||
|
height: 168.8
|
||||||
|
- name: Samantha's Rabbit
|
||||||
|
numberOfCats: 2
|
||||||
|
likesApples: false
|
||||||
|
height: -188.8
|
||||||
|
`
|
||||||
|
|
||||||
|
const csvTestSimpleYaml = `- [i, like, csv]
|
||||||
|
- [because, excel, is, cool]`
|
||||||
|
|
||||||
|
const csvTestExpectedSimpleCsv = `i,like,csv
|
||||||
|
because,excel,is,cool
|
||||||
|
`
|
||||||
|
|
||||||
|
const tsvTestExpectedSimpleCsv = `i like csv
|
||||||
|
because excel is cool
|
||||||
|
`
|
||||||
|
|
||||||
|
var csvScenarios = []formatScenario{
|
||||||
|
{
|
||||||
|
description: "Encode CSV simple",
|
||||||
|
input: csvTestSimpleYaml,
|
||||||
|
expected: csvTestExpectedSimpleCsv,
|
||||||
|
scenarioType: "encode-csv",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "Encode TSV simple",
|
||||||
|
input: csvTestSimpleYaml,
|
||||||
|
expected: tsvTestExpectedSimpleCsv,
|
||||||
|
scenarioType: "encode-tsv",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "Encode array of objects to csv",
|
||||||
|
subdescription: "Add the header row manually, then the we convert each object into an array of values - resulting in an array of arrays. Nice thing about this method is you can pick the columns and call the header whatever you like.",
|
||||||
|
input: expectedYamlFromCSV,
|
||||||
|
expected: csvSimpleShort,
|
||||||
|
expression: `[["Name", "Number of Cats"]] + [.[] | [.name, .numberOfCats ]]`,
|
||||||
|
scenarioType: "encode-csv",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "Encode array of objects to csv - generic",
|
||||||
|
subdescription: "This is a little trickier than the previous example - we dynamically work out the $header, and use that to automatically create the value arrays.",
|
||||||
|
input: expectedYamlFromCSV,
|
||||||
|
expected: csvSimple,
|
||||||
|
expression: `(.[0] | keys | .[] ) as $header | [[$header]] + [.[] | [ .[$header] ]]`,
|
||||||
|
scenarioType: "encode-csv",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "Parse CSV into an array of objects",
|
||||||
|
subdescription: "First row is assumed to define the fields",
|
||||||
|
input: csvSimple,
|
||||||
|
expected: expectedYamlFromCSV,
|
||||||
|
scenarioType: "decode-csv-object",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "Parse TSV into an array of objects",
|
||||||
|
subdescription: "First row is assumed to define the fields",
|
||||||
|
input: tsvSimple,
|
||||||
|
expected: expectedYamlFromCSV,
|
||||||
|
scenarioType: "decode-tsv-object",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
func testCSVScenario(t *testing.T, s formatScenario) {
|
||||||
|
switch s.scenarioType {
|
||||||
|
case "encode-csv":
|
||||||
|
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewYamlDecoder(), NewCsvEncoder(',')), s.description)
|
||||||
|
case "encode-tsv":
|
||||||
|
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewYamlDecoder(), NewCsvEncoder('\t')), s.description)
|
||||||
|
case "decode-csv-object":
|
||||||
|
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewCSVObjectDecoder(','), NewYamlEncoder(2, false, true, true)), s.description)
|
||||||
|
case "decode-tsv-object":
|
||||||
|
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewCSVObjectDecoder('\t'), NewYamlEncoder(2, false, true, true)), s.description)
|
||||||
|
default:
|
||||||
|
panic(fmt.Sprintf("unhandled scenario type %q", s.scenarioType))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func documentCSVDecodeObjectScenario(t *testing.T, w *bufio.Writer, s formatScenario, formatType string) {
|
||||||
|
writeOrPanic(w, fmt.Sprintf("## %v\n", s.description))
|
||||||
|
|
||||||
|
if s.subdescription != "" {
|
||||||
|
writeOrPanic(w, s.subdescription)
|
||||||
|
writeOrPanic(w, "\n\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
writeOrPanic(w, fmt.Sprintf("Given a sample.%v file of:\n", formatType))
|
||||||
|
writeOrPanic(w, fmt.Sprintf("```%v\n%v\n```\n", formatType, s.input))
|
||||||
|
|
||||||
|
writeOrPanic(w, "then\n")
|
||||||
|
writeOrPanic(w, fmt.Sprintf("```bash\nyq -p=%v sample.%v\n```\n", formatType, formatType))
|
||||||
|
writeOrPanic(w, "will output\n")
|
||||||
|
|
||||||
|
separator := ','
|
||||||
|
if formatType == "tsv" {
|
||||||
|
separator = '\t'
|
||||||
|
}
|
||||||
|
|
||||||
|
writeOrPanic(w, fmt.Sprintf("```yaml\n%v```\n\n",
|
||||||
|
processFormatScenario(s, NewCSVObjectDecoder(separator), NewYamlEncoder(s.indent, false, true, true))),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
func documentCSVEncodeScenario(w *bufio.Writer, s formatScenario, formatType string) {
|
||||||
|
writeOrPanic(w, fmt.Sprintf("## %v\n", s.description))
|
||||||
|
|
||||||
|
if s.subdescription != "" {
|
||||||
|
writeOrPanic(w, s.subdescription)
|
||||||
|
writeOrPanic(w, "\n\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
writeOrPanic(w, "Given a sample.yml file of:\n")
|
||||||
|
writeOrPanic(w, fmt.Sprintf("```yaml\n%v\n```\n", s.input))
|
||||||
|
|
||||||
|
writeOrPanic(w, "then\n")
|
||||||
|
|
||||||
|
expression := s.expression
|
||||||
|
|
||||||
|
if expression != "" {
|
||||||
|
writeOrPanic(w, fmt.Sprintf("```bash\nyq -o=%v '%v' sample.yml\n```\n", formatType, expression))
|
||||||
|
} else {
|
||||||
|
writeOrPanic(w, fmt.Sprintf("```bash\nyq -o=%v sample.yml\n```\n", formatType))
|
||||||
|
}
|
||||||
|
writeOrPanic(w, "will output\n")
|
||||||
|
|
||||||
|
separator := ','
|
||||||
|
if formatType == "tsv" {
|
||||||
|
separator = '\t'
|
||||||
|
}
|
||||||
|
|
||||||
|
writeOrPanic(w, fmt.Sprintf("```%v\n%v```\n\n", formatType,
|
||||||
|
processFormatScenario(s, NewYamlDecoder(), NewCsvEncoder(separator))),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
func documentCSVScenario(t *testing.T, w *bufio.Writer, i interface{}) {
|
||||||
|
s := i.(formatScenario)
|
||||||
|
if s.skipDoc {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
switch s.scenarioType {
|
||||||
|
case "encode-csv":
|
||||||
|
documentCSVEncodeScenario(w, s, "csv")
|
||||||
|
case "encode-tsv":
|
||||||
|
documentCSVEncodeScenario(w, s, "tsv")
|
||||||
|
case "decode-csv-object":
|
||||||
|
documentCSVDecodeObjectScenario(t, w, s, "csv")
|
||||||
|
case "decode-tsv-object":
|
||||||
|
documentCSVDecodeObjectScenario(t, w, s, "tsv")
|
||||||
|
|
||||||
|
default:
|
||||||
|
panic(fmt.Sprintf("unhandled scenario type %q", s.scenarioType))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCSVScenarios(t *testing.T) {
|
||||||
|
for _, tt := range csvScenarios {
|
||||||
|
testCSVScenario(t, tt)
|
||||||
|
}
|
||||||
|
genericScenarios := make([]interface{}, len(csvScenarios))
|
||||||
|
for i, s := range csvScenarios {
|
||||||
|
genericScenarios[i] = s
|
||||||
|
}
|
||||||
|
documentScenarios(t, "usage", "csv-tsv", genericScenarios, documentCSVScenario)
|
||||||
|
}
|
@ -15,6 +15,8 @@ const (
|
|||||||
PropertiesInputFormat
|
PropertiesInputFormat
|
||||||
Base64InputFormat
|
Base64InputFormat
|
||||||
JsonInputFormat
|
JsonInputFormat
|
||||||
|
CSVObjectInputFormat
|
||||||
|
TSVObjectInputFormat
|
||||||
)
|
)
|
||||||
|
|
||||||
type Decoder interface {
|
type Decoder interface {
|
||||||
@ -32,6 +34,10 @@ func InputFormatFromString(format string) (InputFormat, error) {
|
|||||||
return PropertiesInputFormat, nil
|
return PropertiesInputFormat, nil
|
||||||
case "json", "ndjson", "j":
|
case "json", "ndjson", "j":
|
||||||
return JsonInputFormat, nil
|
return JsonInputFormat, nil
|
||||||
|
case "csv":
|
||||||
|
return CSVObjectInputFormat, nil
|
||||||
|
case "tsv":
|
||||||
|
return TSVObjectInputFormat, nil
|
||||||
default:
|
default:
|
||||||
return 0, fmt.Errorf("unknown format '%v' please use [yaml|xml|props]", format)
|
return 0, fmt.Errorf("unknown format '%v' please use [yaml|xml|props]", format)
|
||||||
}
|
}
|
||||||
|
77
pkg/yqlib/decoder_csv_object.go
Normal file
77
pkg/yqlib/decoder_csv_object.go
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
package yqlib
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/csv"
|
||||||
|
"errors"
|
||||||
|
"io"
|
||||||
|
|
||||||
|
yaml "gopkg.in/yaml.v3"
|
||||||
|
)
|
||||||
|
|
||||||
|
// csvObjectDecoder decodes delimiter-separated text into a YAML sequence of
// maps, using the first record as the field names for every following record.
type csvObjectDecoder struct {
	// separator is the field delimiter; Init copies it to reader.Comma.
	separator rune
	// reader is the record reader over the current input, assigned by Init.
	reader csv.Reader
	// finished is reset by Init; while true, Decode returns io.EOF.
	finished bool
}
|
||||||
|
|
||||||
|
func NewCSVObjectDecoder(separator rune) Decoder {
|
||||||
|
return &csvObjectDecoder{separator: separator}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (dec *csvObjectDecoder) Init(reader io.Reader) {
|
||||||
|
dec.reader = *csv.NewReader(reader)
|
||||||
|
dec.reader.Comma = dec.separator
|
||||||
|
dec.finished = false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (dec *csvObjectDecoder) convertToYamlNode(content string) *yaml.Node {
|
||||||
|
node, err := parseSnippet(content)
|
||||||
|
if err != nil {
|
||||||
|
return createScalarNode(content, content)
|
||||||
|
}
|
||||||
|
return node
|
||||||
|
}
|
||||||
|
|
||||||
|
func (dec *csvObjectDecoder) createObject(headerRow []string, contentRow []string) *yaml.Node {
|
||||||
|
objectNode := &yaml.Node{Kind: yaml.MappingNode, Tag: "!!map"}
|
||||||
|
|
||||||
|
for i, header := range headerRow {
|
||||||
|
objectNode.Content = append(
|
||||||
|
objectNode.Content,
|
||||||
|
createScalarNode(header, header),
|
||||||
|
dec.convertToYamlNode(contentRow[i]))
|
||||||
|
}
|
||||||
|
return objectNode
|
||||||
|
}
|
||||||
|
|
||||||
|
func (dec *csvObjectDecoder) Decode(rootYamlNode *yaml.Node) error {
|
||||||
|
if dec.finished {
|
||||||
|
return io.EOF
|
||||||
|
}
|
||||||
|
headerRow, err := dec.reader.Read()
|
||||||
|
log.Debugf(": headerRow%v", headerRow)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
rootArray := &yaml.Node{Kind: yaml.SequenceNode, Tag: "!!seq"}
|
||||||
|
|
||||||
|
contentRow, err := dec.reader.Read()
|
||||||
|
|
||||||
|
for err == nil && len(contentRow) > 0 {
|
||||||
|
log.Debugf("Adding contentRow: %v", contentRow)
|
||||||
|
rootArray.Content = append(rootArray.Content, dec.createObject(headerRow, contentRow))
|
||||||
|
contentRow, err = dec.reader.Read()
|
||||||
|
log.Debugf("Read next contentRow: %v, %v", contentRow, err)
|
||||||
|
}
|
||||||
|
if !errors.Is(err, io.EOF) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Debugf("finished, contentRow%v", contentRow)
|
||||||
|
log.Debugf("err: %v", err)
|
||||||
|
|
||||||
|
rootYamlNode.Kind = yaml.DocumentNode
|
||||||
|
rootYamlNode.Content = []*yaml.Node{rootArray}
|
||||||
|
return nil
|
||||||
|
}
|
143
pkg/yqlib/doc/usage/csv-tsv.md
Normal file
143
pkg/yqlib/doc/usage/csv-tsv.md
Normal file
@ -0,0 +1,143 @@
|
|||||||
|
|
||||||
|
{% hint style="warning" %}
|
||||||
|
Note that versions prior to 4.18 require the 'eval/e' command to be specified. 
|
||||||
|
|
||||||
|
`yq e <exp> <file>`
|
||||||
|
{% endhint %}
|
||||||
|
|
||||||
|
## Encode CSV simple
|
||||||
|
Given a sample.yml file of:
|
||||||
|
```yaml
|
||||||
|
- [i, like, csv]
|
||||||
|
- [because, excel, is, cool]
|
||||||
|
```
|
||||||
|
then
|
||||||
|
```bash
|
||||||
|
yq -o=csv sample.yml
|
||||||
|
```
|
||||||
|
will output
|
||||||
|
```csv
|
||||||
|
i,like,csv
|
||||||
|
because,excel,is,cool
|
||||||
|
```
|
||||||
|
|
||||||
|
## Encode TSV simple
|
||||||
|
Given a sample.yml file of:
|
||||||
|
```yaml
|
||||||
|
- [i, like, csv]
|
||||||
|
- [because, excel, is, cool]
|
||||||
|
```
|
||||||
|
then
|
||||||
|
```bash
|
||||||
|
yq -o=tsv sample.yml
|
||||||
|
```
|
||||||
|
will output
|
||||||
|
```tsv
|
||||||
|
i like csv
|
||||||
|
because excel is cool
|
||||||
|
```
|
||||||
|
|
||||||
|
## Encode array of objects to csv
|
||||||
|
Add the header row manually, then we convert each object into an array of values - resulting in an array of arrays. Nice thing about this method is you can pick the columns and call the header whatever you like.
|
||||||
|
|
||||||
|
Given a sample.yml file of:
|
||||||
|
```yaml
|
||||||
|
- name: Gary
|
||||||
|
numberOfCats: 1
|
||||||
|
likesApples: true
|
||||||
|
height: 168.8
|
||||||
|
- name: Samantha's Rabbit
|
||||||
|
numberOfCats: 2
|
||||||
|
likesApples: false
|
||||||
|
height: -188.8
|
||||||
|
|
||||||
|
```
|
||||||
|
then
|
||||||
|
```bash
|
||||||
|
yq -o=csv '[["Name", "Number of Cats"]] + [.[] | [.name, .numberOfCats ]]' sample.yml
|
||||||
|
```
|
||||||
|
will output
|
||||||
|
```csv
|
||||||
|
Name,Number of Cats
|
||||||
|
Gary,1
|
||||||
|
Samantha's Rabbit,2
|
||||||
|
```
|
||||||
|
|
||||||
|
## Encode array of objects to csv - generic
|
||||||
|
This is a little trickier than the previous example - we dynamically work out the $header, and use that to automatically create the value arrays.
|
||||||
|
|
||||||
|
Given a sample.yml file of:
|
||||||
|
```yaml
|
||||||
|
- name: Gary
|
||||||
|
numberOfCats: 1
|
||||||
|
likesApples: true
|
||||||
|
height: 168.8
|
||||||
|
- name: Samantha's Rabbit
|
||||||
|
numberOfCats: 2
|
||||||
|
likesApples: false
|
||||||
|
height: -188.8
|
||||||
|
|
||||||
|
```
|
||||||
|
then
|
||||||
|
```bash
|
||||||
|
yq -o=csv '(.[0] | keys | .[] ) as $header | [[$header]] + [.[] | [ .[$header] ]]' sample.yml
|
||||||
|
```
|
||||||
|
will output
|
||||||
|
```csv
|
||||||
|
name,numberOfCats,likesApples,height
|
||||||
|
Gary,1,true,168.8
|
||||||
|
Samantha's Rabbit,2,false,-188.8
|
||||||
|
```
|
||||||
|
|
||||||
|
## Parse CSV into an array of objects
|
||||||
|
First row is assumed to define the fields
|
||||||
|
|
||||||
|
Given a sample.csv file of:
|
||||||
|
```csv
|
||||||
|
name,numberOfCats,likesApples,height
|
||||||
|
Gary,1,true,168.8
|
||||||
|
Samantha's Rabbit,2,false,-188.8
|
||||||
|
|
||||||
|
```
|
||||||
|
then
|
||||||
|
```bash
|
||||||
|
yq -p=csv sample.csv
|
||||||
|
```
|
||||||
|
will output
|
||||||
|
```yaml
|
||||||
|
- name: Gary
|
||||||
|
numberOfCats: 1
|
||||||
|
likesApples: true
|
||||||
|
height: 168.8
|
||||||
|
- name: Samantha's Rabbit
|
||||||
|
numberOfCats: 2
|
||||||
|
likesApples: false
|
||||||
|
height: -188.8
|
||||||
|
```
|
||||||
|
|
||||||
|
## Parse TSV into an array of objects
|
||||||
|
First row is assumed to define the fields
|
||||||
|
|
||||||
|
Given a sample.tsv file of:
|
||||||
|
```tsv
|
||||||
|
name numberOfCats likesApples height
|
||||||
|
Gary 1 true 168.8
|
||||||
|
Samantha's Rabbit 2 false -188.8
|
||||||
|
|
||||||
|
```
|
||||||
|
then
|
||||||
|
```bash
|
||||||
|
yq -p=tsv sample.tsv
|
||||||
|
```
|
||||||
|
will output
|
||||||
|
```yaml
|
||||||
|
- name: Gary
|
||||||
|
numberOfCats: 1
|
||||||
|
likesApples: true
|
||||||
|
height: 168.8
|
||||||
|
- name: Samantha's Rabbit
|
||||||
|
numberOfCats: 2
|
||||||
|
likesApples: false
|
||||||
|
height: -188.8
|
||||||
|
```
|
||||||
|
|
@ -31,7 +31,7 @@ type expressionScenario struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestMain(m *testing.M) {
|
func TestMain(m *testing.M) {
|
||||||
logging.SetLevel(logging.ERROR, "")
|
logging.SetLevel(logging.DEBUG, "")
|
||||||
Now = func() time.Time {
|
Now = func() time.Time {
|
||||||
return time.Date(2021, time.May, 19, 1, 2, 3, 4, time.UTC)
|
return time.Date(2021, time.May, 19, 1, 2, 3, 4, time.UTC)
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user