mirror of
https://github.com/mikefarah/yq.git
synced 2025-01-12 19:25:37 +00:00
WIP: adding CSV decoder
This commit is contained in:
parent
4508bc2dc2
commit
183a42c249
@ -68,6 +68,10 @@ func configureDecoder() (yqlib.Decoder, error) {
|
||||
return yqlib.NewPropertiesDecoder(), nil
|
||||
case yqlib.JsonInputFormat:
|
||||
return yqlib.NewJSONDecoder(), nil
|
||||
case yqlib.CSVObjectInputFormat:
|
||||
return yqlib.NewCSVObjectDecoder(','), nil
|
||||
case yqlib.TSVObjectInputFormat:
|
||||
return yqlib.NewCSVObjectDecoder('\t'), nil
|
||||
}
|
||||
|
||||
return yqlib.NewYamlDecoder(), nil
|
||||
|
193
pkg/yqlib/csv_test.go
Normal file
193
pkg/yqlib/csv_test.go
Normal file
@ -0,0 +1,193 @@
|
||||
package yqlib
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/mikefarah/yq/v4/test"
|
||||
)
|
||||
|
||||
const csvSimple = `name,numberOfCats,likesApples,height
|
||||
Gary,1,true,168.8
|
||||
Samantha's Rabbit,2,false,-188.8
|
||||
`
|
||||
|
||||
const csvSimpleShort = `Name,Number of Cats
|
||||
Gary,1
|
||||
Samantha's Rabbit,2
|
||||
`
|
||||
|
||||
const tsvSimple = `name numberOfCats likesApples height
|
||||
Gary 1 true 168.8
|
||||
Samantha's Rabbit 2 false -188.8
|
||||
`
|
||||
|
||||
const expectedYamlFromCSV = `- name: Gary
|
||||
numberOfCats: 1
|
||||
likesApples: true
|
||||
height: 168.8
|
||||
- name: Samantha's Rabbit
|
||||
numberOfCats: 2
|
||||
likesApples: false
|
||||
height: -188.8
|
||||
`
|
||||
|
||||
const csvTestSimpleYaml = `- [i, like, csv]
|
||||
- [because, excel, is, cool]`
|
||||
|
||||
const csvTestExpectedSimpleCsv = `i,like,csv
|
||||
because,excel,is,cool
|
||||
`
|
||||
|
||||
const tsvTestExpectedSimpleCsv = `i like csv
|
||||
because excel is cool
|
||||
`
|
||||
|
||||
var csvScenarios = []formatScenario{
|
||||
{
|
||||
description: "Encode CSV simple",
|
||||
input: csvTestSimpleYaml,
|
||||
expected: csvTestExpectedSimpleCsv,
|
||||
scenarioType: "encode-csv",
|
||||
},
|
||||
{
|
||||
description: "Encode TSV simple",
|
||||
input: csvTestSimpleYaml,
|
||||
expected: tsvTestExpectedSimpleCsv,
|
||||
scenarioType: "encode-tsv",
|
||||
},
|
||||
{
|
||||
description: "Encode array of objects to csv",
|
||||
subdescription: "Add the header row manually, then the we convert each object into an array of values - resulting in an array of arrays. Nice thing about this method is you can pick the columns and call the header whatever you like.",
|
||||
input: expectedYamlFromCSV,
|
||||
expected: csvSimpleShort,
|
||||
expression: `[["Name", "Number of Cats"]] + [.[] | [.name, .numberOfCats ]]`,
|
||||
scenarioType: "encode-csv",
|
||||
},
|
||||
{
|
||||
description: "Encode array of objects to csv - generic",
|
||||
subdescription: "This is a little trickier than the previous example - we dynamically work out the $header, and use that to automatically create the value arrays.",
|
||||
input: expectedYamlFromCSV,
|
||||
expected: csvSimple,
|
||||
expression: `(.[0] | keys | .[] ) as $header | [[$header]] + [.[] | [ .[$header] ]]`,
|
||||
scenarioType: "encode-csv",
|
||||
},
|
||||
{
|
||||
description: "Parse CSV into an array of objects",
|
||||
subdescription: "First row is assumed to define the fields",
|
||||
input: csvSimple,
|
||||
expected: expectedYamlFromCSV,
|
||||
scenarioType: "decode-csv-object",
|
||||
},
|
||||
{
|
||||
description: "Parse TSV into an array of objects",
|
||||
subdescription: "First row is assumed to define the fields",
|
||||
input: tsvSimple,
|
||||
expected: expectedYamlFromCSV,
|
||||
scenarioType: "decode-tsv-object",
|
||||
},
|
||||
}
|
||||
|
||||
func testCSVScenario(t *testing.T, s formatScenario) {
|
||||
switch s.scenarioType {
|
||||
case "encode-csv":
|
||||
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewYamlDecoder(), NewCsvEncoder(',')), s.description)
|
||||
case "encode-tsv":
|
||||
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewYamlDecoder(), NewCsvEncoder('\t')), s.description)
|
||||
case "decode-csv-object":
|
||||
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewCSVObjectDecoder(','), NewYamlEncoder(2, false, true, true)), s.description)
|
||||
case "decode-tsv-object":
|
||||
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewCSVObjectDecoder('\t'), NewYamlEncoder(2, false, true, true)), s.description)
|
||||
default:
|
||||
panic(fmt.Sprintf("unhandled scenario type %q", s.scenarioType))
|
||||
}
|
||||
}
|
||||
|
||||
func documentCSVDecodeObjectScenario(t *testing.T, w *bufio.Writer, s formatScenario, formatType string) {
|
||||
writeOrPanic(w, fmt.Sprintf("## %v\n", s.description))
|
||||
|
||||
if s.subdescription != "" {
|
||||
writeOrPanic(w, s.subdescription)
|
||||
writeOrPanic(w, "\n\n")
|
||||
}
|
||||
|
||||
writeOrPanic(w, fmt.Sprintf("Given a sample.%v file of:\n", formatType))
|
||||
writeOrPanic(w, fmt.Sprintf("```%v\n%v\n```\n", formatType, s.input))
|
||||
|
||||
writeOrPanic(w, "then\n")
|
||||
writeOrPanic(w, fmt.Sprintf("```bash\nyq -p=%v sample.%v\n```\n", formatType, formatType))
|
||||
writeOrPanic(w, "will output\n")
|
||||
|
||||
separator := ','
|
||||
if formatType == "tsv" {
|
||||
separator = '\t'
|
||||
}
|
||||
|
||||
writeOrPanic(w, fmt.Sprintf("```yaml\n%v```\n\n",
|
||||
processFormatScenario(s, NewCSVObjectDecoder(separator), NewYamlEncoder(s.indent, false, true, true))),
|
||||
)
|
||||
}
|
||||
|
||||
func documentCSVEncodeScenario(w *bufio.Writer, s formatScenario, formatType string) {
|
||||
writeOrPanic(w, fmt.Sprintf("## %v\n", s.description))
|
||||
|
||||
if s.subdescription != "" {
|
||||
writeOrPanic(w, s.subdescription)
|
||||
writeOrPanic(w, "\n\n")
|
||||
}
|
||||
|
||||
writeOrPanic(w, "Given a sample.yml file of:\n")
|
||||
writeOrPanic(w, fmt.Sprintf("```yaml\n%v\n```\n", s.input))
|
||||
|
||||
writeOrPanic(w, "then\n")
|
||||
|
||||
expression := s.expression
|
||||
|
||||
if expression != "" {
|
||||
writeOrPanic(w, fmt.Sprintf("```bash\nyq -o=%v '%v' sample.yml\n```\n", formatType, expression))
|
||||
} else {
|
||||
writeOrPanic(w, fmt.Sprintf("```bash\nyq -o=%v sample.yml\n```\n", formatType))
|
||||
}
|
||||
writeOrPanic(w, "will output\n")
|
||||
|
||||
separator := ','
|
||||
if formatType == "tsv" {
|
||||
separator = '\t'
|
||||
}
|
||||
|
||||
writeOrPanic(w, fmt.Sprintf("```%v\n%v```\n\n", formatType,
|
||||
processFormatScenario(s, NewYamlDecoder(), NewCsvEncoder(separator))),
|
||||
)
|
||||
}
|
||||
|
||||
func documentCSVScenario(t *testing.T, w *bufio.Writer, i interface{}) {
|
||||
s := i.(formatScenario)
|
||||
if s.skipDoc {
|
||||
return
|
||||
}
|
||||
switch s.scenarioType {
|
||||
case "encode-csv":
|
||||
documentCSVEncodeScenario(w, s, "csv")
|
||||
case "encode-tsv":
|
||||
documentCSVEncodeScenario(w, s, "tsv")
|
||||
case "decode-csv-object":
|
||||
documentCSVDecodeObjectScenario(t, w, s, "csv")
|
||||
case "decode-tsv-object":
|
||||
documentCSVDecodeObjectScenario(t, w, s, "tsv")
|
||||
|
||||
default:
|
||||
panic(fmt.Sprintf("unhandled scenario type %q", s.scenarioType))
|
||||
}
|
||||
}
|
||||
|
||||
func TestCSVScenarios(t *testing.T) {
|
||||
for _, tt := range csvScenarios {
|
||||
testCSVScenario(t, tt)
|
||||
}
|
||||
genericScenarios := make([]interface{}, len(csvScenarios))
|
||||
for i, s := range csvScenarios {
|
||||
genericScenarios[i] = s
|
||||
}
|
||||
documentScenarios(t, "usage", "csv-tsv", genericScenarios, documentCSVScenario)
|
||||
}
|
@ -15,6 +15,8 @@ const (
|
||||
PropertiesInputFormat
|
||||
Base64InputFormat
|
||||
JsonInputFormat
|
||||
CSVObjectInputFormat
|
||||
TSVObjectInputFormat
|
||||
)
|
||||
|
||||
type Decoder interface {
|
||||
@ -32,6 +34,10 @@ func InputFormatFromString(format string) (InputFormat, error) {
|
||||
return PropertiesInputFormat, nil
|
||||
case "json", "ndjson", "j":
|
||||
return JsonInputFormat, nil
|
||||
case "csv":
|
||||
return CSVObjectInputFormat, nil
|
||||
case "tsv":
|
||||
return TSVObjectInputFormat, nil
|
||||
default:
|
||||
return 0, fmt.Errorf("unknown format '%v' please use [yaml|xml|props]", format)
|
||||
}
|
||||
|
77
pkg/yqlib/decoder_csv_object.go
Normal file
77
pkg/yqlib/decoder_csv_object.go
Normal file
@ -0,0 +1,77 @@
|
||||
package yqlib
|
||||
|
||||
import (
|
||||
"encoding/csv"
|
||||
"errors"
|
||||
"io"
|
||||
|
||||
yaml "gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
type csvObjectDecoder struct {
|
||||
separator rune
|
||||
reader csv.Reader
|
||||
finished bool
|
||||
}
|
||||
|
||||
func NewCSVObjectDecoder(separator rune) Decoder {
|
||||
return &csvObjectDecoder{separator: separator}
|
||||
}
|
||||
|
||||
func (dec *csvObjectDecoder) Init(reader io.Reader) {
|
||||
dec.reader = *csv.NewReader(reader)
|
||||
dec.reader.Comma = dec.separator
|
||||
dec.finished = false
|
||||
}
|
||||
|
||||
func (dec *csvObjectDecoder) convertToYamlNode(content string) *yaml.Node {
|
||||
node, err := parseSnippet(content)
|
||||
if err != nil {
|
||||
return createScalarNode(content, content)
|
||||
}
|
||||
return node
|
||||
}
|
||||
|
||||
func (dec *csvObjectDecoder) createObject(headerRow []string, contentRow []string) *yaml.Node {
|
||||
objectNode := &yaml.Node{Kind: yaml.MappingNode, Tag: "!!map"}
|
||||
|
||||
for i, header := range headerRow {
|
||||
objectNode.Content = append(
|
||||
objectNode.Content,
|
||||
createScalarNode(header, header),
|
||||
dec.convertToYamlNode(contentRow[i]))
|
||||
}
|
||||
return objectNode
|
||||
}
|
||||
|
||||
func (dec *csvObjectDecoder) Decode(rootYamlNode *yaml.Node) error {
|
||||
if dec.finished {
|
||||
return io.EOF
|
||||
}
|
||||
headerRow, err := dec.reader.Read()
|
||||
log.Debugf(": headerRow%v", headerRow)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
rootArray := &yaml.Node{Kind: yaml.SequenceNode, Tag: "!!seq"}
|
||||
|
||||
contentRow, err := dec.reader.Read()
|
||||
|
||||
for err == nil && len(contentRow) > 0 {
|
||||
log.Debugf("Adding contentRow: %v", contentRow)
|
||||
rootArray.Content = append(rootArray.Content, dec.createObject(headerRow, contentRow))
|
||||
contentRow, err = dec.reader.Read()
|
||||
log.Debugf("Read next contentRow: %v, %v", contentRow, err)
|
||||
}
|
||||
if !errors.Is(err, io.EOF) {
|
||||
return err
|
||||
}
|
||||
|
||||
log.Debugf("finished, contentRow%v", contentRow)
|
||||
log.Debugf("err: %v", err)
|
||||
|
||||
rootYamlNode.Kind = yaml.DocumentNode
|
||||
rootYamlNode.Content = []*yaml.Node{rootArray}
|
||||
return nil
|
||||
}
|
143
pkg/yqlib/doc/usage/csv-tsv.md
Normal file
143
pkg/yqlib/doc/usage/csv-tsv.md
Normal file
@ -0,0 +1,143 @@
|
||||
|
||||
{% hint style="warning" %}
|
||||
Note that versions prior to 4.18 require the 'eval/e' command to be specified. 
|
||||
|
||||
`yq e <exp> <file>`
|
||||
{% endhint %}
|
||||
|
||||
## Encode CSV simple
|
||||
Given a sample.yml file of:
|
||||
```yaml
|
||||
- [i, like, csv]
|
||||
- [because, excel, is, cool]
|
||||
```
|
||||
then
|
||||
```bash
|
||||
yq -o=csv sample.yml
|
||||
```
|
||||
will output
|
||||
```csv
|
||||
i,like,csv
|
||||
because,excel,is,cool
|
||||
```
|
||||
|
||||
## Encode TSV simple
|
||||
Given a sample.yml file of:
|
||||
```yaml
|
||||
- [i, like, csv]
|
||||
- [because, excel, is, cool]
|
||||
```
|
||||
then
|
||||
```bash
|
||||
yq -o=tsv sample.yml
|
||||
```
|
||||
will output
|
||||
```tsv
|
||||
i like csv
|
||||
because excel is cool
|
||||
```
|
||||
|
||||
## Encode array of objects to csv
|
||||
Add the header row manually, then the we convert each object into an array of values - resulting in an array of arrays. Nice thing about this method is you can pick the columns and call the header whatever you like.
|
||||
|
||||
Given a sample.yml file of:
|
||||
```yaml
|
||||
- name: Gary
|
||||
numberOfCats: 1
|
||||
likesApples: true
|
||||
height: 168.8
|
||||
- name: Samantha's Rabbit
|
||||
numberOfCats: 2
|
||||
likesApples: false
|
||||
height: -188.8
|
||||
|
||||
```
|
||||
then
|
||||
```bash
|
||||
yq -o=csv '[["Name", "Number of Cats"]] + [.[] | [.name, .numberOfCats ]]' sample.yml
|
||||
```
|
||||
will output
|
||||
```csv
|
||||
Name,Number of Cats
|
||||
Gary,1
|
||||
Samantha's Rabbit,2
|
||||
```
|
||||
|
||||
## Encode array of objects to csv - generic
|
||||
This is a little trickier than the previous example - we dynamically work out the $header, and use that to automatically create the value arrays.
|
||||
|
||||
Given a sample.yml file of:
|
||||
```yaml
|
||||
- name: Gary
|
||||
numberOfCats: 1
|
||||
likesApples: true
|
||||
height: 168.8
|
||||
- name: Samantha's Rabbit
|
||||
numberOfCats: 2
|
||||
likesApples: false
|
||||
height: -188.8
|
||||
|
||||
```
|
||||
then
|
||||
```bash
|
||||
yq -o=csv '(.[0] | keys | .[] ) as $header | [[$header]] + [.[] | [ .[$header] ]]' sample.yml
|
||||
```
|
||||
will output
|
||||
```csv
|
||||
name,numberOfCats,likesApples,height
|
||||
Gary,1,true,168.8
|
||||
Samantha's Rabbit,2,false,-188.8
|
||||
```
|
||||
|
||||
## Parse CSV into an array of objects
|
||||
First row is assumed to define the fields
|
||||
|
||||
Given a sample.csv file of:
|
||||
```csv
|
||||
name,numberOfCats,likesApples,height
|
||||
Gary,1,true,168.8
|
||||
Samantha's Rabbit,2,false,-188.8
|
||||
|
||||
```
|
||||
then
|
||||
```bash
|
||||
yq -p=csv sample.csv
|
||||
```
|
||||
will output
|
||||
```yaml
|
||||
- name: Gary
|
||||
numberOfCats: 1
|
||||
likesApples: true
|
||||
height: 168.8
|
||||
- name: Samantha's Rabbit
|
||||
numberOfCats: 2
|
||||
likesApples: false
|
||||
height: -188.8
|
||||
```
|
||||
|
||||
## Parse TSV into an array of objects
|
||||
First row is assumed to define the fields
|
||||
|
||||
Given a sample.tsv file of:
|
||||
```tsv
|
||||
name numberOfCats likesApples height
|
||||
Gary 1 true 168.8
|
||||
Samantha's Rabbit 2 false -188.8
|
||||
|
||||
```
|
||||
then
|
||||
```bash
|
||||
yq -p=tsv sample.tsv
|
||||
```
|
||||
will output
|
||||
```yaml
|
||||
- name: Gary
|
||||
numberOfCats: 1
|
||||
likesApples: true
|
||||
height: 168.8
|
||||
- name: Samantha's Rabbit
|
||||
numberOfCats: 2
|
||||
likesApples: false
|
||||
height: -188.8
|
||||
```
|
||||
|
@ -31,7 +31,7 @@ type expressionScenario struct {
|
||||
}
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
logging.SetLevel(logging.ERROR, "")
|
||||
logging.SetLevel(logging.DEBUG, "")
|
||||
Now = func() time.Time {
|
||||
return time.Date(2021, time.May, 19, 1, 2, 3, 4, time.UTC)
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user