From 9628aaf8c533f2128f9e5209a014c299f1384372 Mon Sep 17 00:00:00 2001 From: Mike Farah Date: Mon, 1 Aug 2022 10:05:26 +1000 Subject: [PATCH] Added CSV roundtrip --- acceptance_tests/inputs-format.sh | 47 +++++++++ acceptance_tests/output-format.sh | 42 ++++++++ pkg/yqlib/csv_test.go | 98 +++++++++++++++++-- pkg/yqlib/doc/operators/encode-decode.md | 46 ++++++++- .../doc/operators/headers/encode-decode.md | 8 +- pkg/yqlib/doc/usage/csv-tsv.md | 90 ++++++++++++++--- pkg/yqlib/doc/usage/headers/csv-tsv.md | 29 +++++- pkg/yqlib/encoder_csv.go | 78 ++++++++++++--- pkg/yqlib/encoder_csv_test.go | 60 ------------ pkg/yqlib/lexer_participle.go | 3 + pkg/yqlib/lib.go | 6 +- pkg/yqlib/operator_encoder_decoder.go | 4 + pkg/yqlib/operator_encoder_decoder_test.go | 18 +++- 13 files changed, 424 insertions(+), 105 deletions(-) delete mode 100644 pkg/yqlib/encoder_csv_test.go diff --git a/acceptance_tests/inputs-format.sh b/acceptance_tests/inputs-format.sh index acb1c3d3..31598f9f 100755 --- a/acceptance_tests/inputs-format.sh +++ b/acceptance_tests/inputs-format.sh @@ -3,6 +3,8 @@ setUp() { rm test*.yml 2>/dev/null || true rm test*.properties 2>/dev/null || true + rm test*.csv 2>/dev/null || true + rm test*.tsv 2>/dev/null || true rm test*.xml 2>/dev/null || true } @@ -40,6 +42,51 @@ EOM assertEquals "$expected" "$X" } +testInputCSV() { + cat >test.csv <test.tsv <test.yml <BiBi diff --git a/acceptance_tests/output-format.sh b/acceptance_tests/output-format.sh index 995f0fcf..a238dc0b 100755 --- a/acceptance_tests/output-format.sh +++ b/acceptance_tests/output-format.sh @@ -102,6 +102,48 @@ EOM assertEquals "$expected" "$X" } +testOutputCSV() { + cat >test.yml <test.yml <test.yml < string/numbers/boolean value), child[0] is a %v", child.Tag) + } + mapKeys := getMapKeys(child) + return mapKeys.Content, nil +} + +func (e *csvEncoder) createChildRow(child *yaml.Node, headers []*yaml.Node) []*yaml.Node { + childRow := make([]*yaml.Node, 0) + for _, header := range headers { + 
keyIndex := findKeyInMap(child, header)
+		value := createScalarNode(nil, "")
+		if keyIndex != -1 {
+			value = child.Content[keyIndex+1]
+		}
+		childRow = append(childRow, value)
+	}
+	return childRow
+
+}
+
+// encodeObjects writes a sequence of mapping nodes as CSV: one header row
+// taken from the keys of content[0], then one row per element. content[0] is
+// indexed unconditionally, so content must not be empty.
+func (e *csvEncoder) encodeObjects(csvWriter *csv.Writer, content []*yaml.Node) error {
+	headers, err := e.extractHeader(content[0])
+	if err != nil {
+		return err
+	}
+
+	err = e.encodeRow(csvWriter, headers)
+	if err != nil {
+		return err
+	}
+
+	for i, child := range content {
+		if child.Kind != yaml.MappingNode {
+			return fmt.Errorf("csv object encoding only works for arrays of flat objects (string key => string/numbers/boolean value), child[%v] is a %v", i, child.Tag)
+		}
+		row := e.createChildRow(child, headers)
+		err = e.encodeRow(csvWriter, row)
+		if err != nil {
+			return err
+		}
+
+	}
+	return nil
+}
+
 func (e *csvEncoder) Encode(writer io.Writer, originalNode *yaml.Node) error {
 	csvWriter := csv.NewWriter(writer)
 	csvWriter.Comma = e.separator
@@ -56,15 +120,10 @@ func (e *csvEncoder) Encode(writer io.Writer, originalNode *yaml.Node) error {
 		return e.encodeRow(csvWriter, node.Content)
 	}
 
-	for i, child := range node.Content {
-
-		if child.Kind != yaml.SequenceNode {
-			return fmt.Errorf("csv encoding only works for arrays of scalars (string/numbers/booleans), child[%v] is a %v", i, child.Tag)
-		}
-		err := e.encodeRow(csvWriter, child.Content)
-		if err != nil {
-			return err
-		}
+	if node.Content[0].Kind == yaml.MappingNode {
+		return e.encodeObjects(csvWriter, node.Content)
 	}
-	return nil
+
+	return e.encodeArrays(csvWriter, node.Content)
+
 }
diff --git a/pkg/yqlib/encoder_csv_test.go b/pkg/yqlib/encoder_csv_test.go
deleted file mode 100644
index 9e636c98..00000000
--- a/pkg/yqlib/encoder_csv_test.go
+++ /dev/null
@@ -1,60 +0,0 @@
-package yqlib
-
-import (
-	"bufio"
-	"bytes"
-	"strings"
-	"testing"
-
-	"github.com/mikefarah/yq/v4/test"
-)
-
-func yamlToCsv(sampleYaml string, separator rune) string {
-	var output bytes.Buffer
-	writer :=
bufio.NewWriter(&output) - - var jsonEncoder = NewCsvEncoder(separator) - inputs, err := readDocuments(strings.NewReader(sampleYaml), "sample.yml", 0, NewYamlDecoder()) - if err != nil { - panic(err) - } - node := inputs.Front().Value.(*CandidateNode).Node - err = jsonEncoder.Encode(writer, node) - if err != nil { - panic(err) - } - writer.Flush() - - return strings.TrimSuffix(output.String(), "\n") -} - -var sampleYaml = `["apple", apple2, "comma, in, value", "new -line", 3, 3.40, true, "tab here"]` - -var sampleYamlArray = "[" + sampleYaml + ", [bob, cat, meow, puss]]" - -func TestCsvEncoderEmptyArray(t *testing.T) { - var actualCsv = yamlToCsv(`[]`, ',') - test.AssertResult(t, "", actualCsv) -} - -func TestCsvEncoder(t *testing.T) { - var expectedCsv = `apple,apple2,"comma, in, value",new line,3,3.40,true,tab here` - - var actualCsv = yamlToCsv(sampleYaml, ',') - test.AssertResult(t, expectedCsv, actualCsv) -} - -func TestCsvEncoderArrayOfArrays(t *testing.T) { - var actualCsv = yamlToCsv(sampleYamlArray, ',') - var expectedCsv = "apple,apple2,\"comma, in, value\",new line,3,3.40,true,tab here\nbob,cat,meow,puss" - test.AssertResult(t, expectedCsv, actualCsv) -} - -func TestTsvEncoder(t *testing.T) { - - var expectedCsv = `apple apple2 comma, in, value new line 3 3.40 true "tab here"` - - var actualCsv = yamlToCsv(sampleYaml, '\t') - test.AssertResult(t, expectedCsv, actualCsv) -} diff --git a/pkg/yqlib/lexer_participle.go b/pkg/yqlib/lexer_participle.go index 09e13844..7a5ba138 100644 --- a/pkg/yqlib/lexer_participle.go +++ b/pkg/yqlib/lexer_participle.go @@ -67,7 +67,10 @@ var participleYqRules = []*participleYqRule{ {"XMLEncode", `to_?xml`, encodeWithIndent(XMLOutputFormat, 2), 0}, {"XMLEncodeNoIndent", `@xml`, encodeWithIndent(XMLOutputFormat, 0), 0}, + {"CSVDecode", `from_?csv|@csvd`, decodeOp(CSVObjectInputFormat), 0}, {"CSVEncode", `to_?csv|@csv`, encodeWithIndent(CSVOutputFormat, 0), 0}, + + {"TSVDecode", `from_?tsv|@tsvd`, 
decodeOp(TSVObjectInputFormat), 0}, {"TSVEncode", `to_?tsv|@tsv`, encodeWithIndent(TSVOutputFormat, 0), 0}, {"Base64d", `@base64d`, decodeOp(Base64InputFormat), 0}, diff --git a/pkg/yqlib/lib.go b/pkg/yqlib/lib.go index 5ec53a76..bd586ac2 100644 --- a/pkg/yqlib/lib.go +++ b/pkg/yqlib/lib.go @@ -205,10 +205,10 @@ func findInArray(array *yaml.Node, item *yaml.Node) int { return -1 } -func findKeyInMap(array *yaml.Node, item *yaml.Node) int { +func findKeyInMap(dataMap *yaml.Node, item *yaml.Node) int { - for index := 0; index < len(array.Content); index = index + 2 { - if recursiveNodeEqual(array.Content[index], item) { + for index := 0; index < len(dataMap.Content); index = index + 2 { + if recursiveNodeEqual(dataMap.Content[index], item) { return index } } diff --git a/pkg/yqlib/operator_encoder_decoder.go b/pkg/yqlib/operator_encoder_decoder.go index 4751a27c..afb321ef 100644 --- a/pkg/yqlib/operator_encoder_decoder.go +++ b/pkg/yqlib/operator_encoder_decoder.go @@ -114,6 +114,10 @@ func decodeOperator(d *dataTreeNavigator, context Context, expressionNode *Expre decoder = NewBase64Decoder() case PropertiesInputFormat: decoder = NewPropertiesDecoder() + case CSVObjectInputFormat: + decoder = NewCSVObjectDecoder(',') + case TSVObjectInputFormat: + decoder = NewCSVObjectDecoder('\t') } var results = list.New() diff --git a/pkg/yqlib/operator_encoder_decoder_test.go b/pkg/yqlib/operator_encoder_decoder_test.go index 2c131f0e..5b5726f0 100644 --- a/pkg/yqlib/operator_encoder_decoder_test.go +++ b/pkg/yqlib/operator_encoder_decoder_test.go @@ -66,11 +66,27 @@ var encoderDecoderOperatorScenarios = []expressionScenario{ { description: "Decode props encoded string", document: `a: "cats=great\ndogs=cool as well"`, - expression: `.a |= from_props`, + expression: `.a |= @propsd`, expected: []string{ "D0, P[], (doc)::a:\n cats: great\n dogs: cool as well\n", }, }, + { + description: "Decode csv encoded string", + document: `a: "cats,dogs\ngreat,cool as well"`, + expression: 
`.a |= @csvd`, + expected: []string{ + "D0, P[], (doc)::a:\n - cats: great\n dogs: cool as well\n", + }, + }, + { + description: "Decode tsv encoded string", + document: `a: "cats dogs\ngreat cool as well"`, + expression: `.a |= @tsvd`, + expected: []string{ + "D0, P[], (doc)::a:\n - cats: great\n dogs: cool as well\n", + }, + }, { skipDoc: true, document: "a:\n cool:\n bob: dylan",