diff --git a/acceptance_tests/inputs-format.sh b/acceptance_tests/inputs-format.sh index cd48cb4a..e7a7563e 100755 --- a/acceptance_tests/inputs-format.sh +++ b/acceptance_tests/inputs-format.sh @@ -63,6 +63,40 @@ EOM assertEquals "$expected" "$X" } +testInputCSVNoAuto() { + cat >test.csv <test.tsv <)") rootCmd.PersistentFlags().BoolVar(&yqlib.ConfiguredXMLPreferences.SkipDirectives, "xml-skip-directives", yqlib.ConfiguredXMLPreferences.SkipDirectives, "skip over directives (e.g. )") + rootCmd.PersistentFlags().BoolVar(&yqlib.ConfiguredCsvPreferences.AutoParse, "csv-auto-parse", yqlib.ConfiguredCsvPreferences.AutoParse, "parse CSV YAML/JSON values") + rootCmd.PersistentFlags().BoolVar(&yqlib.ConfiguredTsvPreferences.AutoParse, "tsv-auto-parse", yqlib.ConfiguredTsvPreferences.AutoParse, "parse TSV YAML/JSON values") + rootCmd.PersistentFlags().StringVar(&yqlib.ConfiguredLuaPreferences.DocPrefix, "lua-prefix", yqlib.ConfiguredLuaPreferences.DocPrefix, "prefix") rootCmd.PersistentFlags().StringVar(&yqlib.ConfiguredLuaPreferences.DocSuffix, "lua-suffix", yqlib.ConfiguredLuaPreferences.DocSuffix, "suffix") rootCmd.PersistentFlags().BoolVar(&yqlib.ConfiguredLuaPreferences.UnquotedKeys, "lua-unquoted", yqlib.ConfiguredLuaPreferences.UnquotedKeys, "output unquoted string keys (e.g. {foo=\"bar\"})") diff --git a/cmd/utils.go b/cmd/utils.go index 363fb25d..16ddcf9e 100644 --- a/cmd/utils.go +++ b/cmd/utils.go @@ -140,9 +140,9 @@ func createDecoder(format yqlib.InputFormat, evaluateTogether bool) (yqlib.Decod case yqlib.JsonInputFormat: return yqlib.NewJSONDecoder(), nil case yqlib.CSVObjectInputFormat: - return yqlib.NewCSVObjectDecoder(','), nil + return yqlib.NewCSVObjectDecoder(yqlib.ConfiguredCsvPreferences), nil case yqlib.TSVObjectInputFormat: - return yqlib.NewCSVObjectDecoder('\t'), nil + return yqlib.NewCSVObjectDecoder(yqlib.ConfiguredTsvPreferences), nil case yqlib.TomlInputFormat: return yqlib.NewTomlDecoder(), nil case yqlib.YamlInputFormat: diff --git a/pkg/yqlib/csv.go b/pkg/yqlib/csv.go new file mode 100644 index 00000000..e43b620d --- /dev/null +++ b/pkg/yqlib/csv.go @@ -0,0 +1,23 @@ +package yqlib + +type CsvPreferences struct { + Separator rune + AutoParse bool +} + +func NewDefaultCsvPreferences() CsvPreferences { + return CsvPreferences{ + Separator: ',', + AutoParse: true, + } +} + +func NewDefaultTsvPreferences() CsvPreferences { + return CsvPreferences{ + Separator: '\t', + AutoParse: true, + } +} + +var ConfiguredCsvPreferences = NewDefaultCsvPreferences() +var ConfiguredTsvPreferences = NewDefaultTsvPreferences() diff --git a/pkg/yqlib/csv_test.go b/pkg/yqlib/csv_test.go index c8e7e16d..b5872763 100644 --- a/pkg/yqlib/csv_test.go +++ b/pkg/yqlib/csv_test.go @@ -12,6 +12,11 @@ const csvSimple = `name,numberOfCats,likesApples,height Gary,1,true,168.8 Samantha's Rabbit,2,false,-188.8 ` + +const csvSimpleWithObject = `name,numberOfCats,likesApples,height,facts +Gary,1,true,168.8,cool: true +Samantha's Rabbit,2,false,-188.8,tall: indeed +` const csvMissing = `name,numberOfCats,likesApples,height ,null,,168.8 ` @@ -39,6 +44,31 @@ const expectedYamlFromCSV = `- name: Gary likesApples: false height: -188.8 ` +const expectedYamlFromCSVWithObject = `- name: Gary + numberOfCats: 1 + likesApples: true + height: 168.8 + facts: + cool: true +- name: Samantha's Rabbit + numberOfCats: 2 + likesApples: false + height: -188.8 + facts: + tall: indeed +` + +const expectedYamlFromCSVNoParsing = `- name: Gary + numberOfCats: 1 + likesApples: true + height: 168.8 + facts: 'cool: true' +- name: Samantha's Rabbit + numberOfCats: 2 + likesApples: false + height: -188.8 + facts: 'tall: indeed' +` const expectedYamlFromCSVMissingData = `- name: Gary numberOfCats: 1 @@ -125,7 +155,7 @@ var csvScenarios = []formatScenario{ input: csvSimple, expression: ".[0].name | key", expected: "name\n", - scenarioType: "decode-csv-object", + scenarioType: "decode-csv", }, { description: "decode csv parent", @@ -133,14 +163,21 @@ var csvScenarios = []formatScenario{ input: csvSimple, expression: ".[0].name | parent | .height", expected: "168.8\n", - scenarioType: "decode-csv-object", + scenarioType: "decode-csv", }, { description: "Parse CSV into an array of objects", - subdescription: "First row is assumed to be the header row.", - input: csvSimple, - expected: expectedYamlFromCSV, - scenarioType: "decode-csv-object", + subdescription: "First row is assumed to be the header row. By default, entries with YAML/JSON formatting will be parsed!", + input: csvSimpleWithObject, + expected: expectedYamlFromCSVWithObject, + scenarioType: "decode-csv", + }, + { + description: "Parse CSV into an array of objects, no auto-parsing", + subdescription: "First row is assumed to be the header row. Entries with YAML/JSON will be left as strings.", + input: csvSimpleWithObject, + expected: expectedYamlFromCSVNoParsing, + scenarioType: "decode-csv-no-auto", }, { description: "Scalar roundtrip", @@ -172,12 +209,14 @@ func testCSVScenario(t *testing.T, s formatScenario) { test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewYamlDecoder(ConfiguredYamlPreferences), NewCsvEncoder(',')), s.description) case "encode-tsv": test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewYamlDecoder(ConfiguredYamlPreferences), NewCsvEncoder('\t')), s.description) - case "decode-csv-object": - test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewCSVObjectDecoder(','), NewYamlEncoder(2, false, ConfiguredYamlPreferences)), s.description) + case "decode-csv": + test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewCSVObjectDecoder(ConfiguredCsvPreferences), NewYamlEncoder(2, false, ConfiguredYamlPreferences)), s.description) + case "decode-csv-no-auto": + test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewCSVObjectDecoder(CsvPreferences{Separator: ',', AutoParse: false}), NewYamlEncoder(2, false, ConfiguredYamlPreferences)), s.description) case "decode-tsv-object": - test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewCSVObjectDecoder('\t'), NewYamlEncoder(2, false, ConfiguredYamlPreferences)), s.description) + test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewCSVObjectDecoder(ConfiguredTsvPreferences), NewYamlEncoder(2, false, ConfiguredYamlPreferences)), s.description) case "roundtrip-csv": - test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewCSVObjectDecoder(','), NewCsvEncoder(',')), s.description) + test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewCSVObjectDecoder(ConfiguredCsvPreferences), NewCsvEncoder(',')), s.description) default: panic(fmt.Sprintf("unhandled scenario type %q", s.scenarioType)) } @@ -204,7 +243,32 @@ func documentCSVDecodeObjectScenario(w *bufio.Writer, s formatScenario, formatTy } writeOrPanic(w, fmt.Sprintf("```yaml\n%v```\n\n", - mustProcessFormatScenario(s, NewCSVObjectDecoder(separator), NewYamlEncoder(s.indent, false, ConfiguredYamlPreferences))), + mustProcessFormatScenario(s, NewCSVObjectDecoder(CsvPreferences{Separator: separator, AutoParse: true}), NewYamlEncoder(s.indent, false, ConfiguredYamlPreferences))), + ) +} + +func documentCSVDecodeObjectNoAutoScenario(w *bufio.Writer, s formatScenario, formatType string) { + writeOrPanic(w, fmt.Sprintf("## %v\n", s.description)) + + if s.subdescription != "" { + writeOrPanic(w, s.subdescription) + writeOrPanic(w, "\n\n") + } + + writeOrPanic(w, fmt.Sprintf("Given a sample.%v file of:\n", formatType)) + writeOrPanic(w, fmt.Sprintf("```%v\n%v\n```\n", formatType, s.input)) + + writeOrPanic(w, "then\n") + writeOrPanic(w, fmt.Sprintf("```bash\nyq -p=%v --csv-auto-parse=f sample.%v\n```\n", formatType, formatType)) + writeOrPanic(w, "will output\n") + + separator := ',' + if formatType == "tsv" { + separator = '\t' + } + + writeOrPanic(w, fmt.Sprintf("```yaml\n%v```\n\n", + mustProcessFormatScenario(s, NewCSVObjectDecoder(CsvPreferences{Separator: separator, AutoParse: false}), NewYamlEncoder(s.indent, false, ConfiguredYamlPreferences))), ) } @@ -268,7 +332,7 @@ func documentCSVRoundTripScenario(w *bufio.Writer, s formatScenario, formatType } writeOrPanic(w, fmt.Sprintf("```%v\n%v```\n\n", formatType, - mustProcessFormatScenario(s, NewCSVObjectDecoder(separator), NewCsvEncoder(separator))), + mustProcessFormatScenario(s, NewCSVObjectDecoder(CsvPreferences{Separator: separator, AutoParse: true}), NewCsvEncoder(separator))), ) } @@ -282,8 +346,10 @@ func documentCSVScenario(_ *testing.T, w *bufio.Writer, i interface{}) { documentCSVEncodeScenario(w, s, "csv") case "encode-tsv": documentCSVEncodeScenario(w, s, "tsv") - case "decode-csv-object": + case "decode-csv": documentCSVDecodeObjectScenario(w, s, "csv") + case "decode-csv-no-auto": + documentCSVDecodeObjectNoAutoScenario(w, s, "csv") case "decode-tsv-object": documentCSVDecodeObjectScenario(w, s, "tsv") case "roundtrip-csv": diff --git a/pkg/yqlib/decoder_csv_object.go b/pkg/yqlib/decoder_csv_object.go index 644bbfa1..6273fcbb 100644 --- a/pkg/yqlib/decoder_csv_object.go +++ b/pkg/yqlib/decoder_csv_object.go @@ -9,27 +9,29 @@ import ( ) type csvObjectDecoder struct { - separator rune - reader csv.Reader - finished bool + prefs CsvPreferences + reader csv.Reader + finished bool } -func NewCSVObjectDecoder(separator rune) Decoder { - return &csvObjectDecoder{separator: separator} +func NewCSVObjectDecoder(prefs CsvPreferences) Decoder { + return &csvObjectDecoder{prefs: prefs} } func (dec *csvObjectDecoder) Init(reader io.Reader) error { cleanReader, enc := utfbom.Skip(reader) log.Debugf("Detected encoding: %s\n", enc) dec.reader = *csv.NewReader(cleanReader) - dec.reader.Comma = dec.separator + dec.reader.Comma = dec.prefs.Separator dec.finished = false return nil } func (dec *csvObjectDecoder) convertToNode(content string) *CandidateNode { node, err := parseSnippet(content) - if err != nil { + // if we're not auto-parsing, then we wont put in parsed objects or arrays + // but we still parse scalars + if err != nil || (!dec.prefs.AutoParse && node.Kind != ScalarNode) { return createScalarNode(content, content) } return node diff --git a/pkg/yqlib/doc/usage/csv-tsv.md b/pkg/yqlib/doc/usage/csv-tsv.md index 70331a2b..e38ce347 100644 --- a/pkg/yqlib/doc/usage/csv-tsv.md +++ b/pkg/yqlib/doc/usage/csv-tsv.md @@ -136,13 +136,13 @@ Samantha's Rabbit,,-188.8 ``` ## Parse CSV into an array of objects -First row is assumed to be the header row. +First row is assumed to be the header row. By default, entries with YAML/JSON formatting will be parsed! Given a sample.csv file of: ```csv -name,numberOfCats,likesApples,height -Gary,1,true,168.8 -Samantha's Rabbit,2,false,-188.8 +name,numberOfCats,likesApples,height,facts +Gary,1,true,168.8,cool: true +Samantha's Rabbit,2,false,-188.8,tall: indeed ``` then @@ -155,10 +155,42 @@ will output numberOfCats: 1 likesApples: true height: 168.8 + facts: + cool: true - name: Samantha's Rabbit numberOfCats: 2 likesApples: false height: -188.8 + facts: + tall: indeed +``` + +## Parse CSV into an array of objects, no auto-parsing +First row is assumed to be the header row. Entries with YAML/JSON will be left as strings. + +Given a sample.csv file of: +```csv +name,numberOfCats,likesApples,height,facts +Gary,1,true,168.8,cool: true +Samantha's Rabbit,2,false,-188.8,tall: indeed + +``` +then +```bash +yq -p=csv --csv-auto-parse=f sample.csv +``` +will output +```yaml +- name: Gary + numberOfCats: 1 + likesApples: true + height: 168.8 + facts: 'cool: true' +- name: Samantha's Rabbit + numberOfCats: 2 + likesApples: false + height: -188.8 + facts: 'tall: indeed' ``` ## Parse TSV into an array of objects diff --git a/pkg/yqlib/operator_encoder_decoder.go b/pkg/yqlib/operator_encoder_decoder.go index e6a4eb84..84037146 100644 --- a/pkg/yqlib/operator_encoder_decoder.go +++ b/pkg/yqlib/operator_encoder_decoder.go @@ -112,9 +112,9 @@ func createDecoder(format InputFormat) Decoder { case PropertiesInputFormat: decoder = NewPropertiesDecoder() case CSVObjectInputFormat: - decoder = NewCSVObjectDecoder(',') + decoder = NewCSVObjectDecoder(ConfiguredCsvPreferences) case TSVObjectInputFormat: - decoder = NewCSVObjectDecoder('\t') + decoder = NewCSVObjectDecoder(ConfiguredTsvPreferences) case UriInputFormat: decoder = NewUriDecoder() }