Added new CSV option to turn off auto-parsing #1947

This commit is contained in:
Mike Farah 2024-02-15 13:11:53 +11:00
parent e81b600744
commit c32a9ceab8
8 changed files with 188 additions and 28 deletions

View File

@ -63,6 +63,40 @@ EOM
assertEquals "$expected" "$X"
}
testInputCSVNoAuto() {
cat >test.csv <<EOL
thing1
name: cat
EOL
read -r -d '' expected << EOM
- thing1: 'name: cat'
EOM
X=$(./yq --csv-auto-parse=f test.csv -oy)
assertEquals "$expected" "$X"
X=$(./yq ea --csv-auto-parse=f test.csv -oy)
assertEquals "$expected" "$X"
}
testInputTSVNoAuto() {
cat >test.tsv <<EOL
thing1
name: cat
EOL
read -r -d '' expected << EOM
- thing1: 'name: cat'
EOM
X=$(./yq --tsv-auto-parse=f test.tsv -oy)
assertEquals "$expected" "$X"
X=$(./yq ea --tsv-auto-parse=f test.tsv -oy)
assertEquals "$expected" "$X"
}
testInputCSVUTF8() {
read -r -d '' expected << EOM
- id: 1

View File

@ -80,6 +80,9 @@ yq -P -oy sample.json
rootCmd.PersistentFlags().BoolVar(&yqlib.ConfiguredXMLPreferences.SkipProcInst, "xml-skip-proc-inst", yqlib.ConfiguredXMLPreferences.SkipProcInst, "skip over process instructions (e.g. <?xml version=\"1\"?>)")
rootCmd.PersistentFlags().BoolVar(&yqlib.ConfiguredXMLPreferences.SkipDirectives, "xml-skip-directives", yqlib.ConfiguredXMLPreferences.SkipDirectives, "skip over directives (e.g. <!DOCTYPE thing cat>)")
rootCmd.PersistentFlags().BoolVar(&yqlib.ConfiguredCsvPreferences.AutoParse, "csv-auto-parse", yqlib.ConfiguredCsvPreferences.AutoParse, "parse CSV YAML/JSON values")
rootCmd.PersistentFlags().BoolVar(&yqlib.ConfiguredTsvPreferences.AutoParse, "tsv-auto-parse", yqlib.ConfiguredTsvPreferences.AutoParse, "parse TSV YAML/JSON values")
rootCmd.PersistentFlags().StringVar(&yqlib.ConfiguredLuaPreferences.DocPrefix, "lua-prefix", yqlib.ConfiguredLuaPreferences.DocPrefix, "prefix")
rootCmd.PersistentFlags().StringVar(&yqlib.ConfiguredLuaPreferences.DocSuffix, "lua-suffix", yqlib.ConfiguredLuaPreferences.DocSuffix, "suffix")
rootCmd.PersistentFlags().BoolVar(&yqlib.ConfiguredLuaPreferences.UnquotedKeys, "lua-unquoted", yqlib.ConfiguredLuaPreferences.UnquotedKeys, "output unquoted string keys (e.g. {foo=\"bar\"})")

View File

@ -140,9 +140,9 @@ func createDecoder(format yqlib.InputFormat, evaluateTogether bool) (yqlib.Decod
case yqlib.JsonInputFormat:
return yqlib.NewJSONDecoder(), nil
case yqlib.CSVObjectInputFormat:
return yqlib.NewCSVObjectDecoder(','), nil
return yqlib.NewCSVObjectDecoder(yqlib.ConfiguredCsvPreferences), nil
case yqlib.TSVObjectInputFormat:
return yqlib.NewCSVObjectDecoder('\t'), nil
return yqlib.NewCSVObjectDecoder(yqlib.ConfiguredTsvPreferences), nil
case yqlib.TomlInputFormat:
return yqlib.NewTomlDecoder(), nil
case yqlib.YamlInputFormat:

23
pkg/yqlib/csv.go Normal file
View File

@ -0,0 +1,23 @@
package yqlib
type CsvPreferences struct {
Separator rune
AutoParse bool
}
func NewDefaultCsvPreferences() CsvPreferences {
return CsvPreferences{
Separator: ',',
AutoParse: true,
}
}
func NewDefaultTsvPreferences() CsvPreferences {
return CsvPreferences{
Separator: '\t',
AutoParse: true,
}
}
var ConfiguredCsvPreferences = NewDefaultCsvPreferences()
var ConfiguredTsvPreferences = NewDefaultTsvPreferences()

View File

@ -12,6 +12,11 @@ const csvSimple = `name,numberOfCats,likesApples,height
Gary,1,true,168.8
Samantha's Rabbit,2,false,-188.8
`
const csvSimpleWithObject = `name,numberOfCats,likesApples,height,facts
Gary,1,true,168.8,cool: true
Samantha's Rabbit,2,false,-188.8,tall: indeed
`
const csvMissing = `name,numberOfCats,likesApples,height
,null,,168.8
`
@ -39,6 +44,31 @@ const expectedYamlFromCSV = `- name: Gary
likesApples: false
height: -188.8
`
const expectedYamlFromCSVWithObject = `- name: Gary
numberOfCats: 1
likesApples: true
height: 168.8
facts:
cool: true
- name: Samantha's Rabbit
numberOfCats: 2
likesApples: false
height: -188.8
facts:
tall: indeed
`
const expectedYamlFromCSVNoParsing = `- name: Gary
numberOfCats: 1
likesApples: true
height: 168.8
facts: 'cool: true'
- name: Samantha's Rabbit
numberOfCats: 2
likesApples: false
height: -188.8
facts: 'tall: indeed'
`
const expectedYamlFromCSVMissingData = `- name: Gary
numberOfCats: 1
@ -125,7 +155,7 @@ var csvScenarios = []formatScenario{
input: csvSimple,
expression: ".[0].name | key",
expected: "name\n",
scenarioType: "decode-csv-object",
scenarioType: "decode-csv",
},
{
description: "decode csv parent",
@ -133,14 +163,21 @@ var csvScenarios = []formatScenario{
input: csvSimple,
expression: ".[0].name | parent | .height",
expected: "168.8\n",
scenarioType: "decode-csv-object",
scenarioType: "decode-csv",
},
{
description: "Parse CSV into an array of objects",
subdescription: "First row is assumed to be the header row.",
input: csvSimple,
expected: expectedYamlFromCSV,
scenarioType: "decode-csv-object",
subdescription: "First row is assumed to be the header row. By default, entries with YAML/JSON formatting will be parsed!",
input: csvSimpleWithObject,
expected: expectedYamlFromCSVWithObject,
scenarioType: "decode-csv",
},
{
description: "Parse CSV into an array of objects, no auto-parsing",
subdescription: "First row is assumed to be the header row. Entries with YAML/JSON will be left as strings.",
input: csvSimpleWithObject,
expected: expectedYamlFromCSVNoParsing,
scenarioType: "decode-csv-no-auto",
},
{
description: "Scalar roundtrip",
@ -172,12 +209,14 @@ func testCSVScenario(t *testing.T, s formatScenario) {
test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewYamlDecoder(ConfiguredYamlPreferences), NewCsvEncoder(',')), s.description)
case "encode-tsv":
test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewYamlDecoder(ConfiguredYamlPreferences), NewCsvEncoder('\t')), s.description)
case "decode-csv-object":
test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewCSVObjectDecoder(','), NewYamlEncoder(2, false, ConfiguredYamlPreferences)), s.description)
case "decode-csv":
test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewCSVObjectDecoder(ConfiguredCsvPreferences), NewYamlEncoder(2, false, ConfiguredYamlPreferences)), s.description)
case "decode-csv-no-auto":
test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewCSVObjectDecoder(CsvPreferences{Separator: ',', AutoParse: false}), NewYamlEncoder(2, false, ConfiguredYamlPreferences)), s.description)
case "decode-tsv-object":
test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewCSVObjectDecoder('\t'), NewYamlEncoder(2, false, ConfiguredYamlPreferences)), s.description)
test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewCSVObjectDecoder(ConfiguredTsvPreferences), NewYamlEncoder(2, false, ConfiguredYamlPreferences)), s.description)
case "roundtrip-csv":
test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewCSVObjectDecoder(','), NewCsvEncoder(',')), s.description)
test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewCSVObjectDecoder(ConfiguredCsvPreferences), NewCsvEncoder(',')), s.description)
default:
panic(fmt.Sprintf("unhandled scenario type %q", s.scenarioType))
}
@ -204,7 +243,32 @@ func documentCSVDecodeObjectScenario(w *bufio.Writer, s formatScenario, formatTy
}
writeOrPanic(w, fmt.Sprintf("```yaml\n%v```\n\n",
mustProcessFormatScenario(s, NewCSVObjectDecoder(separator), NewYamlEncoder(s.indent, false, ConfiguredYamlPreferences))),
mustProcessFormatScenario(s, NewCSVObjectDecoder(CsvPreferences{Separator: separator, AutoParse: true}), NewYamlEncoder(s.indent, false, ConfiguredYamlPreferences))),
)
}
func documentCSVDecodeObjectNoAutoScenario(w *bufio.Writer, s formatScenario, formatType string) {
writeOrPanic(w, fmt.Sprintf("## %v\n", s.description))
if s.subdescription != "" {
writeOrPanic(w, s.subdescription)
writeOrPanic(w, "\n\n")
}
writeOrPanic(w, fmt.Sprintf("Given a sample.%v file of:\n", formatType))
writeOrPanic(w, fmt.Sprintf("```%v\n%v\n```\n", formatType, s.input))
writeOrPanic(w, "then\n")
writeOrPanic(w, fmt.Sprintf("```bash\nyq -p=%v --csv-auto-parse=f sample.%v\n```\n", formatType, formatType))
writeOrPanic(w, "will output\n")
separator := ','
if formatType == "tsv" {
separator = '\t'
}
writeOrPanic(w, fmt.Sprintf("```yaml\n%v```\n\n",
mustProcessFormatScenario(s, NewCSVObjectDecoder(CsvPreferences{Separator: separator, AutoParse: false}), NewYamlEncoder(s.indent, false, ConfiguredYamlPreferences))),
)
}
@ -268,7 +332,7 @@ func documentCSVRoundTripScenario(w *bufio.Writer, s formatScenario, formatType
}
writeOrPanic(w, fmt.Sprintf("```%v\n%v```\n\n", formatType,
mustProcessFormatScenario(s, NewCSVObjectDecoder(separator), NewCsvEncoder(separator))),
mustProcessFormatScenario(s, NewCSVObjectDecoder(CsvPreferences{Separator: separator, AutoParse: true}), NewCsvEncoder(separator))),
)
}
@ -282,8 +346,10 @@ func documentCSVScenario(_ *testing.T, w *bufio.Writer, i interface{}) {
documentCSVEncodeScenario(w, s, "csv")
case "encode-tsv":
documentCSVEncodeScenario(w, s, "tsv")
case "decode-csv-object":
case "decode-csv":
documentCSVDecodeObjectScenario(w, s, "csv")
case "decode-csv-no-auto":
documentCSVDecodeObjectNoAutoScenario(w, s, "csv")
case "decode-tsv-object":
documentCSVDecodeObjectScenario(w, s, "tsv")
case "roundtrip-csv":

View File

@ -9,27 +9,29 @@ import (
)
type csvObjectDecoder struct {
separator rune
reader csv.Reader
finished bool
prefs CsvPreferences
reader csv.Reader
finished bool
}
func NewCSVObjectDecoder(separator rune) Decoder {
return &csvObjectDecoder{separator: separator}
func NewCSVObjectDecoder(prefs CsvPreferences) Decoder {
return &csvObjectDecoder{prefs: prefs}
}
func (dec *csvObjectDecoder) Init(reader io.Reader) error {
cleanReader, enc := utfbom.Skip(reader)
log.Debugf("Detected encoding: %s\n", enc)
dec.reader = *csv.NewReader(cleanReader)
dec.reader.Comma = dec.separator
dec.reader.Comma = dec.prefs.Separator
dec.finished = false
return nil
}
func (dec *csvObjectDecoder) convertToNode(content string) *CandidateNode {
node, err := parseSnippet(content)
if err != nil {
// if we're not auto-parsing, then we wont put in parsed objects or arrays
// but we still parse scalars
if err != nil || (!dec.prefs.AutoParse && node.Kind != ScalarNode) {
return createScalarNode(content, content)
}
return node

View File

@ -136,13 +136,13 @@ Samantha's Rabbit,,-188.8
```
## Parse CSV into an array of objects
First row is assumed to be the header row.
First row is assumed to be the header row. By default, entries with YAML/JSON formatting will be parsed!
Given a sample.csv file of:
```csv
name,numberOfCats,likesApples,height
Gary,1,true,168.8
Samantha's Rabbit,2,false,-188.8
name,numberOfCats,likesApples,height,facts
Gary,1,true,168.8,cool: true
Samantha's Rabbit,2,false,-188.8,tall: indeed
```
then
@ -155,10 +155,42 @@ will output
numberOfCats: 1
likesApples: true
height: 168.8
facts:
cool: true
- name: Samantha's Rabbit
numberOfCats: 2
likesApples: false
height: -188.8
facts:
tall: indeed
```
## Parse CSV into an array of objects, no auto-parsing
First row is assumed to be the header row. Entries with YAML/JSON will be left as strings.
Given a sample.csv file of:
```csv
name,numberOfCats,likesApples,height,facts
Gary,1,true,168.8,cool: true
Samantha's Rabbit,2,false,-188.8,tall: indeed
```
then
```bash
yq -p=csv --csv-auto-parse=f sample.csv
```
will output
```yaml
- name: Gary
numberOfCats: 1
likesApples: true
height: 168.8
facts: 'cool: true'
- name: Samantha's Rabbit
numberOfCats: 2
likesApples: false
height: -188.8
facts: 'tall: indeed'
```
## Parse TSV into an array of objects

View File

@ -112,9 +112,9 @@ func createDecoder(format InputFormat) Decoder {
case PropertiesInputFormat:
decoder = NewPropertiesDecoder()
case CSVObjectInputFormat:
decoder = NewCSVObjectDecoder(',')
decoder = NewCSVObjectDecoder(ConfiguredCsvPreferences)
case TSVObjectInputFormat:
decoder = NewCSVObjectDecoder('\t')
decoder = NewCSVObjectDecoder(ConfiguredTsvPreferences)
case UriInputFormat:
decoder = NewUriDecoder()
}