Fixed handling of UTF8 encoded CSVs #1373

This commit is contained in:
Mike Farah 2022-10-08 13:12:12 +11:00
parent 1b8d399de4
commit 33ec66cfdd
5 changed files with 24 additions and 1 deletions

View File

@ -63,6 +63,20 @@ EOM
assertEquals "$expected" "$X"
}
# Acceptance test: decoding utf8.csv with the CSV input parser must yield one
# YAML map per data row, keyed by the header row (id, first, last).
# NOTE(review): utf8.csv presumably begins with a UTF-8 byte-order mark, which is
# what this test is meant to exercise — confirm against the fixture file.
testInputCSVUTF8() {
# -d '' makes read consume the heredoc up to end of input, so every line lands
# in the single variable "expected"; -r keeps backslashes literal.
read -r -d '' expected << EOM
- id: 1
first: john
last: smith
- id: 1
first: jane
last: smith
EOM
# Run the locally built binary against the fixture and capture its stdout.
X=$(./yq -p=csv utf8.csv)
assertEquals "$expected" "$X"
}
testInputTSV() {
cat >test.tsv <<EOL
fruit yumLevel

1
go.mod
View File

@ -4,6 +4,7 @@ require (
github.com/a8m/envsubst v1.3.0
github.com/alecthomas/participle/v2 v2.0.0-beta.5
github.com/alecthomas/repr v0.1.0
github.com/dimchansky/utfbom v1.1.1
github.com/elliotchance/orderedmap v1.5.0
github.com/fatih/color v1.13.0
github.com/goccy/go-json v0.9.11

2
go.sum
View File

@ -8,6 +8,8 @@ github.com/alecthomas/repr v0.1.0/go.mod h1:2kn6fqh/zIyPLmm3ugklbEi5hg5wS435eygv
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dimchansky/utfbom v1.1.1 h1:vV6w1AhK4VMnhBno/TPVCoK9U/LP0PkLCS9tbxHdi/U=
github.com/dimchansky/utfbom v1.1.1/go.mod h1:SxdoEBH5qIqFocHMyGOXVAybYJdr71b1Q/j0mACtrfE=
github.com/elliotchance/orderedmap v1.5.0 h1:1IsExUsjv5XNBD3ZdC7jkAAqLWOOKdbPTmkHx63OsBg=
github.com/elliotchance/orderedmap v1.5.0/go.mod h1:wsDwEaX5jEoyhbs7x93zk2H/qv0zwuhg4inXhDkYqys=
github.com/fatih/color v1.10.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM=

View File

@ -5,6 +5,7 @@ import (
"errors"
"io"
"github.com/dimchansky/utfbom"
yaml "gopkg.in/yaml.v3"
)
@ -19,7 +20,9 @@ func NewCSVObjectDecoder(separator rune) Decoder {
}
// Init points the decoder at reader and resets it so decoding starts afresh.
//
// The input is first passed through utfbom.Skip, which detects a leading
// Unicode byte-order mark and returns a reader with it removed; without that,
// the BOM bytes would end up as part of the first header field. The detected
// encoding is only logged at debug level.
func (dec *csvObjectDecoder) Init(reader io.Reader) {
	cleanReader, enc := utfbom.Skip(reader)
	log.Debugf("Detected encoding: %s\n", enc)

	// Build the CSV reader once, over the BOM-free stream only; constructing
	// one over the raw reader first would just be overwritten here.
	dec.reader = *csv.NewReader(cleanReader)
	dec.reader.Comma = dec.separator
	dec.finished = false
}

3
utf8.csv Normal file
View File

@ -0,0 +1,3 @@
id,first,last
1,john,smith
1,jane,smith
1 id first last
2 1 john smith
3 1 jane smith