Added NDJSON decoder (#1281)

This commit is contained in:
Mike Farah 2022-07-27 12:26:22 +10:00 committed by GitHub
parent c2c48bbe1d
commit b9a1ef89fe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 425 additions and 15 deletions

View File

@ -66,6 +66,8 @@ func configureDecoder() (yqlib.Decoder, error) {
return yqlib.NewXMLDecoder(xmlAttributePrefix, xmlContentName, xmlStrictMode, xmlKeepNamespace, xmlUseRawToken), nil
case yqlib.PropertiesInputFormat:
return yqlib.NewPropertiesDecoder(), nil
case yqlib.JsonInputFormat:
return yqlib.NewJSONDecoder(), nil
}
return yqlib.NewYamlDecoder(), nil
@ -91,7 +93,7 @@ func configurePrinterWriter(format yqlib.PrinterOutputFormat, out io.Writer) (yq
func configureEncoder(format yqlib.PrinterOutputFormat) yqlib.Encoder {
switch format {
case yqlib.JSONOutputFormat:
return yqlib.NewJONEncoder(indent, colorsEnabled)
return yqlib.NewJSONEncoder(indent, colorsEnabled)
case yqlib.PropsOutputFormat:
return yqlib.NewPropertiesEncoder(unwrapScalar)
case yqlib.CSVOutputFormat:

1
go.mod
View File

@ -6,6 +6,7 @@ require (
github.com/alecthomas/repr v0.1.0
github.com/elliotchance/orderedmap v1.4.0
github.com/fatih/color v1.13.0
github.com/goccy/go-json v0.9.10
github.com/goccy/go-yaml v1.9.5
github.com/jinzhu/copier v0.3.5
github.com/magiconair/properties v1.8.6

2
go.sum
View File

@ -17,6 +17,8 @@ github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvSc
github.com/go-playground/locales v0.13.0/go.mod h1:taPMhCMXrRLJO55olJkUXHZBHCxTMfnGwq/HNwmWNS8=
github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+Scu5vgOQjsIJAF8j9muTVoKLVtA=
github.com/go-playground/validator/v10 v10.4.1/go.mod h1:nlOn6nFhuKACm19sB/8EGNn9GlaMV7XkbRSipzJ0Ii4=
github.com/goccy/go-json v0.9.10 h1:hCeNmprSNLB8B8vQKWl6DpuH0t60oEs+TAk9a7CScKc=
github.com/goccy/go-json v0.9.10/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
github.com/goccy/go-yaml v1.9.5 h1:Eh/+3uk9kLxG4koCX6lRMAPS1OaMSAi+FJcya0INdB0=
github.com/goccy/go-yaml v1.9.5/go.mod h1:U/jl18uSupI5rdI2jmuCswEA2htH9eXfferR3KfscvA=
github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=

View File

@ -14,6 +14,7 @@ const (
XMLInputFormat
PropertiesInputFormat
Base64InputFormat
JsonInputFormat
)
type Decoder interface {
@ -29,6 +30,8 @@ func InputFormatFromString(format string) (InputFormat, error) {
return XMLInputFormat, nil
case "props", "p":
return PropertiesInputFormat, nil
case "json", "ndjson", "j":
return JsonInputFormat, nil
default:
return 0, fmt.Errorf("unknown format '%v' please use [yaml|xml|props]", format)
}

85
pkg/yqlib/decoder_json.go Normal file
View File

@ -0,0 +1,85 @@
package yqlib
import (
	"fmt"
	"io"
	"sort"

	"github.com/goccy/go-json"
	yaml "gopkg.in/yaml.v3"
)
// jsonDecoder is the Decoder implementation for JSON input. It wraps a
// streaming JSON decoder so that a single input may hold several consecutive
// JSON documents (e.g. NDJSON).
type jsonDecoder struct {
	// decoder is nil until Init is called. It is held by pointer so the value
	// returned by json.NewDecoder is used directly instead of being copied.
	decoder *json.Decoder
}

// NewJSONDecoder returns a Decoder that reads JSON. Init is expected to be
// called with the input before the first Decode.
func NewJSONDecoder() Decoder {
	return &jsonDecoder{}
}

// Init points the decoder at reader, replacing any previously configured
// stream.
func (dec *jsonDecoder) Init(reader io.Reader) {
	dec.decoder = json.NewDecoder(reader)
}
// Decode reads the next JSON value from the stream set up by Init, converts it
// to a yaml node and stores it as the only child of rootYamlNode, which is
// marked as a document node.
//
// Errors from the JSON decoder or from the conversion are returned unchanged;
// rootYamlNode is not modified in that case.
func (dec *jsonDecoder) Decode(rootYamlNode *yaml.Node) error {
	log.Debug("going to decode")

	var decoded interface{}
	if err := dec.decoder.Decode(&decoded); err != nil {
		return err
	}

	content, err := dec.convertToYamlNode(decoded)
	if err != nil {
		return err
	}

	rootYamlNode.Content = []*yaml.Node{content}
	rootYamlNode.Kind = yaml.DocumentNode
	return nil
}
// convertToYamlNode converts a value produced by the JSON decoder into the
// equivalent yaml node.
//
// nil becomes a "null" scalar, numbers/strings/booleans become scalars, and
// objects and arrays are converted recursively via parseMap and parseArray.
// Any other dynamic type yields an error naming that type.
func (dec *jsonDecoder) convertToYamlNode(data interface{}) (*yaml.Node, error) {
	switch data := data.(type) {
	case nil:
		return createScalarNode(nil, "null"), nil
	case float64, float32:
		// json decoder returns ints as float. Formatting with %v and re-parsing
		// the text as a snippet lets whole numbers such as 3.0 come out as 3.
		return parseSnippet(fmt.Sprintf("%v", data))
	case int, int64, int32, string, bool:
		return createScalarNode(data, fmt.Sprintf("%v", data)), nil
	case map[string]interface{}:
		return dec.parseMap(data)
	case []interface{}:
		return dec.parseArray(data)
	default:
		// Report the dynamic type so an unsupported value can be diagnosed.
		return nil, fmt.Errorf("unrecognised type %T", data)
	}
}
// parseMap converts a decoded JSON object into a yaml mapping node whose
// Content alternates key scalar, value node.
//
// Go map iteration order is randomised, so the keys are visited in sorted
// order to make the resulting node (and anything printed from it) the same on
// every run. The first conversion error aborts and is returned with a nil node.
func (dec *jsonDecoder) parseMap(dataMap map[string]interface{}) (*yaml.Node, error) {
	keys := make([]string, 0, len(dataMap))
	for key := range dataMap {
		keys = append(keys, key)
	}
	sort.Strings(keys)

	var yamlMap = &yaml.Node{Kind: yaml.MappingNode}
	for _, key := range keys {
		yamlValue, err := dec.convertToYamlNode(dataMap[key])
		if err != nil {
			return nil, err
		}
		// key is already a string, so it is used directly as the scalar text.
		yamlMap.Content = append(yamlMap.Content, createScalarNode(key, key), yamlValue)
	}
	return yamlMap, nil
}
// parseArray converts a decoded JSON array into a yaml sequence node,
// converting the elements in order. The first element that fails to convert
// aborts the conversion and its error is returned with a nil node.
func (dec *jsonDecoder) parseArray(dataArray []interface{}) (*yaml.Node, error) {
	sequence := &yaml.Node{Kind: yaml.SequenceNode}
	for i := range dataArray {
		element, err := dec.convertToYamlNode(dataArray[i])
		if err != nil {
			return nil, err
		}
		sequence.Content = append(sequence.Content, element)
	}
	return sequence, nil
}

View File

@ -1,6 +1,8 @@
# JSON
Encode and decode to and from JSON. Note that YAML is a _superset_ of JSON - so `yq` can read any json file without doing anything special.
Encode and decode to and from JSON. Note that, unless you have multiple JSON documents in a single file, YAML is a _superset_ of JSON - so `yq` can read any json file without doing anything special.
If you do have multiple JSON documents in a single file (e.g. NDJSON) then you will need to explicitly use the json parser `-p=json`.
This means you don't need to 'convert' a JSON file to YAML - however if you want idiomatic YAML styling, then you can use the `-P/--prettyPrint` flag, see examples below.
@ -130,3 +132,129 @@ will output
{"whatever":"cat"}
```
## Roundtrip NDJSON
Unfortunately the json encoder strips leading spaces of values.
Given a sample.json file of:
```json
{"this": "is a multidoc json file"}
{"each": ["line is a valid json document"]}
{"a number": 4}
```
then
```bash
yq -p=json -o=json -I=0 sample.json
```
will output
```json
{"this":"is a multidoc json file"}
{"each":["line is a valid json document"]}
{"a number":4}
```
## Roundtrip multi-document JSON
The NDJSON parser can also handle multiple multi-line json documents in a single file!
Given a sample.json file of:
```json
{
"this": "is a multidoc json file"
}
{
"it": [
"has",
"consecutive",
"json documents"
]
}
{
"a number": 4
}
```
then
```bash
yq -p=json -o=json -I=2 sample.json
```
will output
```json
{
"this": "is a multidoc json file"
}
{
"it": [
"has",
"consecutive",
"json documents"
]
}
{
"a number": 4
}
```
## Update a specific document in a multi-document json
Documents are indexed by the `documentIndex` or `di` operator.
Given a sample.json file of:
```json
{"this": "is a multidoc json file"}
{"each": ["line is a valid json document"]}
{"a number": 4}
```
then
```bash
yq -p=json -o=json -I=0 '(select(di == 1) | .each ) += "cool"' sample.json
```
will output
```json
{"this":"is a multidoc json file"}
{"each":["line is a valid json document","cool"]}
{"a number":4}
```
## Find and update a specific document in a multi-document json
Use expressions as you normally would.
Given a sample.json file of:
```json
{"this": "is a multidoc json file"}
{"each": ["line is a valid json document"]}
{"a number": 4}
```
then
```bash
yq -p=json -o=json -I=0 '(select(has("each")) | .each ) += "cool"' sample.json
```
will output
```json
{"this":"is a multidoc json file"}
{"each":["line is a valid json document","cool"]}
{"a number":4}
```
## Decode NDJSON
Given a sample.json file of:
```json
{"this": "is a multidoc json file"}
{"each": ["line is a valid json document"]}
{"a number": 4}
```
then
```bash
yq -p=json sample.json
```
will output
```yaml
this: is a multidoc json file
---
each:
- line is a valid json document
---
a number: 4
```

View File

@ -1,5 +1,7 @@
# JSON
Encode and decode to and from JSON. Note that YAML is a _superset_ of JSON - so `yq` can read any json file without doing anything special.
Encode and decode to and from JSON. Note that, unless you have multiple JSON documents in a single file, YAML is a _superset_ of JSON - so `yq` can read any json file without doing anything special.
If you do have multiple JSON documents in a single file (e.g. NDJSON) then you will need to explicitly use the json parser `-p=json`.
This means you don't need to 'convert' a JSON file to YAML - however if you want idiomatic YAML styling, then you can use the `-P/--prettyPrint` flag, see examples below.

View File

@ -2,9 +2,9 @@ package yqlib
import (
"bytes"
"encoding/json"
"io"
"github.com/goccy/go-json"
yaml "gopkg.in/yaml.v3"
)
@ -28,7 +28,7 @@ func mapKeysToStrings(node *yaml.Node) {
}
}
func NewJONEncoder(indent int, colorise bool) Encoder {
func NewJSONEncoder(indent int, colorise bool) Encoder {
var indentString = ""
for index := 0; index < indent; index++ {

View File

@ -13,7 +13,7 @@ func yamlToJSON(sampleYaml string, indent int) string {
var output bytes.Buffer
writer := bufio.NewWriter(&output)
var jsonEncoder = NewJONEncoder(indent, false)
var jsonEncoder = NewJSONEncoder(indent, false)
inputs, err := readDocuments(strings.NewReader(sampleYaml), "sample.yml", 0, NewYamlDecoder())
if err != nil {
panic(err)

View File

@ -17,6 +17,59 @@ b:
- 4
`
// sampleNdJson is the NDJSON input fixture: three JSON documents, one per line.
const sampleNdJson = `{"this": "is a multidoc json file"}
{"each": ["line is a valid json document"]}
{"a number": 4}
`
// expectedNdJsonYaml is the expected YAML for the "Decode NDJSON" scenario,
// with the documents of sampleNdJson separated by `---`.
const expectedNdJsonYaml = `this: is a multidoc json file
---
each:
- line is a valid json document
---
a number: 4
`
// expectedRoundTripSampleNdJson is the expected output of the "Roundtrip
// NDJSON" scenario for sampleNdJson; it differs from the input only in the
// spaces after ':' being gone.
const expectedRoundTripSampleNdJson = `{"this":"is a multidoc json file"}
{"each":["line is a valid json document"]}
{"a number":4}
`
// expectedUpdatedMultilineJson is the expected output of the two update
// scenarios, which append "cool" to the `each` array of the second document.
const expectedUpdatedMultilineJson = `{"this":"is a multidoc json file"}
{"each":["line is a valid json document","cool"]}
{"a number":4}
`
// sampleMultiLineJson is the input fixture for the "Roundtrip multi-document
// JSON" scenario: three consecutive JSON documents, each spanning several lines.
const sampleMultiLineJson = `{
"this": "is a multidoc json file"
}
{
"it": [
"has",
"consecutive",
"json documents"
]
}
{
"a number": 4
}
`
// roundTripMultiLineJson is the expected output of the "Roundtrip
// multi-document JSON" scenario for sampleMultiLineJson.
const roundTripMultiLineJson = `{
"this": "is a multidoc json file"
}
{
"it": [
"has",
"consecutive",
"json documents"
]
}
{
"a number": 4
}
`
var jsonScenarios = []formatScenario{
{
description: "Parse json: simple",
@ -68,6 +121,120 @@ var jsonScenarios = []formatScenario{
expected: "{\"stuff\":\"cool\"}\n{\"whatever\":\"cat\"}\n",
scenarioType: "encode",
},
{
description: "Roundtrip NDJSON",
subdescription: "Unfortunately the json encoder strips leading spaces of values.",
input: sampleNdJson,
expected: expectedRoundTripSampleNdJson,
scenarioType: "roundtrip-ndjson",
},
{
description: "Roundtrip multi-document JSON",
subdescription: "The NDJSON parser can also handle multiple multi-line json documents in a single file!",
input: sampleMultiLineJson,
expected: roundTripMultiLineJson,
scenarioType: "roundtrip-multi",
},
{
description: "Update a specific document in a multi-document json",
subdescription: "Documents are indexed by the `documentIndex` or `di` operator.",
input: sampleNdJson,
expected: expectedUpdatedMultilineJson,
expression: `(select(di == 1) | .each ) += "cool"`,
scenarioType: "roundtrip-ndjson",
},
{
description: "Find and update a specific document in a multi-document json",
subdescription: "Use expressions as you normally would.",
input: sampleNdJson,
expected: expectedUpdatedMultilineJson,
expression: `(select(has("each")) | .each ) += "cool"`,
scenarioType: "roundtrip-ndjson",
},
{
description: "Decode NDJSON",
input: sampleNdJson,
expected: expectedNdJsonYaml,
scenarioType: "decode-ndjson",
},
{
description: "numbers",
skipDoc: true,
input: "[3, 3.0, 3.1, -1]",
expected: "- 3\n- 3\n- 3.1\n- -1\n",
scenarioType: "decode-ndjson",
},
{
description: "strings",
skipDoc: true,
input: `["", "cat"]`,
expected: "- \"\"\n- cat\n",
scenarioType: "decode-ndjson",
},
{
description: "null",
skipDoc: true,
input: `null`,
expected: "null\n",
scenarioType: "decode-ndjson",
},
{
description: "booleans",
skipDoc: true,
input: `[true, false]`,
expected: "- true\n- false\n",
scenarioType: "decode-ndjson",
},
}
// documentRoundtripNdJsonScenario writes the markdown documentation for a
// JSON-in/JSON-out scenario to w: a heading, the optional subdescription, the
// sample input, the yq command line and the output produced by running the
// scenario through the JSON decoder and a JSON encoder using indent.
//
// The output block is fenced as json (not yaml) because it is JSON encoder
// output, matching how documentJSONEncodeScenario fences its output.
func documentRoundtripNdJsonScenario(w *bufio.Writer, s formatScenario, indent int) {
	writeOrPanic(w, fmt.Sprintf("## %v\n", s.description))

	if s.subdescription != "" {
		writeOrPanic(w, s.subdescription)
		writeOrPanic(w, "\n\n")
	}

	writeOrPanic(w, "Given a sample.json file of:\n")
	writeOrPanic(w, fmt.Sprintf("```json\n%v\n```\n", s.input))

	writeOrPanic(w, "then\n")

	expression := s.expression
	if expression != "" {
		writeOrPanic(w, fmt.Sprintf("```bash\nyq -p=json -o=json -I=%v '%v' sample.json\n```\n", indent, expression))
	} else {
		writeOrPanic(w, fmt.Sprintf("```bash\nyq -p=json -o=json -I=%v sample.json\n```\n", indent))
	}

	writeOrPanic(w, "will output\n")

	writeOrPanic(w, fmt.Sprintf("```json\n%v```\n\n", processFormatScenario(s, NewJSONDecoder(), NewJSONEncoder(indent, false))))
}
// documentDecodeNdJsonScenario writes the markdown documentation for a
// JSON-to-YAML scenario to w: a heading, the optional subdescription, the
// sample input, the yq command line and the YAML produced by running the
// scenario through the JSON decoder and a YAML encoder.
func documentDecodeNdJsonScenario(w *bufio.Writer, s formatScenario) {
	writeOrPanic(w, fmt.Sprintf("## %v\n", s.description))
	if s.subdescription != "" {
		writeOrPanic(w, s.subdescription+"\n\n")
	}

	writeOrPanic(w, "Given a sample.json file of:\n")
	writeOrPanic(w, fmt.Sprintf("```json\n%v\n```\n", s.input))
	writeOrPanic(w, "then\n")

	// Without an expression the command line is just the decoder flag.
	command := "```bash\nyq -p=json sample.json\n```\n"
	if s.expression != "" {
		command = fmt.Sprintf("```bash\nyq -p=json '%v' sample.json\n```\n", s.expression)
	}
	writeOrPanic(w, command)

	writeOrPanic(w, "will output\n")

	// The scenario is only run once everything before it has been written.
	rendered := processFormatScenario(s, NewJSONDecoder(), NewYamlEncoder(s.indent, false, true, true))
	writeOrPanic(w, fmt.Sprintf("```yaml\n%v```\n\n", rendered))
}
func decodeJSON(t *testing.T, jsonString string) *CandidateNode {
@ -98,10 +265,16 @@ func decodeJSON(t *testing.T, jsonString string) *CandidateNode {
func testJSONScenario(t *testing.T, s formatScenario) {
switch s.scenarioType {
case "encode", "decode":
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewYamlDecoder(), NewJONEncoder(s.indent, false)), s.description)
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewYamlDecoder(), NewJSONEncoder(s.indent, false)), s.description)
case "":
var actual = resultToString(t, decodeJSON(t, s.input))
test.AssertResultWithContext(t, s.expected, actual, s.description)
case "decode-ndjson":
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewJSONDecoder(), NewYamlEncoder(2, false, true, true)), s.description)
case "roundtrip-ndjson":
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewJSONDecoder(), NewJSONEncoder(0, false)), s.description)
case "roundtrip-multi":
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewJSONDecoder(), NewJSONEncoder(2, false)), s.description)
default:
panic(fmt.Sprintf("unhandled scenario type %q", s.scenarioType))
@ -147,6 +320,12 @@ func documentJSONScenario(t *testing.T, w *bufio.Writer, i interface{}) {
documentJSONDecodeScenario(t, w, s)
case "encode":
documentJSONEncodeScenario(w, s)
case "decode-ndjson":
documentDecodeNdJsonScenario(w, s)
case "roundtrip-ndjson":
documentRoundtripNdJsonScenario(w, s, 0)
case "roundtrip-multi":
documentRoundtripNdJsonScenario(w, s, 2)
default:
panic(fmt.Sprintf("unhandled scenario type %q", s.scenarioType))
@ -178,7 +357,7 @@ func documentJSONEncodeScenario(w *bufio.Writer, s formatScenario) {
}
writeOrPanic(w, "will output\n")
writeOrPanic(w, fmt.Sprintf("```json\n%v```\n\n", processFormatScenario(s, NewYamlDecoder(), NewJONEncoder(s.indent, false))))
writeOrPanic(w, fmt.Sprintf("```json\n%v```\n\n", processFormatScenario(s, NewYamlDecoder(), NewJSONEncoder(s.indent, false))))
}
func TestJSONScenarios(t *testing.T) {

View File

@ -240,20 +240,28 @@ func guessTagFromCustomType(node *yaml.Node) string {
log.Warning("node has no value to guess the type with")
return node.Tag
}
dataBucket, errorReading := parseSnippet(node.Value)
decoder := NewYamlDecoder()
decoder.Init(strings.NewReader(node.Value))
var dataBucket yaml.Node
errorReading := decoder.Decode(&dataBucket)
if errorReading != nil {
log.Warning("could not guess underlying tag type %v", errorReading)
return node.Tag
}
guessedTag := unwrapDoc(&dataBucket).Tag
guessedTag := unwrapDoc(dataBucket).Tag
log.Info("im guessing the tag %v is a %v", node.Tag, guessedTag)
return guessedTag
}
// parseSnippet decodes value with the YAML decoder and returns the first child
// of the decoded root node.
//
// It returns a nil node and an error when the decoder fails or when nothing
// was decoded. A node is never returned together with a non-nil error.
func parseSnippet(value string) (*yaml.Node, error) {
	decoder := NewYamlDecoder()
	decoder.Init(strings.NewReader(value))

	var dataBucket yaml.Node
	err := decoder.Decode(&dataBucket)

	if len(dataBucket.Content) == 0 {
		if err != nil {
			// Keep the cause in the message instead of dropping it. %v rather
			// than %w on purpose: callers keep receiving a distinct "bad data"
			// error, not the decoder's own error value.
			return nil, fmt.Errorf("bad data: %v", err)
		}
		return nil, fmt.Errorf("bad data")
	}
	if err != nil {
		return nil, err
	}
	return dataBucket.Content[0], nil
}
func recursiveNodeEqual(lhs *yaml.Node, rhs *yaml.Node) bool {
if lhs.Kind != rhs.Kind {
return false

View File

@ -13,7 +13,7 @@ import (
func configureEncoder(format PrinterOutputFormat, indent int) Encoder {
switch format {
case JSONOutputFormat:
return NewJONEncoder(indent, false)
return NewJSONEncoder(indent, false)
case PropsOutputFormat:
return NewPropertiesEncoder(true)
case CSVOutputFormat:

View File

@ -314,7 +314,7 @@ func TestPrinterMultipleDocsJson(t *testing.T) {
var writer = bufio.NewWriter(&output)
// note printDocSeparators is true, it should still not print document separators
// when outputing JSON.
printer := NewPrinter(NewJONEncoder(0, false), NewSinglePrinterWriter(writer))
printer := NewPrinter(NewJSONEncoder(0, false), NewSinglePrinterWriter(writer))
inputs, err := readDocuments(strings.NewReader(multiDocSample), "sample.yml", 0, NewYamlDecoder())
if err != nil {