diff --git a/pkg/yqlib/decoder.go b/pkg/yqlib/decoder.go index e8c63bf7..904b65c3 100644 --- a/pkg/yqlib/decoder.go +++ b/pkg/yqlib/decoder.go @@ -3,8 +3,6 @@ package yqlib import ( "fmt" "io" - - yaml "gopkg.in/yaml.v3" ) type InputFormat uint @@ -20,8 +18,8 @@ const ( ) type Decoder interface { - Init(reader io.Reader) - Decode(node *yaml.Node) error + Init(reader io.Reader) error + Decode() (*CandidateNode, error) } func InputFormatFromString(format string) (InputFormat, error) { diff --git a/pkg/yqlib/decoder_yaml.go b/pkg/yqlib/decoder_yaml.go index 37c4ad31..bf86a4e9 100644 --- a/pkg/yqlib/decoder_yaml.go +++ b/pkg/yqlib/decoder_yaml.go @@ -1,23 +1,92 @@ package yqlib import ( + "bufio" + "errors" "io" + "regexp" + "strings" yaml "gopkg.in/yaml.v3" ) type yamlDecoder struct { decoder yaml.Decoder + // work around of various parsing issues by yaml.v3 with document headers + prefs yamlPreferences + leadingContent string } -func NewYamlDecoder() Decoder { - return &yamlDecoder{} +func NewYamlDecoder(prefs yamlPreferences) Decoder { + return &yamlDecoder{prefs: prefs} } -func (dec *yamlDecoder) Init(reader io.Reader) { - dec.decoder = *yaml.NewDecoder(reader) +func (dec *yamlDecoder) processReadStream(reader *bufio.Reader) (io.Reader, string, error) { + var commentLineRegEx = regexp.MustCompile(`^\s*#`) + var sb strings.Builder + for { + peekBytes, err := reader.Peek(3) + if errors.Is(err, io.EOF) { + // EOF are handled else where.. + return reader, sb.String(), nil + } else if err != nil { + return reader, sb.String(), err + } else if string(peekBytes) == "---" { + _, err := reader.ReadString('\n') + sb.WriteString("$yqDocSeperator$\n") + if errors.Is(err, io.EOF) { + return reader, sb.String(), nil + } else if err != nil { + return reader, sb.String(), err + } + } else if commentLineRegEx.MatchString(string(peekBytes)) { + line, err := reader.ReadString('\n') + sb.WriteString(line) + if errors.Is(err, io.EOF) { + return reader, sb.String(), nil + } else if err != nil { + return reader, sb.String(), err + } + } else { + return reader, sb.String(), nil + } + } } -func (dec *yamlDecoder) Decode(rootYamlNode *yaml.Node) error { - return dec.decoder.Decode(rootYamlNode) +func (dec *yamlDecoder) Init(reader io.Reader) error { + readerToUse := reader + leadingContent := "" + var err error + if dec.leadingContentPreProcessing { + readerToUse, leadingContent, err = dec.processReadStream(bufio.NewReader(reader)) + if err != nil { + return err + } + } + dec.leadingContent = leadingContent + dec.decoder = *yaml.NewDecoder(readerToUse) + return nil +} + +func (dec *yamlDecoder) Decode() (*CandidateNode, error) { + var dataBucket yaml.Node + + err := dec.decoder.Decode(&dataBucket) + if err != nil { + return nil, err + } + + candidateNode := &CandidateNode{ + Node: &dataBucket, + } + + if dec.leadingContent != "" { + candidateNode.LeadingContent = dec.leadingContent + dec.leadingContent = "" + } + // move document comments into candidate node + // otherwise unwrap drops them. + candidateNode.TrailingContent = dataBucket.FootComment + dataBucket.FootComment = "" + return candidateNode, nil } diff --git a/pkg/yqlib/encoder_yaml.go b/pkg/yqlib/encoder_yaml.go index 39142c19..99351589 100644 --- a/pkg/yqlib/encoder_yaml.go +++ b/pkg/yqlib/encoder_yaml.go @@ -11,10 +11,9 @@ import ( ) type yamlEncoder struct { - indent int - colorise bool - printDocSeparators bool - unwrapScalar bool + indent int + colorise bool + prefs yamlPreferences } func NewYamlEncoder(indent int, colorise bool, printDocSeparators bool, unwrapScalar bool) Encoder { diff --git a/pkg/yqlib/lib.go b/pkg/yqlib/lib.go index 5494d72c..f092cb33 100644 --- a/pkg/yqlib/lib.go +++ b/pkg/yqlib/lib.go @@ -21,6 +21,14 @@ func InitExpressionParser() { } } +type yamlPreferences struct { + LeadingContentPreProcessing bool + printDocSeparators bool + unwrapScalar bool +} + +var YamlPreferences = NewDefaultYamlPreferences() + var log = logging.MustGetLogger("yq-lib") var PrettyPrintExp = `(... | (select(tag != "!!str"), select(tag == "!!str") | select(test("(?i)^(y|yes|n|no|on|off)$") | not)) ) style=""` diff --git a/pkg/yqlib/stream_evaluator.go b/pkg/yqlib/stream_evaluator.go index 55eb6623..c430453c 100644 --- a/pkg/yqlib/stream_evaluator.go +++ b/pkg/yqlib/stream_evaluator.go @@ -59,18 +59,13 @@ func (s *streamEvaluator) EvaluateFiles(expression string, filenames []string, p var firstFileLeadingContent string - for index, filename := range filenames { - reader, leadingContent, err := readStream(filename, leadingContentPreProcessing) - log.Debug("leadingContent: %v", leadingContent) - - if index == 0 { - firstFileLeadingContent = leadingContent - } + for _, filename := range filenames { + reader, err := readStream(filename) if err != nil { return err } - processedDocs, err := s.Evaluate(filename, reader, node, printer, leadingContent, decoder) + processedDocs, err := s.Evaluate(filename, reader, node, printer, decoder) if err != nil { return err } @@ -89,13 +84,12 @@ func (s *streamEvaluator) EvaluateFiles(expression string, filenames []string, p return nil } -func (s *streamEvaluator) Evaluate(filename string, reader io.Reader, node *ExpressionNode, printer Printer, leadingContent string, decoder Decoder) (uint, error) { +func (s *streamEvaluator) Evaluate(filename string, reader io.Reader, node *ExpressionNode, printer Printer, decoder Decoder) (uint, error) { var currentIndex uint decoder.Init(reader) for { - var dataBucket yaml.Node - errorReading := decoder.Decode(&dataBucket) + candidateNode, errorReading := decoder.Decode() if errors.Is(errorReading, io.EOF) { s.fileIndex = s.fileIndex + 1 @@ -103,21 +97,10 @@ func (s *streamEvaluator) Evaluate(filename string, reader io.Reader, node *Expr } else if errorReading != nil { return currentIndex, fmt.Errorf("bad file '%v': %w", filename, errorReading) } + candidateNode.Document = currentIndex + candidateNode.Filename = filename + candidateNode.FileIndex = s.fileIndex - candidateNode := &CandidateNode{ - Document: currentIndex, - Filename: filename, - Node: &dataBucket, - FileIndex: s.fileIndex, - } - // move document comments into candidate node - // otherwise unwrap drops them. - candidateNode.TrailingContent = dataBucket.FootComment - dataBucket.FootComment = "" - - if currentIndex == 0 { - candidateNode.LeadingContent = leadingContent - } inputList := list.New() inputList.PushBack(candidateNode) diff --git a/pkg/yqlib/utils.go b/pkg/yqlib/utils.go index 27964d58..55723af7 100644 --- a/pkg/yqlib/utils.go +++ b/pkg/yqlib/utils.go @@ -7,13 +7,10 @@ import ( "fmt" "io" "os" - "regexp" "strings" - - yaml "gopkg.in/yaml.v3" ) -func readStream(filename string, leadingContentPreProcessing bool) (io.Reader, string, error) { +func readStream(filename string) (io.Reader, error) { var reader *bufio.Reader if filename == "-" { reader = bufio.NewReader(os.Stdin) @@ -22,23 +19,16 @@ func readStream(filename string, leadingContentPreProcessing bool) (io.Reader, s // and ensuring that it's not possible to give a path to a file outside thar directory. file, err := os.Open(filename) // #nosec if err != nil { - return nil, "", err + return nil, err } reader = bufio.NewReader(file) } + return reader, nil - if !leadingContentPreProcessing { - return reader, "", nil - } - return processReadStream(reader) } -func readString(input string, leadingContentPreProcessing bool) (io.Reader, string, error) { - reader := bufio.NewReader(strings.NewReader(input)) - if !leadingContentPreProcessing { - return reader, "", nil - } - return processReadStream(reader) +func readString(input string) (io.Reader, error) { + return bufio.NewReader(strings.NewReader(input)), nil } func writeString(writer io.Writer, txt string) error { @@ -46,46 +36,13 @@ func writeString(writer io.Writer, txt string) error { return errorWriting } -func processReadStream(reader *bufio.Reader) (io.Reader, string, error) { - var commentLineRegEx = regexp.MustCompile(`^\s*#`) - var sb strings.Builder - for { - peekBytes, err := reader.Peek(3) - if errors.Is(err, io.EOF) { - // EOF are handled else where.. - return reader, sb.String(), nil - } else if err != nil { - return reader, sb.String(), err - } else if string(peekBytes) == "---" { - _, err := reader.ReadString('\n') - sb.WriteString("$yqDocSeperator$\n") - if errors.Is(err, io.EOF) { - return reader, sb.String(), nil - } else if err != nil { - return reader, sb.String(), err - } - } else if commentLineRegEx.MatchString(string(peekBytes)) { - line, err := reader.ReadString('\n') - sb.WriteString(line) - if errors.Is(err, io.EOF) { - return reader, sb.String(), nil - } else if err != nil { - return reader, sb.String(), err - } - } else { - return reader, sb.String(), nil - } - } -} - func readDocuments(reader io.Reader, filename string, fileIndex int, decoder Decoder) (*list.List, error) { decoder.Init(reader) inputList := list.New() var currentIndex uint for { - var dataBucket yaml.Node - errorReading := decoder.Decode(&dataBucket) + candidateNode, errorReading := decoder.Decode() if errors.Is(errorReading, io.EOF) { switch reader := reader.(type) { @@ -96,18 +53,10 @@ func readDocuments(reader io.Reader, filename string, fileIndex int, decoder Dec } else if errorReading != nil { return nil, fmt.Errorf("bad file '%v': %w", filename, errorReading) } - candidateNode := &CandidateNode{ - Document: currentIndex, - Filename: filename, - Node: &dataBucket, - FileIndex: fileIndex, - EvaluateTogether: true, - } - - //move document comments into candidate node - // otherwise unwrap drops them. - candidateNode.TrailingContent = dataBucket.FootComment - dataBucket.FootComment = "" + candidateNode.Document = currentIndex + candidateNode.Filename = filename + candidateNode.FileIndex = fileIndex + candidateNode.EvaluateTogether = true inputList.PushBack(candidateNode)