yq/pkg/yqlib/stream_evaluator.go

131 lines
3.6 KiB
Go
Raw Normal View History

2020-11-22 00:56:28 +00:00
package yqlib
import (
"container/list"
2021-11-23 22:27:17 +00:00
"errors"
"fmt"
2020-11-22 00:56:28 +00:00
"io"
"os"
yaml "gopkg.in/yaml.v3"
)
// StreamEvaluator is a yaml expression evaluator that runs the expression once for each yaml document,
// one document at a time. It uses less memory than loading all documents and running the expression
// once, but it cannot process cross-document expressions.
2020-11-22 00:56:28 +00:00
type StreamEvaluator interface {
2021-12-21 04:02:07 +00:00
Evaluate(filename string, reader io.Reader, node *ExpressionNode, printer Printer, leadingContent string, decoder Decoder) (uint, error)
EvaluateFiles(expression string, filenames []string, printer Printer, leadingContentPreProcessing bool, decoder Decoder) error
2021-07-19 09:52:51 +00:00
EvaluateNew(expression string, printer Printer, leadingContent string) error
2020-11-22 00:56:28 +00:00
}
// streamEvaluator is the StreamEvaluator implementation returned by
// NewStreamEvaluator.
type streamEvaluator struct {
// treeNavigator resolves an expression against a list of candidate nodes.
treeNavigator DataTreeNavigator
// fileIndex is stamped on every CandidateNode as its FileIndex; Evaluate
// increments it each time a reader reaches EOF.
fileIndex int
}
func NewStreamEvaluator() StreamEvaluator {
2022-02-01 03:47:51 +00:00
return &streamEvaluator{treeNavigator: NewDataTreeNavigator()}
2020-11-22 00:56:28 +00:00
}
2021-07-19 09:52:51 +00:00
func (s *streamEvaluator) EvaluateNew(expression string, printer Printer, leadingContent string) error {
2022-02-01 03:47:51 +00:00
node, err := ExpressionParser.ParseExpression(expression)
2020-12-01 03:06:49 +00:00
if err != nil {
return err
}
candidateNode := &CandidateNode{
2021-11-12 04:02:28 +00:00
Document: 0,
Filename: "",
Node: &yaml.Node{Kind: yaml.DocumentNode, Content: []*yaml.Node{{Tag: "!!null", Kind: yaml.ScalarNode}}},
FileIndex: 0,
LeadingContent: leadingContent,
2020-12-01 03:06:49 +00:00
}
inputList := list.New()
inputList.PushBack(candidateNode)
result, errorParsing := s.treeNavigator.GetMatchingNodes(Context{MatchingNodes: inputList}, node)
2020-12-01 03:06:49 +00:00
if errorParsing != nil {
return errorParsing
}
return printer.PrintResults(result.MatchingNodes)
2020-12-01 03:06:49 +00:00
}
2021-12-21 04:02:07 +00:00
func (s *streamEvaluator) EvaluateFiles(expression string, filenames []string, printer Printer, leadingContentPreProcessing bool, decoder Decoder) error {
var totalProcessDocs uint
2022-02-01 03:47:51 +00:00
node, err := ExpressionParser.ParseExpression(expression)
2020-11-22 00:56:28 +00:00
if err != nil {
return err
}
2021-07-19 09:52:51 +00:00
var firstFileLeadingContent string
for index, filename := range filenames {
reader, leadingContent, err := readStream(filename, leadingContentPreProcessing)
2022-04-14 02:19:15 +00:00
log.Debug("leadingContent: %v", leadingContent)
2021-07-19 09:52:51 +00:00
if index == 0 {
firstFileLeadingContent = leadingContent
}
2021-07-19 09:52:51 +00:00
2020-11-22 00:56:28 +00:00
if err != nil {
return err
}
2021-12-21 04:02:07 +00:00
processedDocs, err := s.Evaluate(filename, reader, node, printer, leadingContent, decoder)
2020-11-22 00:56:28 +00:00
if err != nil {
return err
}
2021-07-16 11:08:20 +00:00
totalProcessDocs = totalProcessDocs + processedDocs
2020-11-22 00:56:28 +00:00
switch reader := reader.(type) {
case *os.File:
safelyCloseFile(reader)
}
}
2021-07-16 11:08:20 +00:00
if totalProcessDocs == 0 {
2021-07-19 09:52:51 +00:00
return s.EvaluateNew(expression, printer, firstFileLeadingContent)
2021-07-16 11:08:20 +00:00
}
2020-11-22 00:56:28 +00:00
return nil
}
2021-12-21 04:02:07 +00:00
func (s *streamEvaluator) Evaluate(filename string, reader io.Reader, node *ExpressionNode, printer Printer, leadingContent string, decoder Decoder) (uint, error) {
2020-11-22 00:56:28 +00:00
var currentIndex uint
2021-12-21 04:02:07 +00:00
decoder.Init(reader)
2020-11-22 00:56:28 +00:00
for {
var dataBucket yaml.Node
errorReading := decoder.Decode(&dataBucket)
2021-11-22 06:43:38 +00:00
if errors.Is(errorReading, io.EOF) {
2020-11-22 00:56:28 +00:00
s.fileIndex = s.fileIndex + 1
2021-07-16 11:08:20 +00:00
return currentIndex, nil
2020-11-22 00:56:28 +00:00
} else if errorReading != nil {
return currentIndex, fmt.Errorf("bad file '%v': %w", filename, errorReading)
2020-11-22 00:56:28 +00:00
}
2021-11-12 04:02:28 +00:00
2020-11-22 00:56:28 +00:00
candidateNode := &CandidateNode{
Document: currentIndex,
Filename: filename,
Node: &dataBucket,
FileIndex: s.fileIndex,
}
2021-11-12 04:02:28 +00:00
if currentIndex == 0 {
candidateNode.LeadingContent = leadingContent
}
2020-11-22 00:56:28 +00:00
inputList := list.New()
inputList.PushBack(candidateNode)
result, errorParsing := s.treeNavigator.GetMatchingNodes(Context{MatchingNodes: inputList}, node)
2020-11-22 00:56:28 +00:00
if errorParsing != nil {
2021-07-16 11:08:20 +00:00
return currentIndex, errorParsing
2020-11-22 00:56:28 +00:00
}
err := printer.PrintResults(result.MatchingNodes)
2021-07-19 09:52:51 +00:00
2020-11-22 00:56:28 +00:00
if err != nil {
2021-07-16 11:08:20 +00:00
return currentIndex, err
2020-11-22 00:56:28 +00:00
}
currentIndex = currentIndex + 1
}
}