eval-reduce wip

This commit is contained in:
Mike Farah 2021-07-24 15:21:53 +10:00
parent eeac03a437
commit bb5a6f2184
13 changed files with 279 additions and 21 deletions

View File

@ -34,7 +34,8 @@ See https://mikefarah.gitbook.io/yq/ for detailed documentation and examples.
## Evaluate All ##
This command loads _all_ yaml documents of _all_ yaml files and runs expression once
Useful when you need to run an expression across several yaml documents or files (like merge).
Note that it consumes more memory than eval.
If you're just merging entire multiple files together, you may want to consider eval-reduce as it's faster.
Note that it consumes more memory than eval and eval-reduce.
`,
RunE: evaluateAll,
}

View File

@ -0,0 +1,112 @@
package cmd
import (
"errors"
"fmt"
"os"
"github.com/mikefarah/yq/v4/pkg/yqlib"
"github.com/spf13/cobra"
)
// createEvaluateReduceCommand builds the cobra command for "eval-reduce"
// (alias "er"). The command takes a reduce expression followed by one or more
// yaml files and delegates execution to evaluateReduce.
func createEvaluateReduceCommand() *cobra.Command {
	return &cobra.Command{
		Use:     "eval-reduce [reduce expression] [yaml_file1]...",
		Aliases: []string{"er"},
		RunE:    evaluateReduce,
		Short:   "Runs a reduce expression sequentially against each document of each file given. More memory efficient than using eval-all if you can get away with it.",
		Long: `yq is a portable command-line YAML processor (https://github.com/mikefarah/yq/)
See https://mikefarah.gitbook.io/yq/ for detailed documentation and examples.
## Evaluate Reduce ##
This command runs the reduce expression against each document of each file given, accumulating the results.
It is most useful when merging multiple files (but isn't as flexible as using eval-all with ireduce, as you
can only merge the top level nodes).
`,
		Example: `
# Merge f2.yml into f1.yml (inplace)
yq eval-reduce --inplace '{} ; . * $doc' f1.yml f2.yml
`,
	}
}
// evaluateReduce is the RunE handler for the eval-reduce command.
//
// args[0] is the reduce expression and args[1:] are the yaml files folded
// into it, so at least two arguments are needed. With fewer arguments the
// usage text is printed and nil is returned, unless the in-place flag was
// given, in which case an error is returned because there is no file to
// write back to.
//
// It returns any error raised while preparing the output, splitting front
// matter or evaluating the files, and "no matches found" when the exit
// status flag is set and nothing was printed.
func evaluateReduce(cmd *cobra.Command, args []string) error {
	cmd.SilenceUsage = true

	// Validate the flag/argument combination before touching the filesystem,
	// so that no in-place temp file is created for a run that cannot proceed.
	if nullInput && len(args) > 1 {
		return errors.New("Cannot pass files in when using null-input flag")
	}
	if len(args) < 2 {
		if writeInplace {
			return fmt.Errorf("Write inplace flag only applicable when giving an expression and at least one file")
		}
		cmd.Println(cmd.UsageString())
		return nil
	}

	// From here on args[0] is always the expression, so the first file is
	// always args[1]. Indexing with this constant can never go out of range.
	const firstFileIndex = 1

	var err error
	out := cmd.OutOrStdout()

	// Colours are enabled when forced, or when stdout is a character device
	// and they have not been disabled. If Stat fails we cannot tell what
	// stdout is, so colours stay off rather than dereferencing a nil FileInfo.
	fileInfo, statErr := os.Stdout.Stat()
	if forceColor || (!forceNoColor && statErr == nil && (fileInfo.Mode()&os.ModeCharDevice) != 0) {
		colorsEnabled = true
	}

	if writeInplace {
		// only use colors if its forced
		colorsEnabled = forceColor
		writeInPlaceHandler := yqlib.NewWriteInPlaceHandler(args[firstFileIndex])
		out, err = writeInPlaceHandler.CreateTempFile()
		if err != nil {
			return err
		}
		// need to indirectly call the function so that completedSuccessfully is
		// passed when we finish execution as opposed to now
		defer func() { writeInPlaceHandler.FinishWriteInPlace(completedSuccessfully) }()
	}

	printer := yqlib.NewPrinter(out, outputToJSON, unwrapScalar, colorsEnabled, indent, !noDocSeparators)

	if frontMatter != "" {
		frontMatterHandler := yqlib.NewFrontMatterHandler(args[firstFileIndex])
		err = frontMatterHandler.Split()
		if err != nil {
			return err
		}
		// Evaluate the extracted yaml front matter in place of the original file.
		args[firstFileIndex] = frontMatterHandler.GetYamlFrontMatterFilename()

		if frontMatter == "process" {
			reader := frontMatterHandler.GetContentReader()
			printer.SetAppendix(reader)
			defer yqlib.SafelyCloseReader(reader)
		}
		defer frontMatterHandler.CleanUp()
	}

	reduceEvaluator := yqlib.NewReduceEvaluator()
	err = reduceEvaluator.EvaluateFiles(processExpression(args[0]), args[firstFileIndex:], printer, leadingContentPreProcessing)

	// Read by the deferred in-place handler above.
	completedSuccessfully = err == nil

	if err == nil && exitStatus && !printer.PrintedAnything() {
		return errors.New("no matches found")
	}
	return err
}

View File

@ -59,6 +59,7 @@ See https://mikefarah.gitbook.io/yq/ for detailed documentation and examples.`,
rootCmd.AddCommand(
createEvaluateSequenceCommand(),
createEvaluateAllCommand(),
createEvaluateReduceCommand(),
completionCmd,
)
return rootCmd

View File

@ -1,4 +1,3 @@
a: apple
---
# hi peeps
# cool
a: test
a2: fish

View File

@ -1,7 +1 @@
a: other # better than the original
b: [3, 4]
c:
toast: leave
test: 1
tell: 1
tasty.taco: cool
b: doc2

View File

@ -30,6 +30,7 @@ func (n *Context) GetVariable(name string) *list.List {
if n.Variables == nil {
return nil
}
log.Debug("GetVariable - %v to %v", name, NodesToString(n.Variables[name]))
return n.Variables[name]
}
@ -37,6 +38,7 @@ func (n *Context) SetVariable(name string, value *list.List) {
if n.Variables == nil {
n.Variables = make(map[string]*list.List)
}
log.Debug("SetVariable - %v to %v", name, NodesToString(value))
n.Variables[name] = value
}

View File

@ -112,7 +112,7 @@ type Operation struct {
OperationType *operationType
Value interface{}
StringValue string
CandidateNode *CandidateNode // used for Value Path elements
ValueNodes *list.List // used for Value Path elements
Preferences interface{}
UpdateAssign bool // used for assign ops, when true it means we evaluate the rhs given the lhs
}
@ -138,12 +138,13 @@ func createScalarNode(value interface{}, stringValue string) *yaml.Node {
func createValueOperation(value interface{}, stringValue string) *Operation {
var node *yaml.Node = createScalarNode(value, stringValue)
list := list.New()
list.PushBack(&CandidateNode{Node: node})
return &Operation{
OperationType: valueOpType,
Value: value,
StringValue: stringValue,
CandidateNode: &CandidateNode{Node: node},
ValueNodes: list,
}
}

View File

@ -13,7 +13,8 @@ type envOpPreferences struct {
}
func envOperator(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) {
envName := expressionNode.Operation.CandidateNode.Node.Value
envNameNode := expressionNode.Operation.ValueNodes.Front().Value.(*CandidateNode)
envName := envNameNode.Node.Value
log.Debug("EnvOperator, env name:", envName)
rawValue := os.Getenv(envName)

View File

@ -136,7 +136,9 @@ func applyAssignment(d *dataTreeNavigator, context Context, pathIndexToStartFrom
} else {
log.Debugf("merge - assignmentOp := &Operation{OperationType: assignAttributesOpType}")
}
rhsOp := &Operation{OperationType: valueOpType, CandidateNode: rhs}
valueNodes := list.New()
valueNodes.PushBack(rhs)
rhsOp := &Operation{OperationType: valueOpType, ValueNodes: valueNodes}
assignmentOpNode := &ExpressionNode{Operation: assignmentOp, Lhs: createTraversalTree(lhsPath, preferences.TraversePrefs, rhs.IsMapKey), Rhs: &ExpressionNode{Operation: rhsOp}}

View File

@ -24,7 +24,7 @@ func reduceOperator(d *dataTreeNavigator, context Context, expressionNode *Expre
arrayExpNode := expressionNode.Lhs.Lhs
array, err := d.GetMatchingNodes(context, arrayExpNode)
log.Debugf("array of %v things", array.MatchingNodes.Len())
log.Debugf("reducing %v", NodesToString(array.MatchingNodes))
if err != nil {
return Context{}, err
@ -39,6 +39,8 @@ func reduceOperator(d *dataTreeNavigator, context Context, expressionNode *Expre
return Context{}, err
}
log.Debugf("initialised with %v", NodesToString(accum.MatchingNodes))
log.Debugf("with variable %v", variableName)
blockExp := expressionNode.Rhs.Rhs

View File

@ -1,6 +1,5 @@
package yqlib
func valueOperator(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) {
log.Debug("value = %v", expressionNode.Operation.CandidateNode.Node.Value)
return context.SingleChildContext(expressionNode.Operation.CandidateNode), nil
return context.ChildContext(expressionNode.Operation.ValueNodes), nil
}

View File

@ -23,7 +23,9 @@ func compoundAssignFunction(d *dataTreeNavigator, context Context, expressionNod
for el := lhs.MatchingNodes.Front(); el != nil; el = el.Next() {
candidate := el.Value.(*CandidateNode)
valueOp.CandidateNode = candidate
valueNodes := list.New()
valueNodes.PushBack(candidate)
valueOp.ValueNodes = valueNodes
valueExpression := &ExpressionNode{Operation: valueOp}
assignmentOpNode := &ExpressionNode{Operation: assignmentOp, Lhs: valueExpression, Rhs: calculation(valueExpression, expressionNode.Rhs)}
@ -83,7 +85,7 @@ func doCrossFunc(d *dataTreeNavigator, context Context, expressionNode *Expressi
if err != nil {
return Context{}, err
}
log.Debugf("crossFunction LHS len: %v", lhs.MatchingNodes.Len())
log.Debugf("crossFunction LHS %v", NodesToString(lhs.MatchingNodes))
rhs, err := d.GetMatchingNodes(context, expressionNode.Rhs)

View File

@ -0,0 +1,142 @@
package yqlib
import (
"container/list"
"fmt"
"io"
"os"
yaml "gopkg.in/yaml.v3"
)
// ReduceEvaluator runs a reduce expression over the documents of a list of
// files and prints the accumulated result.
type ReduceEvaluator interface {
// EvaluateFiles applies reduceExpression to the given files and writes the
// outcome to printer. leadingContentPreProcessing is forwarded to the
// stream reader used to open each file.
EvaluateFiles(reduceExpression string, filenames []string, printer Printer, leadingContentPreProcessing bool) error
}
// reduceEvaluator is the ReduceEvaluator implementation returned by
// NewReduceEvaluator.
type reduceEvaluator struct {
// treeNavigator evaluates the reduce operation built for each document.
treeNavigator DataTreeNavigator
// treeCreator parses expression strings into expression trees.
treeCreator ExpressionParser
// reduceLhs is the parsed ". as $doc" expression, used as the left-hand
// side of every reduce operation.
reduceLhs *ExpressionNode
// fileIndex is stamped onto each candidate node and incremented once a
// file has been read to EOF.
fileIndex int
}
// NewReduceEvaluator returns a ReduceEvaluator whose reduce operations bind
// each document through the pre-parsed expression ". as $doc".
//
// It panics if that fixed expression fails to parse.
func NewReduceEvaluator() ReduceEvaluator {
	parser := NewExpressionParser()

	docBinding, parseErr := parser.ParseExpression(". as $doc")
	if parseErr != nil {
		panic(parseErr)
	}

	evaluator := &reduceEvaluator{
		treeNavigator: NewDataTreeNavigator(),
		treeCreator:   parser,
		reduceLhs:     docBinding,
	}
	return evaluator
}
// EvaluateFiles parses expression, which must be a block of the form
// '<initialValue>; <block that uses $doc>', then folds every document of
// every file in filenames into the running value and prints the final value.
//
// The leading content of the first file is set as the head comment of the
// first result node. leadingContentPreProcessing is passed to readStream.
//
// It returns an error when the expression cannot be parsed or is not a block,
// when a file cannot be opened or reduced, or when printing fails.
func (r *reduceEvaluator) EvaluateFiles(expression string, filenames []string, printer Printer, leadingContentPreProcessing bool) error {
	node, err := r.treeCreator.ParseExpression(expression)
	if err != nil {
		return err
	}
	log.Debugf("node %v", node.Operation.toString())
	if node.Operation.OperationType != blockOpType {
		return fmt.Errorf("Invalid reduce expression - expected '<initialValue>; <block that uses $doc>' got '%v'", expression)
	}

	currentValue := node.Lhs
	reduceExp := node.Rhs
	firstLeadingContent := ""

	log.Debugf("initialValue %v", currentValue.Operation.toString())
	log.Debugf("reduce Exp %v", reduceExp.Operation.toString())

	for index, filename := range filenames {
		reader, leadingContent, err := readStream(filename, leadingContentPreProcessing)
		if err != nil {
			return err
		}
		if index == 0 {
			firstLeadingContent = leadingContent
		}

		currentValue, err = r.ReduceFile(filename, leadingContent, reader, currentValue, reduceExp)

		// Close the file before inspecting the error so a failed reduce does
		// not leak the open handle.
		if file, ok := reader.(*os.File); ok {
			safelyCloseFile(file)
		}
		if err != nil {
			return err
		}
	}

	// If no document was reduced, currentValue is still the initial expression
	// node. Its ValueNodes is nil unless it is a value operation, so use an
	// empty list rather than dereferencing nil.
	result := currentValue.Operation.ValueNodes
	if result == nil {
		result = list.New()
	}
	if result.Len() > 0 {
		result.Front().Value.(*CandidateNode).Node.HeadComment = firstLeadingContent
	}

	return printer.PrintResults(result)
}
// createReduceOp assembles the expression tree for one reduce step: a reduce
// operation whose left side is the evaluator's reduceLhs and whose right side
// is a block pairing initialValue with reduceExp.
func (r *reduceEvaluator) createReduceOp(initialValue *ExpressionNode, reduceExp *ExpressionNode) *ExpressionNode {
	block := &ExpressionNode{Operation: &Operation{OperationType: blockOpType}}
	block.Lhs = initialValue
	block.Rhs = reduceExp

	reduce := &ExpressionNode{Operation: &Operation{OperationType: reduceOpType}}
	reduce.Lhs = r.reduceLhs
	reduce.Rhs = block
	return reduce
}
// ReduceFile decodes every yaml document from reader and folds each one into
// the running value, starting from initialValue and applying reduceExp.
//
// filename and the evaluator's current file index are recorded on each
// candidate node; the file index is incremented when the reader reaches EOF.
// leadingContent is accepted for the caller's convenience but is not used
// here.
//
// It returns a value expression node holding the accumulated result. On a
// decode or evaluation error it returns the value accumulated so far together
// with that error.
func (r *reduceEvaluator) ReduceFile(filename string, leadingContent string, reader io.Reader, initialValue *ExpressionNode, reduceExp *ExpressionNode) (*ExpressionNode, error) {
	var currentIndex uint
	currentValue := initialValue

	decoder := yaml.NewDecoder(reader)
	for {
		var dataBucket yaml.Node
		errorReading := decoder.Decode(&dataBucket)

		if errorReading == io.EOF {
			r.fileIndex++
			return currentValue, nil
		} else if errorReading != nil {
			return currentValue, errorReading
		}

		candidateNode := &CandidateNode{
			Document:  currentIndex,
			Filename:  filename,
			Node:      &dataBucket,
			FileIndex: r.fileIndex,
		}
		inputList := list.New()
		inputList.PushBack(candidateNode)

		// The previous result becomes the initial value of the next step.
		reduceOp := r.createReduceOp(currentValue, reduceExp)

		result, errorParsing := r.treeNavigator.GetMatchingNodes(Context{MatchingNodes: inputList}, reduceOp)
		if errorParsing != nil {
			return currentValue, errorParsing
		}

		// Wrap the result as a value operation so it can be used as the next
		// initial value.
		currentValue = &ExpressionNode{
			Operation: &Operation{
				OperationType: valueOpType,
				ValueNodes:    result.MatchingNodes,
			},
		}
		log.Debugf("reduce - currentValueAfter: %v", NodesToString(currentValue.Operation.ValueNodes))
		currentIndex++
	}
}