From bb5a6f218414e8e70a62de88f775108f4f9d63fb Mon Sep 17 00:00:00 2001 From: Mike Farah Date: Sat, 24 Jul 2021 15:21:53 +1000 Subject: [PATCH] eval-reduce wip --- cmd/evaluate_all_command.go | 3 +- cmd/evaluate_reduce_command.go | 112 ++++++++++++++++++++++++++ cmd/root.go | 1 + examples/data1.yaml | 5 +- examples/data2.yaml | 8 +- pkg/yqlib/context.go | 2 + pkg/yqlib/lib.go | 7 +- pkg/yqlib/operator_env.go | 3 +- pkg/yqlib/operator_multiply.go | 4 +- pkg/yqlib/operator_reduce.go | 4 +- pkg/yqlib/operator_value.go | 3 +- pkg/yqlib/operators.go | 6 +- pkg/yqlib/reduce_evaluator.go | 142 +++++++++++++++++++++++++++++++++ 13 files changed, 279 insertions(+), 21 deletions(-) create mode 100644 cmd/evaluate_reduce_command.go create mode 100644 pkg/yqlib/reduce_evaluator.go diff --git a/cmd/evaluate_all_command.go b/cmd/evaluate_all_command.go index fec5b2c8..baaaf025 100644 --- a/cmd/evaluate_all_command.go +++ b/cmd/evaluate_all_command.go @@ -34,7 +34,8 @@ See https://mikefarah.gitbook.io/yq/ for detailed documentation and examples. ## Evaluate All ## This command loads _all_ yaml documents of _all_ yaml files and runs expression once Useful when you need to run an expression across several yaml documents or files (like merge). -Note that it consumes more memory than eval. +If you're just merging entire multiple files together, you may want to consider eval-reduce as it's faster. +Note that it consumes more memory than eval and eval-reduce. `, RunE: evaluateAll, } diff --git a/cmd/evaluate_reduce_command.go b/cmd/evaluate_reduce_command.go new file mode 100644 index 00000000..facfb506 --- /dev/null +++ b/cmd/evaluate_reduce_command.go @@ -0,0 +1,112 @@ +package cmd + +import ( + "errors" + "fmt" + "os" + + "github.com/mikefarah/yq/v4/pkg/yqlib" + "github.com/spf13/cobra" +) + +func createEvaluateReduceCommand() *cobra.Command { + var cmdEvalReduce = &cobra.Command{ + Use: "eval-reduce [reduce expression] [yaml_file1]...", + Aliases: []string{"er"}, + Short: "Runs a reduce expression sequentially against each document of each file given. More memory efficient than using eval-all if you can get away with it.", + Example: ` +# Merge f2.yml into f1.yml (inplace) +yq eval-reduce --inplace '{} ; . * $doc' f1.yml f2.yml +`, + Long: `yq is a portable command-line YAML processor (https://github.com/mikefarah/yq/) +See https://mikefarah.gitbook.io/yq/ for detailed documentation and examples. + +## Evaluate Reduce ## +This command runs the reduce expression against each document of each file given, accumulating the results. +It is most useful when merging multiple files (but isn't as flexible as using eval-all with ireduce, as you +can only merge the top level nodes). +`, + RunE: evaluateReduce, + } + return cmdEvalReduce +} +func evaluateReduce(cmd *cobra.Command, args []string) error { + cmd.SilenceUsage = true + // 2+ args, [0] = expression, file the rest + + var err error + + out := cmd.OutOrStdout() + + fileInfo, _ := os.Stdout.Stat() + + if forceColor || (!forceNoColor && (fileInfo.Mode()&os.ModeCharDevice) != 0) { + colorsEnabled = true + } + + firstFileIndex := -1 + if !nullInput && len(args) == 1 { + firstFileIndex = 0 + } else if len(args) > 1 { + firstFileIndex = 1 + } + + if writeInplace && (firstFileIndex == -1) { + return fmt.Errorf("Write inplace flag only applicable when giving an expression and at least one file") + } + + if writeInplace { + // only use colors if its forced + colorsEnabled = forceColor + writeInPlaceHandler := yqlib.NewWriteInPlaceHandler(args[firstFileIndex]) + out, err = writeInPlaceHandler.CreateTempFile() + if err != nil { + return err + } + // need to indirectly call the function so that completedSuccessfully is + // passed when we finish execution as opposed to now + defer func() { writeInPlaceHandler.FinishWriteInPlace(completedSuccessfully) }() + } + + if nullInput && len(args) > 1 { + return errors.New("Cannot pass files in when using null-input flag") + } + + printer := yqlib.NewPrinter(out, outputToJSON, unwrapScalar, colorsEnabled, indent, !noDocSeparators) + + if frontMatter != "" { + frontMatterHandler := yqlib.NewFrontMatterHandler(args[firstFileIndex]) + err = frontMatterHandler.Split() + if err != nil { + return err + } + args[firstFileIndex] = frontMatterHandler.GetYamlFrontMatterFilename() + + if frontMatter == "process" { + reader := frontMatterHandler.GetContentReader() + printer.SetAppendix(reader) + defer yqlib.SafelyCloseReader(reader) + } + defer frontMatterHandler.CleanUp() + } + + reduceEvaluator := yqlib.NewReduceEvaluator() + switch len(args) { + case 0: + cmd.Println(cmd.UsageString()) + return nil + case 1: + cmd.Println(cmd.UsageString()) + return nil + default: + err = reduceEvaluator.EvaluateFiles(processExpression(args[0]), args[1:], printer, leadingContentPreProcessing) + } + + completedSuccessfully = err == nil + + if err == nil && exitStatus && !printer.PrintedAnything() { + return errors.New("no matches found") + } + + return err +} diff --git a/cmd/root.go b/cmd/root.go index c080745f..68ce3bc5 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -59,6 +59,7 @@ See https://mikefarah.gitbook.io/yq/ for detailed documentation and examples.`, rootCmd.AddCommand( createEvaluateSequenceCommand(), createEvaluateAllCommand(), + createEvaluateReduceCommand(), completionCmd, ) return rootCmd diff --git a/examples/data1.yaml b/examples/data1.yaml index 0d0503c0..d395eca7 100644 --- a/examples/data1.yaml +++ b/examples/data1.yaml @@ -1,4 +1,3 @@ +a: apple --- -# hi peeps -# cool -a: test \ No newline at end of file +a2: fish \ No newline at end of file diff --git a/examples/data2.yaml b/examples/data2.yaml index 9efa4fc6..3f898aba 100644 --- a/examples/data2.yaml +++ b/examples/data2.yaml @@ -1,7 +1 @@ -a: other # better than the original -b: [3, 4] -c: - toast: leave - test: 1 - tell: 1 - tasty.taco: cool +b: doc2 \ No newline at end of file diff --git a/pkg/yqlib/context.go b/pkg/yqlib/context.go index 817527ab..e05e5418 100644 --- a/pkg/yqlib/context.go +++ b/pkg/yqlib/context.go @@ -30,6 +30,7 @@ func (n *Context) GetVariable(name string) *list.List { if n.Variables == nil { return nil } + log.Debug("GetVariable - %v to %v", name, NodesToString(n.Variables[name])) return n.Variables[name] } @@ -37,6 +38,7 @@ func (n *Context) SetVariable(name string, value *list.List) { if n.Variables == nil { n.Variables = make(map[string]*list.List) } + log.Debug("SetVariable - %v to %v", name, NodesToString(value)) n.Variables[name] = value } diff --git a/pkg/yqlib/lib.go b/pkg/yqlib/lib.go index 1c3fa15c..b47cbc5a 100644 --- a/pkg/yqlib/lib.go +++ b/pkg/yqlib/lib.go @@ -112,7 +112,7 @@ type Operation struct { OperationType *operationType Value interface{} StringValue string - CandidateNode *CandidateNode // used for Value Path elements + ValueNodes *list.List // used for Value Path elements Preferences interface{} UpdateAssign bool // used for assign ops, when true it means we evaluate the rhs given the lhs } @@ -138,12 +138,13 @@ func createScalarNode(value interface{}, stringValue string) *yaml.Node { func createValueOperation(value interface{}, stringValue string) *Operation { var node *yaml.Node = createScalarNode(value, stringValue) - + list := list.New() + list.PushBack(&CandidateNode{Node: node}) return &Operation{ OperationType: valueOpType, Value: value, StringValue: stringValue, - CandidateNode: &CandidateNode{Node: node}, + ValueNodes: list, } } diff --git a/pkg/yqlib/operator_env.go b/pkg/yqlib/operator_env.go index f6c5eebc..f2b70ca7 100644 --- a/pkg/yqlib/operator_env.go +++ b/pkg/yqlib/operator_env.go @@ -13,7 +13,8 @@ type envOpPreferences struct { } func envOperator(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) { - envName := expressionNode.Operation.CandidateNode.Node.Value + envNameNode := expressionNode.Operation.ValueNodes.Front().Value.(*CandidateNode) + envName := envNameNode.Node.Value log.Debug("EnvOperator, env name:", envName) rawValue := os.Getenv(envName) diff --git a/pkg/yqlib/operator_multiply.go b/pkg/yqlib/operator_multiply.go index 8d273066..c6bc0ac2 100644 --- a/pkg/yqlib/operator_multiply.go +++ b/pkg/yqlib/operator_multiply.go @@ -136,7 +136,9 @@ func applyAssignment(d *dataTreeNavigator, context Context, pathIndexToStartFrom } else { log.Debugf("merge - assignmentOp := &Operation{OperationType: assignAttributesOpType}") } - rhsOp := &Operation{OperationType: valueOpType, CandidateNode: rhs} + valueNodes := list.New() + valueNodes.PushBack(rhs) + rhsOp := &Operation{OperationType: valueOpType, ValueNodes: valueNodes} assignmentOpNode := &ExpressionNode{Operation: assignmentOp, Lhs: createTraversalTree(lhsPath, preferences.TraversePrefs, rhs.IsMapKey), Rhs: &ExpressionNode{Operation: rhsOp}} diff --git a/pkg/yqlib/operator_reduce.go b/pkg/yqlib/operator_reduce.go index cc908fad..cd109e8c 100644 --- a/pkg/yqlib/operator_reduce.go +++ b/pkg/yqlib/operator_reduce.go @@ -24,7 +24,7 @@ func reduceOperator(d *dataTreeNavigator, context Context, expressionNode *Expre arrayExpNode := expressionNode.Lhs.Lhs array, err := d.GetMatchingNodes(context, arrayExpNode) - log.Debugf("array of %v things", array.MatchingNodes.Len()) + log.Debugf("reducing %v", NodesToString(array.MatchingNodes)) if err != nil { return Context{}, err @@ -39,6 +39,8 @@ func reduceOperator(d *dataTreeNavigator, context Context, expressionNode *Expre return Context{}, err } + log.Debugf("initialised with %v", NodesToString(accum.MatchingNodes)) + log.Debugf("with variable %v", variableName) blockExp := expressionNode.Rhs.Rhs diff --git a/pkg/yqlib/operator_value.go b/pkg/yqlib/operator_value.go index 6c70b1f7..90f2da5d 100644 --- a/pkg/yqlib/operator_value.go +++ b/pkg/yqlib/operator_value.go @@ -1,6 +1,5 @@ package yqlib func valueOperator(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) { - log.Debug("value = %v", expressionNode.Operation.CandidateNode.Node.Value) - return context.SingleChildContext(expressionNode.Operation.CandidateNode), nil + return context.ChildContext(expressionNode.Operation.ValueNodes), nil } diff --git a/pkg/yqlib/operators.go b/pkg/yqlib/operators.go index 70d9d8d5..56a9769f 100644 --- a/pkg/yqlib/operators.go +++ b/pkg/yqlib/operators.go @@ -23,7 +23,9 @@ func compoundAssignFunction(d *dataTreeNavigator, context Context, expressionNod for el := lhs.MatchingNodes.Front(); el != nil; el = el.Next() { candidate := el.Value.(*CandidateNode) - valueOp.CandidateNode = candidate + valueNodes := list.New() + valueNodes.PushBack(candidate) + valueOp.ValueNodes = valueNodes valueExpression := &ExpressionNode{Operation: valueOp} assignmentOpNode := &ExpressionNode{Operation: assignmentOp, Lhs: valueExpression, Rhs: calculation(valueExpression, expressionNode.Rhs)} @@ -83,7 +85,7 @@ func doCrossFunc(d *dataTreeNavigator, context Context, expressionNode *Expressi if err != nil { return Context{}, err } - log.Debugf("crossFunction LHS len: %v", lhs.MatchingNodes.Len()) + log.Debugf("crossFunction LHS %v", NodesToString(lhs.MatchingNodes)) rhs, err := d.GetMatchingNodes(context, expressionNode.Rhs) diff --git a/pkg/yqlib/reduce_evaluator.go b/pkg/yqlib/reduce_evaluator.go new file mode 100644 index 00000000..a33c2039 --- /dev/null +++ b/pkg/yqlib/reduce_evaluator.go @@ -0,0 +1,142 @@ +package yqlib + +import ( + "container/list" + "fmt" + "io" + "os" + + yaml "gopkg.in/yaml.v3" +) + +type ReduceEvaluator interface { + EvaluateFiles(reduceExpression string, filenames []string, printer Printer, leadingContentPreProcessing bool) error +} + +type reduceEvaluator struct { + treeNavigator DataTreeNavigator + treeCreator ExpressionParser + reduceLhs *ExpressionNode + fileIndex int +} + +func NewReduceEvaluator() ReduceEvaluator { + treeCreator := NewExpressionParser() + reduceLhs, err := treeCreator.ParseExpression(". as $doc") + if err != nil { + panic(err) + } + return &reduceEvaluator{treeNavigator: NewDataTreeNavigator(), treeCreator: treeCreator, reduceLhs: reduceLhs} +} + +func (r *reduceEvaluator) EvaluateFiles(expression string, filenames []string, printer Printer, leadingContentPreProcessing bool) error { + + node, err := r.treeCreator.ParseExpression(expression) + if err != nil { + return err + } + log.Debug("node %v", node.Operation.toString()) + + if node.Operation.OperationType != blockOpType { + return fmt.Errorf("Invalid reduce expression - expected '; ' got '%v'", expression) + } + + currentValue := node.Lhs + reduceExp := node.Rhs + firstLeadingContent := "" + + log.Debug("initialValue %v", currentValue.Operation.toString()) + + log.Debug("reduce Exp %v", reduceExp.Operation.toString()) + + for index, filename := range filenames { + reader, leadingContent, err := readStream(filename, leadingContentPreProcessing) + + if index == 0 { + firstLeadingContent = leadingContent + } + + if err != nil { + return err + } + + currentValue, err = r.ReduceFile(filename, leadingContent, reader, currentValue, reduceExp) + if err != nil { + return err + } + + switch reader := reader.(type) { + case *os.File: + safelyCloseFile(reader) + } + } + + result := currentValue.Operation.ValueNodes + + if result.Len() > 0 { + result.Front().Value.(*CandidateNode).Node.HeadComment = firstLeadingContent + } + + printer.PrintResults(result) + + return nil +} + +func (r *reduceEvaluator) createReduceOp(initialValue *ExpressionNode, reduceExp *ExpressionNode) *ExpressionNode { + reduceBlock := &ExpressionNode{ + Operation: &Operation{OperationType: blockOpType}, + Lhs: initialValue, + Rhs: reduceExp, + } + + return &ExpressionNode{ + Operation: &Operation{OperationType: reduceOpType}, + Lhs: r.reduceLhs, + Rhs: reduceBlock, + } +} + +func (r *reduceEvaluator) ReduceFile(filename string, leadingContent string, reader io.Reader, initialValue *ExpressionNode, reduceExp *ExpressionNode) (*ExpressionNode, error) { + + var currentIndex uint + var currentValue = initialValue + decoder := yaml.NewDecoder(reader) + for { + var dataBucket yaml.Node + errorReading := decoder.Decode(&dataBucket) + + if errorReading == io.EOF { + r.fileIndex = r.fileIndex + 1 + return currentValue, nil + } else if errorReading != nil { + return currentValue, errorReading + } + candidateNode := &CandidateNode{ + Document: currentIndex, + Filename: filename, + Node: &dataBucket, + FileIndex: r.fileIndex, + } + inputList := list.New() + inputList.PushBack(candidateNode) + + reduceOp := r.createReduceOp(currentValue, reduceExp) + // log.Debug("reduce - currentValueBefore: %v", NodesToString(currentValue.Operation.ValueNodes)) + + result, errorParsing := r.treeNavigator.GetMatchingNodes(Context{MatchingNodes: inputList}, reduceOp) + if errorParsing != nil { + return currentValue, errorParsing + } + + currentValue = &ExpressionNode{ + Operation: &Operation{ + OperationType: valueOpType, + ValueNodes: result.MatchingNodes, + }, + } + + log.Debug("reduce - currentValueAfter: %v", NodesToString(currentValue.Operation.ValueNodes)) + + currentIndex = currentIndex + 1 + } +}