eval-reduce wip

This commit is contained in:
Mike Farah 2021-07-24 15:21:53 +10:00
parent eeac03a437
commit bb5a6f2184
13 changed files with 279 additions and 21 deletions

View File

@ -34,7 +34,8 @@ See https://mikefarah.gitbook.io/yq/ for detailed documentation and examples.
## Evaluate All ## ## Evaluate All ##
This command loads _all_ yaml documents of _all_ yaml files and runs expression once This command loads _all_ yaml documents of _all_ yaml files and runs expression once
Useful when you need to run an expression across several yaml documents or files (like merge). Useful when you need to run an expression across several yaml documents or files (like merge).
Note that it consumes more memory than eval. If you're just merging entire multiple files together, you may want to consider eval-reduce as it's faster.
Note that it consumes more memory than eval and eval-reduce.
`, `,
RunE: evaluateAll, RunE: evaluateAll,
} }

View File

@ -0,0 +1,112 @@
package cmd
import (
"errors"
"fmt"
"os"
"github.com/mikefarah/yq/v4/pkg/yqlib"
"github.com/spf13/cobra"
)
// createEvaluateReduceCommand builds the cobra command for "eval-reduce"
// (alias "er"). The command delegates execution to evaluateReduce.
func createEvaluateReduceCommand() *cobra.Command {
	return &cobra.Command{
		Use:     "eval-reduce [reduce expression] [yaml_file1]...",
		Aliases: []string{"er"},
		Short:   "Runs a reduce expression sequentially against each document of each file given. More memory efficient than using eval-all if you can get away with it.",
		Long: `yq is a portable command-line YAML processor (https://github.com/mikefarah/yq/)
See https://mikefarah.gitbook.io/yq/ for detailed documentation and examples.
## Evaluate Reduce ##
This command runs the reduce expression against each document of each file given, accumulating the results.
It is most useful when merging multiple files (but isn't as flexible as using eval-all with ireduce, as you
can only merge the top level nodes).
`,
		Example: `
# Merge f2.yml into f1.yml (inplace)
yq eval-reduce --inplace '{} ; . * $doc' f1.yml f2.yml
`,
		RunE: evaluateReduce,
	}
}
// evaluateReduce is the RunE handler of the eval-reduce command.
//
// args[0] is the reduce expression and args[1:] are the yaml files it is
// applied to. With fewer than two arguments the usage text is printed and nil
// is returned. All argument validation happens before any side effect (temp
// file creation for in-place writes, front matter splitting), so an invalid
// invocation never touches the filesystem.
//
// It returns an error when the flags and arguments are inconsistent, when
// evaluation fails, or when the exit-status flag is set and nothing was printed.
func evaluateReduce(cmd *cobra.Command, args []string) error {
	cmd.SilenceUsage = true

	// Stat can fail; only look at the file mode when it succeeded so that a
	// nil FileInfo is never dereferenced.
	fileInfo, statErr := os.Stdout.Stat()
	stdoutIsCharDevice := statErr == nil && (fileInfo.Mode()&os.ModeCharDevice) != 0
	if forceColor || (!forceNoColor && stdoutIsCharDevice) {
		colorsEnabled = true
	}

	// The expression always occupies args[0], so the first file (if any) is
	// args[1]. A lone argument is never a file for this command.
	firstFileIndex := -1
	if len(args) > 1 {
		firstFileIndex = 1
	}

	if writeInplace && (firstFileIndex == -1) {
		return fmt.Errorf("Write inplace flag only applicable when giving an expression and at least one file")
	}

	if nullInput && len(args) > 1 {
		return errors.New("Cannot pass files in when using null-input flag")
	}

	// 2+ args are required: [0] = expression, the rest are files.
	if len(args) < 2 {
		cmd.Println(cmd.UsageString())
		return nil
	}

	var err error
	out := cmd.OutOrStdout()

	if writeInplace {
		// only use colors if its forced
		colorsEnabled = forceColor
		writeInPlaceHandler := yqlib.NewWriteInPlaceHandler(args[firstFileIndex])
		out, err = writeInPlaceHandler.CreateTempFile()
		if err != nil {
			return err
		}
		// need to indirectly call the function so that completedSuccessfully is
		// passed when we finish execution as opposed to now
		defer func() { writeInPlaceHandler.FinishWriteInPlace(completedSuccessfully) }()
	}

	printer := yqlib.NewPrinter(out, outputToJSON, unwrapScalar, colorsEnabled, indent, !noDocSeparators)

	if frontMatter != "" {
		frontMatterHandler := yqlib.NewFrontMatterHandler(args[firstFileIndex])
		err = frontMatterHandler.Split()
		if err != nil {
			return err
		}
		// From here on the first file is replaced by the extracted front matter file.
		args[firstFileIndex] = frontMatterHandler.GetYamlFrontMatterFilename()

		if frontMatter == "process" {
			reader := frontMatterHandler.GetContentReader()
			printer.SetAppendix(reader)
			defer yqlib.SafelyCloseReader(reader)
		}
		defer frontMatterHandler.CleanUp()
	}

	reduceEvaluator := yqlib.NewReduceEvaluator()
	err = reduceEvaluator.EvaluateFiles(processExpression(args[0]), args[1:], printer, leadingContentPreProcessing)

	// Read by the deferred in-place handler above.
	completedSuccessfully = err == nil

	if err == nil && exitStatus && !printer.PrintedAnything() {
		return errors.New("no matches found")
	}

	return err
}

View File

@ -59,6 +59,7 @@ See https://mikefarah.gitbook.io/yq/ for detailed documentation and examples.`,
rootCmd.AddCommand( rootCmd.AddCommand(
createEvaluateSequenceCommand(), createEvaluateSequenceCommand(),
createEvaluateAllCommand(), createEvaluateAllCommand(),
createEvaluateReduceCommand(),
completionCmd, completionCmd,
) )
return rootCmd return rootCmd

View File

@ -1,4 +1,3 @@
a: apple
--- ---
# hi peeps a2: fish
# cool
a: test

View File

@ -1,7 +1 @@
a: other # better than the original b: doc2
b: [3, 4]
c:
toast: leave
test: 1
tell: 1
tasty.taco: cool

View File

@ -30,6 +30,7 @@ func (n *Context) GetVariable(name string) *list.List {
if n.Variables == nil { if n.Variables == nil {
return nil return nil
} }
log.Debug("GetVariable - %v to %v", name, NodesToString(n.Variables[name]))
return n.Variables[name] return n.Variables[name]
} }
@ -37,6 +38,7 @@ func (n *Context) SetVariable(name string, value *list.List) {
if n.Variables == nil { if n.Variables == nil {
n.Variables = make(map[string]*list.List) n.Variables = make(map[string]*list.List)
} }
log.Debug("SetVariable - %v to %v", name, NodesToString(value))
n.Variables[name] = value n.Variables[name] = value
} }

View File

@ -112,7 +112,7 @@ type Operation struct {
OperationType *operationType OperationType *operationType
Value interface{} Value interface{}
StringValue string StringValue string
CandidateNode *CandidateNode // used for Value Path elements ValueNodes *list.List // used for Value Path elements
Preferences interface{} Preferences interface{}
UpdateAssign bool // used for assign ops, when true it means we evaluate the rhs given the lhs UpdateAssign bool // used for assign ops, when true it means we evaluate the rhs given the lhs
} }
@ -138,12 +138,13 @@ func createScalarNode(value interface{}, stringValue string) *yaml.Node {
func createValueOperation(value interface{}, stringValue string) *Operation { func createValueOperation(value interface{}, stringValue string) *Operation {
var node *yaml.Node = createScalarNode(value, stringValue) var node *yaml.Node = createScalarNode(value, stringValue)
list := list.New()
list.PushBack(&CandidateNode{Node: node})
return &Operation{ return &Operation{
OperationType: valueOpType, OperationType: valueOpType,
Value: value, Value: value,
StringValue: stringValue, StringValue: stringValue,
CandidateNode: &CandidateNode{Node: node}, ValueNodes: list,
} }
} }

View File

@ -13,7 +13,8 @@ type envOpPreferences struct {
} }
func envOperator(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) { func envOperator(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) {
envName := expressionNode.Operation.CandidateNode.Node.Value envNameNode := expressionNode.Operation.ValueNodes.Front().Value.(*CandidateNode)
envName := envNameNode.Node.Value
log.Debug("EnvOperator, env name:", envName) log.Debug("EnvOperator, env name:", envName)
rawValue := os.Getenv(envName) rawValue := os.Getenv(envName)

View File

@ -136,7 +136,9 @@ func applyAssignment(d *dataTreeNavigator, context Context, pathIndexToStartFrom
} else { } else {
log.Debugf("merge - assignmentOp := &Operation{OperationType: assignAttributesOpType}") log.Debugf("merge - assignmentOp := &Operation{OperationType: assignAttributesOpType}")
} }
rhsOp := &Operation{OperationType: valueOpType, CandidateNode: rhs} valueNodes := list.New()
valueNodes.PushBack(rhs)
rhsOp := &Operation{OperationType: valueOpType, ValueNodes: valueNodes}
assignmentOpNode := &ExpressionNode{Operation: assignmentOp, Lhs: createTraversalTree(lhsPath, preferences.TraversePrefs, rhs.IsMapKey), Rhs: &ExpressionNode{Operation: rhsOp}} assignmentOpNode := &ExpressionNode{Operation: assignmentOp, Lhs: createTraversalTree(lhsPath, preferences.TraversePrefs, rhs.IsMapKey), Rhs: &ExpressionNode{Operation: rhsOp}}

View File

@ -24,7 +24,7 @@ func reduceOperator(d *dataTreeNavigator, context Context, expressionNode *Expre
arrayExpNode := expressionNode.Lhs.Lhs arrayExpNode := expressionNode.Lhs.Lhs
array, err := d.GetMatchingNodes(context, arrayExpNode) array, err := d.GetMatchingNodes(context, arrayExpNode)
log.Debugf("array of %v things", array.MatchingNodes.Len()) log.Debugf("reducing %v", NodesToString(array.MatchingNodes))
if err != nil { if err != nil {
return Context{}, err return Context{}, err
@ -39,6 +39,8 @@ func reduceOperator(d *dataTreeNavigator, context Context, expressionNode *Expre
return Context{}, err return Context{}, err
} }
log.Debugf("initialised with %v", NodesToString(accum.MatchingNodes))
log.Debugf("with variable %v", variableName) log.Debugf("with variable %v", variableName)
blockExp := expressionNode.Rhs.Rhs blockExp := expressionNode.Rhs.Rhs

View File

@ -1,6 +1,5 @@
package yqlib package yqlib
func valueOperator(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) { func valueOperator(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) {
log.Debug("value = %v", expressionNode.Operation.CandidateNode.Node.Value) return context.ChildContext(expressionNode.Operation.ValueNodes), nil
return context.SingleChildContext(expressionNode.Operation.CandidateNode), nil
} }

View File

@ -23,7 +23,9 @@ func compoundAssignFunction(d *dataTreeNavigator, context Context, expressionNod
for el := lhs.MatchingNodes.Front(); el != nil; el = el.Next() { for el := lhs.MatchingNodes.Front(); el != nil; el = el.Next() {
candidate := el.Value.(*CandidateNode) candidate := el.Value.(*CandidateNode)
valueOp.CandidateNode = candidate valueNodes := list.New()
valueNodes.PushBack(candidate)
valueOp.ValueNodes = valueNodes
valueExpression := &ExpressionNode{Operation: valueOp} valueExpression := &ExpressionNode{Operation: valueOp}
assignmentOpNode := &ExpressionNode{Operation: assignmentOp, Lhs: valueExpression, Rhs: calculation(valueExpression, expressionNode.Rhs)} assignmentOpNode := &ExpressionNode{Operation: assignmentOp, Lhs: valueExpression, Rhs: calculation(valueExpression, expressionNode.Rhs)}
@ -83,7 +85,7 @@ func doCrossFunc(d *dataTreeNavigator, context Context, expressionNode *Expressi
if err != nil { if err != nil {
return Context{}, err return Context{}, err
} }
log.Debugf("crossFunction LHS len: %v", lhs.MatchingNodes.Len()) log.Debugf("crossFunction LHS %v", NodesToString(lhs.MatchingNodes))
rhs, err := d.GetMatchingNodes(context, expressionNode.Rhs) rhs, err := d.GetMatchingNodes(context, expressionNode.Rhs)

View File

@ -0,0 +1,142 @@
package yqlib
import (
"container/list"
"fmt"
"io"
"os"
yaml "gopkg.in/yaml.v3"
)
// ReduceEvaluator applies a reduce expression across the documents of a set
// of files and prints the accumulated result.
type ReduceEvaluator interface {
	// EvaluateFiles runs reduceExpression over every file in filenames and
	// sends the result to printer.
	EvaluateFiles(
		reduceExpression string,
		filenames []string,
		printer Printer,
		leadingContentPreProcessing bool,
	) error
}
// reduceEvaluator is the ReduceEvaluator implementation returned by
// NewReduceEvaluator.
type reduceEvaluator struct {
	// treeNavigator evaluates expression trees against candidate nodes.
	treeNavigator DataTreeNavigator
	// treeCreator parses expression strings into expression trees.
	treeCreator ExpressionParser
	// reduceLhs is the pre-parsed ". as $doc" expression used as the left
	// hand side of every reduce operation.
	reduceLhs *ExpressionNode
	// fileIndex counts the files fully read so far; it is stamped onto each
	// candidate node.
	fileIndex int
}
// NewReduceEvaluator creates a ReduceEvaluator with a fresh expression parser
// and data tree navigator. It panics if the built-in ". as $doc" expression
// cannot be parsed.
func NewReduceEvaluator() ReduceEvaluator {
	parser := NewExpressionParser()

	docBinding, parseErr := parser.ParseExpression(". as $doc")
	if parseErr != nil {
		panic(parseErr)
	}

	evaluator := new(reduceEvaluator)
	evaluator.treeNavigator = NewDataTreeNavigator()
	evaluator.treeCreator = parser
	evaluator.reduceLhs = docBinding
	return evaluator
}
// EvaluateFiles parses expression, which must be a block of the form
// '<initialValue>; <block that uses $doc>', and folds every document of every
// file in filenames through the block via ReduceFile, starting from the
// initial value. The final value is handed to printer.
//
// The leading content of the first file is attached as the head comment of
// the first result node.
//
// It returns an error if the expression cannot be parsed or is not a block,
// if a file cannot be opened or reduced, or if printing fails.
func (r *reduceEvaluator) EvaluateFiles(expression string, filenames []string, printer Printer, leadingContentPreProcessing bool) error {
	node, err := r.treeCreator.ParseExpression(expression)
	if err != nil {
		return err
	}

	log.Debugf("node %v", node.Operation.toString())

	if node.Operation.OperationType != blockOpType {
		return fmt.Errorf("Invalid reduce expression - expected '<initialValue>; <block that uses $doc>' got '%v'", expression)
	}

	currentValue := node.Lhs
	reduceExp := node.Rhs
	firstLeadingContent := ""

	log.Debugf("initialValue %v", currentValue.Operation.toString())
	log.Debugf("reduce Exp %v", reduceExp.Operation.toString())

	for index, filename := range filenames {
		reader, leadingContent, err := readStream(filename, leadingContentPreProcessing)
		if err != nil {
			return err
		}
		if index == 0 {
			firstLeadingContent = leadingContent
		}

		currentValue, err = r.ReduceFile(filename, leadingContent, reader, currentValue, reduceExp)

		// Close before inspecting the error so the file handle is released
		// on the failure path as well.
		if file, ok := reader.(*os.File); ok {
			safelyCloseFile(file)
		}
		if err != nil {
			return err
		}
	}

	// ValueNodes is only populated on value operations. If no document was
	// reduced, currentValue is still the parsed initial-value expression and
	// its ValueNodes may be nil; calling Len on a nil list would panic.
	result := currentValue.Operation.ValueNodes
	if result == nil {
		result = list.New()
	}
	if result.Len() > 0 {
		result.Front().Value.(*CandidateNode).Node.HeadComment = firstLeadingContent
	}

	// NOTE(review): assumes Printer.PrintResults returns an error - confirm
	// against the Printer interface.
	return printer.PrintResults(result)
}
// createReduceOp assembles the expression tree for a single reduce step: a
// reduce operation whose left hand side is the pre-parsed ". as $doc"
// expression and whose right hand side is a block pairing initialValue with
// reduceExp.
func (r *reduceEvaluator) createReduceOp(initialValue *ExpressionNode, reduceExp *ExpressionNode) *ExpressionNode {
	block := new(ExpressionNode)
	block.Operation = &Operation{OperationType: blockOpType}
	block.Lhs = initialValue
	block.Rhs = reduceExp

	reduce := new(ExpressionNode)
	reduce.Operation = &Operation{OperationType: reduceOpType}
	reduce.Lhs = r.reduceLhs
	reduce.Rhs = block
	return reduce
}
// ReduceFile decodes the YAML documents in reader one at a time and folds each
// of them into the running value using reduceExp, starting from initialValue.
//
// Every document is wrapped in a CandidateNode carrying its document index,
// filename and the evaluator's current file index, then evaluated through a
// reduce operation built by createReduceOp. The result of each step becomes a
// value operation that seeds the next step.
//
// When the reader is exhausted (io.EOF) the evaluator's file index is
// incremented and the accumulated value is returned. On a decode or evaluation
// error the value accumulated so far is returned alongside the error.
//
// leadingContent is accepted but not used by this method.
func (r *reduceEvaluator) ReduceFile(filename string, leadingContent string, reader io.Reader, initialValue *ExpressionNode, reduceExp *ExpressionNode) (*ExpressionNode, error) {
	var currentIndex uint
	currentValue := initialValue

	decoder := yaml.NewDecoder(reader)
	for {
		// Declared per iteration: each candidate node keeps a pointer to its
		// own decoded document.
		var dataBucket yaml.Node
		errorReading := decoder.Decode(&dataBucket)

		if errorReading == io.EOF {
			r.fileIndex++
			return currentValue, nil
		} else if errorReading != nil {
			return currentValue, errorReading
		}

		candidateNode := &CandidateNode{
			Document:  currentIndex,
			Filename:  filename,
			Node:      &dataBucket,
			FileIndex: r.fileIndex,
		}
		inputList := list.New()
		inputList.PushBack(candidateNode)

		reduceOp := r.createReduceOp(currentValue, reduceExp)

		result, errorParsing := r.treeNavigator.GetMatchingNodes(Context{MatchingNodes: inputList}, reduceOp)
		if errorParsing != nil {
			return currentValue, errorParsing
		}

		// Wrap the step's output as a value operation so it can act as the
		// initial value of the next reduce step.
		currentValue = &ExpressionNode{
			Operation: &Operation{
				OperationType: valueOpType,
				ValueNodes:    result.MatchingNodes,
			},
		}
		log.Debugf("reduce - currentValueAfter: %v", NodesToString(currentValue.Operation.ValueNodes))

		currentIndex++
	}
}