From a8bdc12d8341264ffe254a6f2472107e3d6e35e4 Mon Sep 17 00:00:00 2001
From: Mike Farah
Date: Sun, 20 Sep 2020 22:40:09 +1000
Subject: [PATCH] to postfix wip

---
 pkg/yqlib/path_postfix.go        | 152 +++++++++++++++++++++++++++++++
 pkg/yqlib/path_postfix_test.go   |  82 +++++++++++++++++
 pkg/yqlib/path_tokeniser.go      |  10 +-
 pkg/yqlib/path_tokeniser_test.go |   1 +
 pkg/yqlib/path_tree.go           |  24 -----
 5 files changed, 243 insertions(+), 26 deletions(-)
 create mode 100644 pkg/yqlib/path_postfix.go
 create mode 100644 pkg/yqlib/path_postfix_test.go

diff --git a/pkg/yqlib/path_postfix.go b/pkg/yqlib/path_postfix.go
new file mode 100644
index 00000000..7f40efab
--- /dev/null
+++ b/pkg/yqlib/path_postfix.go
@@ -0,0 +1,152 @@
+package yqlib
+
+import (
+	"errors"
+	"fmt"
+
+	lex "github.com/timtadh/lexmachine"
+)
+
+var precedenceMap map[int]int
+
+type PathElementType uint32
+
+const (
+	PathKey PathElementType = 1 << iota
+	ArrayIndex
+	Operation
+)
+
+type OperationType uint32
+
+const (
+	None OperationType = 1 << iota
+	Or
+	And
+	Equals
+)
+
+type PathElement struct {
+	PathElementType PathElementType
+	OperationType   OperationType
+	Value           interface{}
+	ChildElements   [][]*PathElement
+	Finished        bool
+}
+
+// toString is for debugging purposes only
+func (p *PathElement) toString() string {
+	var result string = `Type: `
+	switch p.PathElementType {
+	case PathKey:
+		result = result + fmt.Sprintf("PathKey - %v\n", p.Value)
+	case ArrayIndex:
+		result = result + fmt.Sprintf("ArrayIndex - %v\n", p.Value)
+	case Operation:
+		result = result + "Operation - "
+		switch p.OperationType {
+		case Or:
+			result = result + "OR\n"
+		case And:
+			result = result + "AND\n"
+		case Equals:
+			result = result + "EQUALS\n"
+		}
+	}
+	return result
+}
+
+var operationTypeMapper map[int]OperationType
+
+func initMaps() {
+	precedenceMap = make(map[int]int)
+	operationTypeMapper = make(map[int]OperationType)
+
+	precedenceMap[TokenIds["("]] = 0
+
+	precedenceMap[TokenIds["OR_OPERATOR"]] = 10
+	operationTypeMapper[TokenIds["OR_OPERATOR"]] = Or
+
+	precedenceMap[TokenIds["AND_OPERATOR"]] = 20
+	operationTypeMapper[TokenIds["AND_OPERATOR"]] = And
+
+	precedenceMap[TokenIds["EQUALS_OPERATOR"]] = 30
+	operationTypeMapper[TokenIds["EQUALS_OPERATOR"]] = Equals
+}
+
+func createOperationPathElement(opToken *lex.Token) PathElement {
+	var childElements = make([][]*PathElement, 2)
+	var pathElement = PathElement{PathElementType: Operation, OperationType: operationTypeMapper[opToken.Type], ChildElements: childElements}
+	return pathElement
+}
+
+type PathPostFixer interface {
+	ConvertToPostfix([]*lex.Token) ([]*PathElement, error)
+}
+
+type pathPostFixer struct {
+}
+
+func NewPathPostFixer() PathPostFixer {
+	return &pathPostFixer{}
+}
+
+func popOpToResult(opStack []*lex.Token, result []*PathElement) ([]*lex.Token, []*PathElement) {
+	var operatorToPushToPostFix *lex.Token
+	opStack, operatorToPushToPostFix = opStack[0:len(opStack)-1], opStack[len(opStack)-1]
+	var pathElement = createOperationPathElement(operatorToPushToPostFix)
+	return opStack, append(result, &pathElement)
+}
+
+func finishPathKey(result []*PathElement) {
+	if len(result) > 0 {
+		// need to mark PathKey elements as finished so we
+		// stop appending PathKeys as children
+		result[len(result)-1].Finished = true
+	}
+}
+
+func (p *pathPostFixer) ConvertToPostfix(infixTokens []*lex.Token) ([]*PathElement, error) {
+	var result []*PathElement
+	// surround the whole expression with brackets, so the final ")" flushes the stack
+	var opStack = []*lex.Token{&lex.Token{Type: TokenIds["("]}}
+	var tokens = append(infixTokens, &lex.Token{Type: TokenIds[")"]})
+
+	for _, token := range tokens {
+		switch token.Type {
+		case TokenIds["PATH_KEY"]: // handle splats and array appends here too
+			var emptyArray = [][]*PathElement{make([]*PathElement, 0)}
+			var pathElement = PathElement{PathElementType: PathKey, Value: token.Value, ChildElements: emptyArray}
+
+			if len(result) > 0 && result[len(result)-1].PathElementType == PathKey && !result[len(result)-1].Finished {
+				var lastElement = result[len(result)-1]
+				lastElement.ChildElements[0] = append(lastElement.ChildElements[0], &pathElement)
+			} else {
+				result = append(result, &pathElement)
+			}
+		case TokenIds["("]:
+			opStack = append(opStack, token)
+			finishPathKey(result)
+		case TokenIds["OR_OPERATOR"], TokenIds["AND_OPERATOR"], TokenIds["EQUALS_OPERATOR"]:
+			var currentPrecedence = precedenceMap[token.Type]
+			// pop higher-precedence operators off onto the result
+			for len(opStack) > 0 && precedenceMap[opStack[len(opStack)-1].Type] > currentPrecedence {
+				opStack, result = popOpToResult(opStack, result)
+			}
+			// add this operator to the opStack
+			opStack = append(opStack, token)
+			finishPathKey(result)
+		case TokenIds[")"]:
+			for len(opStack) > 0 && opStack[len(opStack)-1].Type != TokenIds["("] {
+				opStack, result = popOpToResult(opStack, result)
+			}
+			if len(opStack) == 0 {
+				return nil, errors.New("bad path expression: closing bracket without matching opening bracket")
+			}
+			// now we should have ( as the last element on the opStack, get rid of it
+			opStack = opStack[0 : len(opStack)-1]
+			finishPathKey(result)
+		}
+	}
+	return result, nil
+}
diff --git a/pkg/yqlib/path_postfix_test.go b/pkg/yqlib/path_postfix_test.go
new file mode 100644
index 00000000..b377619b
--- /dev/null
+++ b/pkg/yqlib/path_postfix_test.go
@@ -0,0 +1,82 @@
+package yqlib
+
+import (
+	"testing"
+
+	"github.com/mikefarah/yq/v3/test"
+)
+
+// var tokeniser = NewPathTokeniser()
+var postFixer = NewPathPostFixer()
+
+func testExpression(expression string) (string, error) {
+	tokens, err := tokeniser.Tokenise(expression)
+	if err != nil {
+		return "", err
+	}
+	results, errorP := postFixer.ConvertToPostfix(tokens)
+	if errorP != nil {
+		return "", errorP
+	}
+	formatted := ""
+	for _, path := range results {
+		formatted = formatted + path.toString() + "--------\n"
+	}
+	return formatted, nil
+}
+
+func TestPostFixSimple(t *testing.T) {
+	var infix = "a"
+	var expectedOutput = "Type: PathKey - a\n--------\n"
+
+	actual, err := testExpression(infix)
+	if err != nil {
+		t.Error(err)
+	}
+
+	test.AssertResultComplex(t, expectedOutput, actual)
+}
+
+func TestPostFixOr(t *testing.T) {
+	var infix = "a OR b"
+	var expectedOutput = `Type: PathKey - a
+--------
+Type: PathKey - b
+--------
+Type: Operation - OR
+--------
+`
+
+	actual, err := testExpression(infix)
+	if err != nil {
+		t.Error(err)
+	}
+
+	test.AssertResultComplex(t, expectedOutput, actual)
+}
+
+func TestPostFixOrWithEquals(t *testing.T) {
+	var infix = "a==thing OR b==thongs"
+	var expectedOutput = `Type: PathKey - a
+--------
+Type: PathKey - thing
+--------
+Type: Operation - EQUALS
+--------
+Type: PathKey - b
+--------
+Type: PathKey - thongs
+--------
+Type: Operation - EQUALS
+--------
+Type: Operation - OR
+--------
+`
+
+	actual, err := testExpression(infix)
+	if err != nil {
+		t.Error(err)
+	}
+
+	test.AssertResultComplex(t, expectedOutput, actual)
+}
diff --git a/pkg/yqlib/path_tokeniser.go b/pkg/yqlib/path_tokeniser.go
index 848ee149..009cdf60 100644
--- a/pkg/yqlib/path_tokeniser.go
+++ b/pkg/yqlib/path_tokeniser.go
@@ -22,7 +22,9 @@ func initTokens() {
 		"**",
 	}
 	Tokens = []string{
-		"OPERATION", // ==, OR, AND
+		"OR_OPERATOR",     // OR
+		"AND_OPERATOR",    // AND
+		"EQUALS_OPERATOR", // ==
 		"PATH_KEY",    // apples
 		"ARRAY_INDEX", // 1234
 	}
@@ -31,6 +33,8 @@ func initTokens() {
 	for i, tok := range Tokens {
 		TokenIds[tok] = i
 	}
+
+	initMaps()
 }
 
 func skip(*lex.Scanner, *machines.Match) (interface{}, error) {
@@ -74,7 +78,9 @@ func initLexer() (*lex.Lexer, error) {
 		r := "\\" + strings.Join(strings.Split(lit, ""), "\\")
 		lexer.Add([]byte(r), token(lit))
 	}
-	lexer.Add([]byte(`([Oo][Rr]|[Aa][Nn][Dd]|==)`), token("OPERATION"))
+	lexer.Add([]byte(`([Oo][Rr])`), token("OR_OPERATOR"))
+	lexer.Add([]byte(`([Aa][Nn][Dd])`), token("AND_OPERATOR"))
+	lexer.Add([]byte(`(==)`), token("EQUALS_OPERATOR"))
 	lexer.Add([]byte(`\[-?[0-9]+\]`), numberToken("ARRAY_INDEX", true))
 	lexer.Add([]byte(`-?[0-9]+`), numberToken("ARRAY_INDEX", false))
 	lexer.Add([]byte("( |\t|\n|\r)+"), skip)
diff --git a/pkg/yqlib/path_tokeniser_test.go b/pkg/yqlib/path_tokeniser_test.go
index 710a98a6..75b00609 100644
--- a/pkg/yqlib/path_tokeniser_test.go
+++ b/pkg/yqlib/path_tokeniser_test.go
@@ -12,6 +12,7 @@ var tokeniserTests = []struct {
 }{ // TODO: Ensure ALL documented examples have tests! sheesh
 	{"apples.BANANAS", append(make([]interface{}, 0), "apples", "BANANAS")},
+	{"appl*.BANA*", append(make([]interface{}, 0), "appl*", "BANA*")},
 	{"a.b.**", append(make([]interface{}, 0), "a", "b", "**")},
 	{"a.\"=\".frog", append(make([]interface{}, 0), "a", "=", "frog")},
 	{"a.b.*", append(make([]interface{}, 0), "a", "b", "*")},
diff --git a/pkg/yqlib/path_tree.go b/pkg/yqlib/path_tree.go
index 038d4015..cf0b1eaf 100644
--- a/pkg/yqlib/path_tree.go
+++ b/pkg/yqlib/path_tree.go
@@ -2,30 +2,6 @@ package yqlib
 
 import lex "github.com/timtadh/lexmachine"
 
-type PathElementType uint32
-
-const (
-	PathKey PathElementType = 1 << iota
-	ArrayIndex
-	Operation
-)
-
-type OperationType uint32
-
-const (
-	None OperationType = 1 << iota
-	Or
-	And
-	ChildEquals
-)
-
-type PathElement struct {
-	PathElementType PathElementType
-	OperationType   OperationType
-	Value           interface{}
-	ChildElements   [][]*PathElement
-}
-
 func parseTree(tokens []*lex.Token, currentElement *PathElement, allElements []*PathElement) []*PathElement {
 	currentToken, remainingTokens := tokens[0], tokens[1:]
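
Note (not part of the patch): ConvertToPostfix is a standard shunting-yard pass. Operands go straight to the output list, while operators wait on opStack until an incoming operator of strictly lower precedence, or the closing bracket, pops them off. Below is a minimal sketch of driving the new code end to end from inside package yqlib; it assumes only the NewPathTokeniser constructor referenced in the test file plus the constructs introduced in this diff, and demoPostfix itself is a hypothetical helper, not code from the patch.

package yqlib

import "fmt"

// demoPostfix tokenises an infix path expression and prints the
// postfix element order produced by the shunting-yard conversion.
func demoPostfix() error {
	tokeniser := NewPathTokeniser()
	postFixer := NewPathPostFixer()

	tokens, err := tokeniser.Tokenise("a==thing OR b==thongs")
	if err != nil {
		return err
	}
	elements, err := postFixer.ConvertToPostfix(tokens)
	if err != nil {
		return err
	}
	// Per TestPostFixOrWithEquals, the printed order is:
	// a, thing, EQUALS, b, thongs, EQUALS, OR.
	// EQUALS (precedence 30) is popped to the result before
	// OR (precedence 10) ever is.
	for _, element := range elements {
		fmt.Print(element.toString())
	}
	return nil
}

Because the pop loop compares with a strictly-greater test, operators of equal precedence stay on the stack, so a chain like "a OR b OR c" groups as "a OR (b OR c)"; for these boolean operators that grouping does not change the result.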