Refactoring

This commit is contained in:
Mike Farah 2020-10-11 11:24:22 +11:00
parent e6fd6905eb
commit e0d1aed5b9
6 changed files with 254 additions and 152 deletions

View File

@ -522,7 +522,7 @@ func TestDataTreeNavigatorAnd(t *testing.T) {
test.AssertResult(t, expected, resultsToString(results))
}
func TestDataTreeNavigatorEquals(t *testing.T) {
func TestDataTreeNavigatorEqualsSimple(t *testing.T) {
nodes := readDoc(t, `a:
cat: {b: apple, c: yes}
@ -549,6 +549,94 @@ func TestDataTreeNavigatorEquals(t *testing.T) {
test.AssertResult(t, expected, resultsToString(results))
}
// TestDataTreeNavigatorEqualsSelf checks that a self-comparison filter
// ("." on the left-hand side of "==") keeps only those already-selected nodes
// whose own value matches the pattern.
func TestDataTreeNavigatorEqualsSelf(t *testing.T) {
	nodes := readDoc(t, `a: frog
b: cat
c: frog`)

	path, errPath := treeCreator.ParsePath("(a or b).(. == frog)")
	if errPath != nil {
		// Abort: navigating with a path that failed to parse would only
		// produce follow-on failures unrelated to the real problem.
		t.Fatal(errPath)
	}

	results, errNav := treeNavigator.GetMatchingNodes(nodes, path)
	if errNav != nil {
		// Abort: there is no meaningful result set to compare.
		t.Fatal(errNav)
	}

	// Only "a" is expected; "c" also holds frog but is not part of "(a or b)".
	expected := `
-- Node --
Document 0, path: [a]
Tag: !!str, Kind: ScalarNode, Anchor:
frog
`

	test.AssertResult(t, expected, resultsToString(results))
}
// TestDataTreeNavigatorEqualsNested checks that an equality filter with a
// child key on the left-hand side ("t == frog") returns the parent entries
// whose child value matches the pattern.
func TestDataTreeNavigatorEqualsNested(t *testing.T) {
	nodes := readDoc(t, `a: {t: frog}
b: {t: cat}
c: {t: frog}`)

	path, errPath := treeCreator.ParsePath("(t == frog)")
	if errPath != nil {
		// Abort: navigating with a path that failed to parse would only
		// produce follow-on failures unrelated to the real problem.
		t.Fatal(errPath)
	}

	results, errNav := treeNavigator.GetMatchingNodes(nodes, path)
	if errNav != nil {
		// Abort: there is no meaningful result set to compare.
		t.Fatal(errNav)
	}

	// The matching parents ("a" and "c") are returned, not the "t" children.
	expected := `
-- Node --
Document 0, path: [a]
Tag: !!map, Kind: MappingNode, Anchor:
{t: frog}
-- Node --
Document 0, path: [c]
Tag: !!map, Kind: MappingNode, Anchor:
{t: frog}
`

	test.AssertResult(t, expected, resultsToString(results))
}
// TestDataTreeNavigatorArrayEqualsSelf checks a self-comparison filter applied
// to the elements of a sequence, using a wildcard pattern ("*og") on the
// right-hand side.
func TestDataTreeNavigatorArrayEqualsSelf(t *testing.T) {
	nodes := readDoc(t, `- cat
- dog
- frog`)

	path, errPath := treeCreator.ParsePath("*(. == *og)")
	if errPath != nil {
		// Abort: navigating with a path that failed to parse would only
		// produce follow-on failures unrelated to the real problem.
		t.Fatal(errPath)
	}

	results, errNav := treeNavigator.GetMatchingNodes(nodes, path)
	if errNav != nil {
		// Abort: there is no meaningful result set to compare.
		t.Fatal(errNav)
	}

	// "dog" and "frog" match "*og"; "cat" does not.
	expected := `
-- Node --
Document 0, path: [1]
Tag: !!str, Kind: ScalarNode, Anchor:
dog
-- Node --
Document 0, path: [2]
Tag: !!str, Kind: ScalarNode, Anchor:
frog
`

	test.AssertResult(t, expected, resultsToString(results))
}
func TestDataTreeNavigatorArrayEquals(t *testing.T) {
nodes := readDoc(t, `- { b: apple, animal: rabbit }

View File

@ -77,26 +77,46 @@ func EqualsOperator(d *dataTreeNavigator, matchMap *orderedmap.OrderedMap, pathN
var results = orderedmap.NewOrderedMap()
for el := matchMap.Front(); el != nil; el = el.Next() {
children, err := splatNode(d, el.Value.(*CandidateNode))
candidate := el.Value.(*CandidateNode)
valuePattern := pathNode.Rhs.PathElement.StringValue
log.Debug("checking %v", candidate)
if pathNode.Lhs.PathElement.PathElementType == SelfReference {
if Match(candidate.Node.Value, valuePattern) {
results.Set(el.Key, el.Value)
}
} else {
errInChild := findMatchingChildren(d, results, candidate, pathNode.Lhs, valuePattern)
if errInChild != nil {
return nil, errInChild
}
}
}
return results, nil
}
func findMatchingChildren(d *dataTreeNavigator, results *orderedmap.OrderedMap, candidate *CandidateNode, lhs *PathTreeNode, valuePattern string) error {
children, err := splatNode(d, candidate)
log.Debugf("-- splatted matches, ")
if err != nil {
return nil, err
return err
}
for childEl := children.Front(); childEl != nil; childEl = childEl.Next() {
childMap := orderedmap.NewOrderedMap()
childMap.Set(childEl.Key, childEl.Value)
childMatches, errChild := d.getMatchingNodes(childMap, pathNode.Lhs)
childMatches, errChild := d.getMatchingNodes(childMap, lhs)
log.Debug("got the LHS")
if errChild != nil {
return nil, errChild
return errChild
}
if containsMatchingValue(childMatches, pathNode.Rhs.PathElement.StringValue) {
if containsMatchingValue(childMatches, valuePattern) {
results.Set(childEl.Key, childEl.Value)
}
}
}
return results, nil
return nil
}
func containsMatchingValue(matchMap *orderedmap.OrderedMap, valuePattern string) bool {
@ -104,10 +124,12 @@ func containsMatchingValue(matchMap *orderedmap.OrderedMap, valuePattern string)
for el := matchMap.Front(); el != nil; el = el.Next() {
node := el.Value.(*CandidateNode)
log.Debugf("-- compating %v to %v", node.Node.Value, valuePattern)
if Match(node.Node.Value, valuePattern) {
return true
}
}
log.Debugf("-- done findMatchingValues")
return false
}

View File

@ -3,8 +3,6 @@ package treeops
import (
"errors"
"fmt"
lex "github.com/timtadh/lexmachine"
)
var precedenceMap map[int]int
@ -15,6 +13,9 @@ const (
PathKey PathElementType = 1 << iota
ArrayIndex
Operation
SelfReference
OpenBracket
CloseBracket
)
type OperationType uint32
@ -25,7 +26,6 @@ const (
Or
And
Equals
EqualsSelf
Assign
DeleteChild
)
@ -45,6 +45,8 @@ func (p *PathElement) toString() string {
result = result + fmt.Sprintf("PathKey - '%v'\n", p.Value)
case ArrayIndex:
result = result + fmt.Sprintf("ArrayIndex - '%v'\n", p.Value)
case SelfReference:
result = result + fmt.Sprintf("SELF\n")
case Operation:
result = result + "Operation - "
switch p.OperationType {
@ -54,8 +56,6 @@ func (p *PathElement) toString() string {
result = result + "AND\n"
case Equals:
result = result + "EQUALS\n"
case EqualsSelf:
result = result + "EQUALS SELF\n"
case Assign:
result = result + "ASSIGN\n"
case Traverse:
@ -69,43 +69,13 @@ func (p *PathElement) toString() string {
return result
}
var operationTypeMapper map[int]OperationType
func initMaps() {
precedenceMap = make(map[int]int)
operationTypeMapper = make(map[int]OperationType)
precedenceMap[TokenIds["("]] = 0
precedenceMap[TokenIds["OR_OPERATOR"]] = 10
operationTypeMapper[TokenIds["OR_OPERATOR"]] = Or
precedenceMap[TokenIds["AND_OPERATOR"]] = 20
operationTypeMapper[TokenIds["AND_OPERATOR"]] = And
precedenceMap[TokenIds["EQUALS_OPERATOR"]] = 30
operationTypeMapper[TokenIds["EQUALS_OPERATOR"]] = Equals
precedenceMap[TokenIds["EQUALS_SELF_OPERATOR"]] = 30
operationTypeMapper[TokenIds["EQUALS_SELF_OPERATOR"]] = EqualsSelf
precedenceMap[TokenIds["DELETE_CHILD_OPERATOR"]] = 30
operationTypeMapper[TokenIds["DELETE_CHILD_OPERATOR"]] = DeleteChild
precedenceMap[TokenIds["ASSIGN_OPERATOR"]] = 35
operationTypeMapper[TokenIds["ASSIGN_OPERATOR"]] = Assign
precedenceMap[TokenIds["TRAVERSE_OPERATOR"]] = 40
operationTypeMapper[TokenIds["TRAVERSE_OPERATOR"]] = Traverse
}
func createOperationPathElement(opToken *lex.Token) PathElement {
var pathElement = PathElement{PathElementType: Operation, OperationType: operationTypeMapper[opToken.Type]}
func createOperationPathElement(opToken *Token) PathElement {
var pathElement = PathElement{PathElementType: Operation, OperationType: opToken.OperationType}
return pathElement
}
type PathPostFixer interface {
ConvertToPostfix([]*lex.Token) ([]*PathElement, error)
ConvertToPostfix([]*Token) ([]*PathElement, error)
}
type pathPostFixer struct {
@ -115,28 +85,29 @@ func NewPathPostFixer() PathPostFixer {
return &pathPostFixer{}
}
func popOpToResult(opStack []*lex.Token, result []*PathElement) ([]*lex.Token, []*PathElement) {
var operatorToPushToPostFix *lex.Token
func popOpToResult(opStack []*Token, result []*PathElement) ([]*Token, []*PathElement) {
var operatorToPushToPostFix *Token
opStack, operatorToPushToPostFix = opStack[0:len(opStack)-1], opStack[len(opStack)-1]
var pathElement = createOperationPathElement(operatorToPushToPostFix)
return opStack, append(result, &pathElement)
}
func (p *pathPostFixer) ConvertToPostfix(infixTokens []*lex.Token) ([]*PathElement, error) {
func (p *pathPostFixer) ConvertToPostfix(infixTokens []*Token) ([]*PathElement, error) {
var result []*PathElement
// surround the whole thing with brackets
var opStack = []*lex.Token{&lex.Token{Type: TokenIds["("]}}
var tokens = append(infixTokens, &lex.Token{Type: TokenIds[")"]})
var opStack = []*Token{&Token{PathElementType: OpenBracket}}
var tokens = append(infixTokens, &Token{PathElementType: CloseBracket})
for _, token := range tokens {
switch token.Type {
case TokenIds["PATH_KEY"], TokenIds["ARRAY_INDEX"], TokenIds["[+]"], TokenIds["[*]"], TokenIds["**"]:
var pathElement = PathElement{PathElementType: PathKey, Value: token.Value, StringValue: fmt.Sprintf("%v", token.Value)}
switch token.PathElementType {
case PathKey, ArrayIndex, SelfReference:
var pathElement = PathElement{PathElementType: token.PathElementType, Value: token.Value, StringValue: token.StringValue}
result = append(result, &pathElement)
case TokenIds["("]:
case OpenBracket:
opStack = append(opStack, token)
case TokenIds[")"]:
for len(opStack) > 0 && opStack[len(opStack)-1].Type != TokenIds["("] {
case CloseBracket:
for len(opStack) > 0 && opStack[len(opStack)-1].PathElementType != OpenBracket {
opStack, result = popOpToResult(opStack, result)
}
if len(opStack) == 0 {
@ -144,10 +115,11 @@ func (p *pathPostFixer) ConvertToPostfix(infixTokens []*lex.Token) ([]*PathEleme
}
// now we should have ( as the last element on the opStack, get rid of it
opStack = opStack[0 : len(opStack)-1]
default:
var currentPrecedence = precedenceMap[token.Type]
var currentPrecedence = p.precendenceOf(token)
// pop operators of higher or equal precedence off the stack onto the result
for len(opStack) > 0 && precedenceMap[opStack[len(opStack)-1].Type] >= currentPrecedence {
for len(opStack) > 0 && p.precendenceOf(opStack[len(opStack)-1]) >= currentPrecedence {
opStack, result = popOpToResult(opStack, result)
}
// add this operator to the opStack
@ -156,3 +128,19 @@ func (p *pathPostFixer) ConvertToPostfix(infixTokens []*lex.Token) ([]*PathEleme
}
return result, nil
}
// precendenceOf reports the binding strength of token's operation for the
// infix-to-postfix conversion; larger numbers bind tighter. Any operation type
// that is not listed below yields 0.
func (p *pathPostFixer) precendenceOf(token *Token) int {
	precedence := 0
	switch op := token.OperationType; op {
	case Traverse:
		precedence = 40
	case Assign:
		precedence = 35
	case Equals, DeleteChild:
		precedence = 30
	case And:
		precedence = 20
	case Or:
		precedence = 10
	}
	return precedence
}

View File

@ -26,8 +26,16 @@ func testExpression(expression string) (string, error) {
}
func TestPostFixArrayEquals(t *testing.T) {
var infix = "a"
var expectedOutput = `PathKey - 'a'
var infix = "animals(.== cat)"
var expectedOutput = `PathKey - 'animals'
--------
SELF
--------
PathKey - 'cat'
--------
Operation - EQUALS
--------
Operation - TRAVERSE
--------
`

View File

@ -2,54 +2,48 @@ package treeops
import (
"strconv"
"strings"
lex "github.com/timtadh/lexmachine"
"github.com/timtadh/lexmachine/machines"
)
var Literals []string // The tokens representing literal strings
var Keywords []string // The keyword tokens
var Tokens []string // All of the tokens (including literals and keywords)
var TokenIds map[string]int // A map from the token names to their int ids
var bracketLiterals []string
func initTokens() {
bracketLiterals = []string{"(", ")"}
Literals = []string{ // these need a traverse operator infront
"[+]",
"[*]",
"**",
}
Tokens = []string{
"OR_OPERATOR",
"AND_OPERATOR",
"EQUALS_OPERATOR",
"EQUALS_SELF_OPERATOR",
"ASSIGN_OPERATOR",
"DELETE_CHILD_OPERATOR",
"TRAVERSE_OPERATOR",
"PATH_KEY", // apples
"ARRAY_INDEX", // 123
}
Tokens = append(Tokens, bracketLiterals...)
Tokens = append(Tokens, Literals...)
TokenIds = make(map[string]int)
for i, tok := range Tokens {
TokenIds[tok] = i
}
initMaps()
}
// skip is a lexer action that yields neither a token nor an error, so the
// text matched by its pattern contributes nothing to the token stream.
func skip(_ *lex.Scanner, _ *machines.Match) (interface{}, error) {
	return nil, nil
}
func token(name string) lex.Action {
// Token is a single element produced while tokenising a path expression.
// Besides the element/operation classification and its value, it carries
// flags that Tokenise's post-processing pass reads to insert implied tokens.
type Token struct {
// PathElementType classifies the token (PathKey, ArrayIndex, Operation,
// SelfReference, OpenBracket or CloseBracket).
PathElementType PathElementType
// OperationType identifies the operator; the lexer actions set it for
// Operation tokens.
OperationType OperationType
// Value is the token payload: the matched (possibly unwrapped) text, the
// parsed array index, or the fixed literal.
Value interface{}
// StringValue is the textual form of the token.
StringValue string
// AgainstSelf marks an operator that was written with a leading '.'
// (e.g. ".=="); Tokenise emits a SelfReference token in front of it.
AgainstSelf bool
CheckForPreTraverse bool // this token can sometimes have the traverse '.' missing in front of it
// e.g. a[1] should really be a.[1]
CheckForPostTraverse bool // same thing but for post, e.g. [1]cat should really be [1].cat
}
func pathToken(wrapped bool) lex.Action {
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
return s.Token(TokenIds[name], string(m.Bytes), m), nil
value := string(m.Bytes)
if wrapped {
value = unwrap(value)
}
return &Token{PathElementType: PathKey, OperationType: None, Value: value, StringValue: value}, nil
}
}
// opToken builds the lexer action for an operator pattern. Each match yields
// an Operation token for op whose Value and StringValue are the matched text;
// againstSelf is copied onto the token unchanged.
func opToken(op OperationType, againstSelf bool) lex.Action {
	action := func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
		matched := string(m.Bytes)
		tok := &Token{
			PathElementType: Operation,
			OperationType:   op,
			Value:           matched,
			StringValue:     matched,
			AgainstSelf:     againstSelf,
		}
		return tok, nil
	}
	return action
}
// literalToken builds the lexer action for a fixed literal. Each match yields
// a fresh token of type pType whose Value and StringValue are literal (the
// matched bytes are not consulted), with the pre/post traverse flags set from
// checkForPre and checkForPost.
func literalToken(pType PathElementType, literal string, checkForPre bool, checkForPost bool) lex.Action {
	return func(_ *lex.Scanner, _ *machines.Match) (interface{}, error) {
		// Built inside the closure so every match gets its own Token.
		tok := Token{
			PathElementType:      pType,
			Value:                literal,
			StringValue:          literal,
			CheckForPreTraverse:  checkForPre,
			CheckForPostTraverse: checkForPost,
		}
		return &tok, nil
	}
}
@ -57,13 +51,7 @@ func unwrap(value string) string {
return value[1 : len(value)-1]
}
func wrappedToken(name string) lex.Action {
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
return s.Token(TokenIds[name], unwrap(string(m.Bytes)), m), nil
}
}
func numberToken(name string, wrapped bool) lex.Action {
func arrayIndextoken(wrapped bool, checkForPre bool, checkForPost bool) lex.Action {
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
var numberString = string(m.Bytes)
if wrapped {
@ -73,33 +61,40 @@ func numberToken(name string, wrapped bool) lex.Action {
if errParsingInt != nil {
return nil, errParsingInt
}
return s.Token(TokenIds[name], number, m), nil
return &Token{PathElementType: ArrayIndex, Value: number, StringValue: numberString, CheckForPreTraverse: checkForPre, CheckForPostTraverse: checkForPost}, nil
}
}
// Creates the lexer object and compiles the NFA.
func initLexer() (*lex.Lexer, error) {
lexer := lex.NewLexer()
for _, lit := range bracketLiterals {
r := "\\" + strings.Join(strings.Split(lit, ""), "\\")
lexer.Add([]byte(r), token(lit))
}
for _, lit := range Literals {
r := "\\" + strings.Join(strings.Split(lit, ""), "\\")
lexer.Add([]byte(r), token(lit))
}
lexer.Add([]byte(`([Oo][Rr])`), token("OR_OPERATOR"))
lexer.Add([]byte(`([Aa][Nn][Dd])`), token("AND_OPERATOR"))
lexer.Add([]byte(`\.\s*==\s*`), token("EQUALS_SELF_OPERATOR"))
lexer.Add([]byte(`\s*==\s*`), token("EQUALS_OPERATOR"))
lexer.Add([]byte(`\s*.-\s*`), token("DELETE_CHILD_OPERATOR"))
lexer.Add([]byte(`\s*:=\s*`), token("ASSIGN_OPERATOR"))
lexer.Add([]byte(`\[-?[0-9]+\]`), numberToken("ARRAY_INDEX", true))
lexer.Add([]byte(`-?[0-9]+`), numberToken("ARRAY_INDEX", false))
lexer.Add([]byte(`\(`), literalToken(OpenBracket, "(", true, false))
lexer.Add([]byte(`\)`), literalToken(CloseBracket, ")", false, true))
lexer.Add([]byte(`\[\+\]`), literalToken(PathKey, "[+]", true, true))
lexer.Add([]byte(`\[\*\]`), literalToken(PathKey, "[*]", true, true))
lexer.Add([]byte(`\*\*`), literalToken(PathKey, "**", false, false))
lexer.Add([]byte(`([Oo][Rr])`), opToken(Or, false))
lexer.Add([]byte(`([Aa][Nn][Dd])`), opToken(And, false))
lexer.Add([]byte(`\.\s*==\s*`), opToken(Equals, true))
lexer.Add([]byte(`\s*==\s*`), opToken(Equals, false))
lexer.Add([]byte(`\.\s*.-\s*`), opToken(DeleteChild, true))
lexer.Add([]byte(`\s*.-\s*`), opToken(DeleteChild, false))
lexer.Add([]byte(`\.\s*:=\s*`), opToken(Assign, true))
lexer.Add([]byte(`\s*:=\s*`), opToken(Assign, false))
lexer.Add([]byte(`\[-?[0-9]+\]`), arrayIndextoken(true, true, true))
lexer.Add([]byte(`-?[0-9]+`), arrayIndextoken(false, false, false))
lexer.Add([]byte("( |\t|\n|\r)+"), skip)
lexer.Add([]byte(`"[^ "]+"`), wrappedToken("PATH_KEY"))
lexer.Add([]byte(`[^ \.\[\(\)=]+`), token("PATH_KEY"))
lexer.Add([]byte(`\.`), token("TRAVERSE_OPERATOR"))
lexer.Add([]byte(`"[^ "]+"`), pathToken(true))
lexer.Add([]byte(`[^ \.\[\(\)=]+`), pathToken(false))
lexer.Add([]byte(`\.`), opToken(Traverse, false))
err := lexer.Compile()
if err != nil {
return nil, err
@ -108,7 +103,7 @@ func initLexer() (*lex.Lexer, error) {
}
type PathTokeniser interface {
Tokenise(path string) ([]*lex.Token, error)
Tokenise(path string) ([]*Token, error)
}
type pathTokeniser struct {
@ -116,7 +111,6 @@ type pathTokeniser struct {
}
func NewPathTokeniser() PathTokeniser {
initTokens()
var lexer, err = initLexer()
if err != nil {
panic(err)
@ -124,38 +118,40 @@ func NewPathTokeniser() PathTokeniser {
return &pathTokeniser{lexer}
}
func (p *pathTokeniser) Tokenise(path string) ([]*lex.Token, error) {
func (p *pathTokeniser) Tokenise(path string) ([]*Token, error) {
scanner, err := p.lexer.Scanner([]byte(path))
if err != nil {
return nil, err
}
var tokens []*lex.Token
var tokens []*Token
for tok, err, eof := scanner.Next(); !eof; tok, err, eof = scanner.Next() {
if tok != nil {
token := tok.(*lex.Token)
log.Debugf("Tokenising %v - %v", token.Value, Tokens[token.Type])
token := tok.(*Token)
log.Debugf("Tokenising %v", token.Value)
tokens = append(tokens, token)
}
if err != nil {
return nil, err
}
}
var postProcessedTokens []*lex.Token = make([]*lex.Token, 0)
var postProcessedTokens = make([]*Token, 0)
for index, token := range tokens {
for _, literalTokenDef := range append(Literals, "ARRAY_INDEX", "(") {
if index > 0 && token.Type == TokenIds[literalTokenDef] && tokens[index-1].Type == TokenIds["PATH_KEY"] {
postProcessedTokens = append(postProcessedTokens, &lex.Token{Type: TokenIds["TRAVERSE_OPERATOR"], Value: "."})
if index > 0 && token.CheckForPreTraverse &&
(tokens[index-1].PathElementType == PathKey || tokens[index-1].PathElementType == CloseBracket) {
postProcessedTokens = append(postProcessedTokens, &Token{PathElementType: Operation, OperationType: Traverse, Value: "."})
}
if token.PathElementType == Operation && token.AgainstSelf {
postProcessedTokens = append(postProcessedTokens, &Token{PathElementType: SelfReference, Value: "SELF"})
}
postProcessedTokens = append(postProcessedTokens, token)
for _, literalTokenDef := range append(Literals, "ARRAY_INDEX", ")") {
if index != len(tokens)-1 && token.Type == TokenIds[literalTokenDef] && tokens[index+1].Type == TokenIds["PATH_KEY"] {
postProcessedTokens = append(postProcessedTokens, &lex.Token{Type: TokenIds["TRAVERSE_OPERATOR"], Value: "."})
}
if index != len(tokens)-1 && token.CheckForPostTraverse &&
tokens[index+1].PathElementType == PathKey {
postProcessedTokens = append(postProcessedTokens, &Token{PathElementType: Operation, OperationType: Traverse, Value: "."})
}
}

View File

@ -33,10 +33,10 @@ var tokeniserTests = []struct {
{"a", append(make([]interface{}, 0), "a")},
{"\"a.b\".c", append(make([]interface{}, 0), "a.b", ".", "c")},
{`b."foo.bar"`, append(make([]interface{}, 0), "b", ".", "foo.bar")},
{"animals(.==cat)", append(make([]interface{}, 0), "animals", ".", "(", ".==", "cat", ")")},
{"animals.(.==cat)", append(make([]interface{}, 0), "animals", ".", "(", ".==", "cat", ")")},
{"animals(. == cat)", append(make([]interface{}, 0), "animals", ".", "(", ". == ", "cat", ")")},
{"animals(.==c*)", append(make([]interface{}, 0), "animals", ".", "(", ".==", "c*", ")")},
{"animals(.==cat)", append(make([]interface{}, 0), "animals", ".", "(", "SELF", ".==", "cat", ")")},
{"animals.(.==cat)", append(make([]interface{}, 0), "animals", ".", "(", "SELF", ".==", "cat", ")")},
{"animals(. == cat)", append(make([]interface{}, 0), "animals", ".", "(", "SELF", ". == ", "cat", ")")},
{"animals(.==c*)", append(make([]interface{}, 0), "animals", ".", "(", "SELF", ".==", "c*", ")")},
{"animals(a.b==c*)", append(make([]interface{}, 0), "animals", ".", "(", "a", ".", "b", "==", "c*", ")")},
{"animals.(a.b==c*)", append(make([]interface{}, 0), "animals", ".", "(", "a", ".", "b", "==", "c*", ")")},
{"(a.b==c*).animals", append(make([]interface{}, 0), "(", "a", ".", "b", "==", "c*", ")", ".", "animals")},