include traverse as an operator token

This commit is contained in:
Mike Farah 2020-09-24 10:52:45 +10:00
parent e037c57725
commit 95bc1e1599
4 changed files with 97 additions and 31 deletions

View File

@ -0,0 +1,23 @@
package yqlib

// NOTE(review): everything below the package clause is commented-out draft
// code; none of it is compiled. The draft sketches a NodeLeafContext value
// (a yaml node, a head path element and a copied path stack), a constructor
// for it, and a DataTreeNavigator interface with a single Traverse method.
//
// The draft looks internally inconsistent as written and should be reconciled
// before it is re-enabled:
//   - newNodeLeafContext names its third parameter tailpathStack, but the body
//     reads pathStack;
//   - newNodeLeafContext is declared to return NodeLeafContext, but the body
//     constructs a NodeContext literal.

// import yaml "gopkg.in/yaml.v3"
// type NodeLeafContext struct {
// Node *yaml.Node
// Head interface{}
// PathStack []interface{}
// }
// func newNodeLeafContext(node *yaml.Node, head interface{}, tailpathStack []interface{}) NodeLeafContext {
// newPathStack := make([]interface{}, len(pathStack))
// copy(newPathStack, pathStack)
// return NodeContext{
// Node: node,
// Head: head,
// PathStack: newPathStack,
// }
// }
// type DataTreeNavigator interface {
// Traverse(value *NodeLeafContext)
// }

View File

@ -0,0 +1 @@
package yqlib

View File

@ -9,26 +9,34 @@ import (
) )
var Literals []string // The tokens representing literal strings var Literals []string // The tokens representing literal strings
var ClosingLiterals []string // The tokens representing literal strings
var Keywords []string // The keyword tokens var Keywords []string // The keyword tokens
var Tokens []string // All of the tokens (including literals and keywords) var Tokens []string // All of the tokens (including literals and keywords)
var TokenIds map[string]int // A map from the token names to their int ids var TokenIds map[string]int // A map from the token names to their int ids
func initTokens() { func initTokens() {
Literals = []string{ Literals = []string{ // these need a traverse operator infront
"(", "(",
")",
"[+]", "[+]",
"[*]", "[*]",
"**", "**",
} }
ClosingLiterals = []string{ // these need a traverse operator after
")",
}
Tokens = []string{ Tokens = []string{
"BEGIN_SUB_EXPRESSION",
"END_SUB_EXPRESSION",
"OR_OPERATOR", "OR_OPERATOR",
"AND_OPERATOR", "AND_OPERATOR",
"EQUALS_OPERATOR", "EQUALS_OPERATOR",
"EQUALS_SELF_OPERATOR",
"TRAVERSE_OPERATOR",
"PATH_KEY", // apples "PATH_KEY", // apples
"ARRAY_INDEX", // 1234 "ARRAY_INDEX", // 123
} }
Tokens = append(Tokens, Literals...) Tokens = append(Tokens, Literals...)
Tokens = append(Tokens, ClosingLiterals...)
TokenIds = make(map[string]int) TokenIds = make(map[string]int)
for i, tok := range Tokens { for i, tok := range Tokens {
TokenIds[tok] = i TokenIds[tok] = i
@ -78,15 +86,20 @@ func initLexer() (*lex.Lexer, error) {
r := "\\" + strings.Join(strings.Split(lit, ""), "\\") r := "\\" + strings.Join(strings.Split(lit, ""), "\\")
lexer.Add([]byte(r), token(lit)) lexer.Add([]byte(r), token(lit))
} }
for _, lit := range ClosingLiterals {
r := "\\" + strings.Join(strings.Split(lit, ""), "\\")
lexer.Add([]byte(r), token(lit))
}
lexer.Add([]byte(`([Oo][Rr])`), token("OR_OPERATOR")) lexer.Add([]byte(`([Oo][Rr])`), token("OR_OPERATOR"))
lexer.Add([]byte(`([Aa][Nn][Dd])`), token("AND_OPERATOR")) lexer.Add([]byte(`([Aa][Nn][Dd])`), token("AND_OPERATOR"))
lexer.Add([]byte(`(==)`), token("EQUALS_OPERATOR")) lexer.Add([]byte(`\.\s*==\s*`), token("EQUALS_SELF_OPERATOR"))
lexer.Add([]byte(`\s*==\s*`), token("EQUALS_OPERATOR"))
lexer.Add([]byte(`\[-?[0-9]+\]`), numberToken("ARRAY_INDEX", true)) lexer.Add([]byte(`\[-?[0-9]+\]`), numberToken("ARRAY_INDEX", true))
lexer.Add([]byte(`-?[0-9]+`), numberToken("ARRAY_INDEX", false)) lexer.Add([]byte(`-?[0-9]+`), numberToken("ARRAY_INDEX", false))
lexer.Add([]byte("( |\t|\n|\r)+"), skip) lexer.Add([]byte("( |\t|\n|\r)+"), skip)
lexer.Add([]byte(`"[^ "]+"`), wrappedToken("PATH_KEY")) lexer.Add([]byte(`"[^ "]+"`), wrappedToken("PATH_KEY"))
lexer.Add([]byte(`[^ \.\[\(\)=]+`), token("PATH_KEY")) lexer.Add([]byte(`[^ \.\[\(\)=]+`), token("PATH_KEY"))
lexer.Add([]byte(`\.`), skip) lexer.Add([]byte(`\.`), token("TRAVERSE_OPERATOR"))
err := lexer.Compile() err := lexer.Compile()
if err != nil { if err != nil {
return nil, err return nil, err
@ -129,6 +142,22 @@ func (p *pathTokeniser) Tokenise(path string) ([]*lex.Token, error) {
return nil, err return nil, err
} }
} }
var postProcessedTokens []*lex.Token = make([]*lex.Token, 0)
return tokens, nil for index, token := range tokens {
for _, literalTokenDef := range append(Literals, "ARRAY_INDEX") {
if index > 0 && token.Type == TokenIds[literalTokenDef] && tokens[index-1].Type != TokenIds["TRAVERSE_OPERATOR"] {
postProcessedTokens = append(postProcessedTokens, &lex.Token{Type: TokenIds["TRAVERSE_OPERATOR"], Value: "."})
}
}
postProcessedTokens = append(postProcessedTokens, token)
for _, literalTokenDef := range append(ClosingLiterals, "ARRAY_INDEX") {
if index != len(tokens)-1 && token.Type == TokenIds[literalTokenDef] && tokens[index+1].Type != TokenIds["TRAVERSE_OPERATOR"] {
postProcessedTokens = append(postProcessedTokens, &lex.Token{Type: TokenIds["TRAVERSE_OPERATOR"], Value: "."})
}
}
}
return postProcessedTokens, nil
} }

View File

@ -11,29 +11,42 @@ var tokeniserTests = []struct {
expectedTokens []interface{} expectedTokens []interface{}
}{ // TODO: Ensure ALL documented examples have tests! sheesh }{ // TODO: Ensure ALL documented examples have tests! sheesh
{"apples.BANANAS", append(make([]interface{}, 0), "apples", "BANANAS")}, {"apples.BANANAS", append(make([]interface{}, 0), "apples", ".", "BANANAS")},
{"appl*.BANA*", append(make([]interface{}, 0), "appl*", "BANA*")}, {"appl*.BANA*", append(make([]interface{}, 0), "appl*", ".", "BANA*")},
{"a.b.**", append(make([]interface{}, 0), "a", "b", "**")}, {"a.b.**", append(make([]interface{}, 0), "a", ".", "b", ".", "**")},
{"a.\"=\".frog", append(make([]interface{}, 0), "a", "=", "frog")}, {"a.\"=\".frog", append(make([]interface{}, 0), "a", ".", "=", ".", "frog")},
{"a.b.*", append(make([]interface{}, 0), "a", "b", "*")}, {"a.b.*", append(make([]interface{}, 0), "a", ".", "b", ".", "*")},
{"a.b.thin*", append(make([]interface{}, 0), "a", "b", "thin*")}, {"a.b.thin*", append(make([]interface{}, 0), "a", ".", "b", ".", "thin*")},
{"a.b[0]", append(make([]interface{}, 0), "a", "b", int64(0))}, {"a.b[0]", append(make([]interface{}, 0), "a", ".", "b", ".", int64(0))},
{"a.b[*]", append(make([]interface{}, 0), "a", "b", "[*]")}, {"a.b.[0]", append(make([]interface{}, 0), "a", ".", "b", ".", int64(0))},
{"a.b[-12]", append(make([]interface{}, 0), "a", "b", int64(-12))}, {"a.b[*]", append(make([]interface{}, 0), "a", ".", "b", ".", "[*]")},
{"a.b.0", append(make([]interface{}, 0), "a", "b", int64(0))}, {"a.b.[*]", append(make([]interface{}, 0), "a", ".", "b", ".", "[*]")},
{"a.b.d[+]", append(make([]interface{}, 0), "a", "b", "d", "[+]")}, {"a.b[+]", append(make([]interface{}, 0), "a", ".", "b", ".", "[+]")},
{"a.b.[+]", append(make([]interface{}, 0), "a", ".", "b", ".", "[+]")},
{"a.b[-12]", append(make([]interface{}, 0), "a", ".", "b", ".", int64(-12))},
{"a.b.0", append(make([]interface{}, 0), "a", ".", "b", ".", int64(0))},
{"a.b.-12", append(make([]interface{}, 0), "a", ".", "b", ".", int64(-12))},
{"a", append(make([]interface{}, 0), "a")}, {"a", append(make([]interface{}, 0), "a")},
{"\"a.b\".c", append(make([]interface{}, 0), "a.b", "c")}, {"\"a.b\".c", append(make([]interface{}, 0), "a.b", ".", "c")},
{`b."foo.bar"`, append(make([]interface{}, 0), "b", "foo.bar")}, {`b."foo.bar"`, append(make([]interface{}, 0), "b", ".", "foo.bar")},
{"animals(.==cat)", append(make([]interface{}, 0), "animals", "(", "==", "cat", ")")}, // TODO validate this dot is not a join? {"animals(.==cat)", append(make([]interface{}, 0), "animals", ".", "(", ".==", "cat", ")")},
{"animals(.==c*)", append(make([]interface{}, 0), "animals", "(", "==", "c*", ")")}, // TODO validate this dot is not a join? {"animals.(.==cat)", append(make([]interface{}, 0), "animals", ".", "(", ".==", "cat", ")")},
{"[1].a.d", append(make([]interface{}, 0), int64(1), "a", "d")}, {"animals(. == cat)", append(make([]interface{}, 0), "animals", ".", "(", ". == ", "cat", ")")},
{"a[0].c", append(make([]interface{}, 0), "a", int64(0), "c")}, {"animals(.==c*)", append(make([]interface{}, 0), "animals", ".", "(", ".==", "c*", ")")},
{"animals(a.b==c*)", append(make([]interface{}, 0), "animals", ".", "(", "a", ".", "b", "==", "c*", ")")},
{"animals.(a.b==c*)", append(make([]interface{}, 0), "animals", ".", "(", "a", ".", "b", "==", "c*", ")")},
{"(a.b==c*).animals", append(make([]interface{}, 0), "(", "a", ".", "b", "==", "c*", ")", ".", "animals")},
{"(a.b==c*)animals", append(make([]interface{}, 0), "(", "a", ".", "b", "==", "c*", ")", ".", "animals")},
{"[1].a.d", append(make([]interface{}, 0), int64(1), ".", "a", ".", "d")},
{"[1]a.d", append(make([]interface{}, 0), int64(1), ".", "a", ".", "d")},
{"a[0]c", append(make([]interface{}, 0), "a", ".", int64(0), ".", "c")},
{"a.[0].c", append(make([]interface{}, 0), "a", ".", int64(0), ".", "c")},
{"[0]", append(make([]interface{}, 0), int64(0))}, {"[0]", append(make([]interface{}, 0), int64(0))},
{"a.cool(s.d.f==cool)", append(make([]interface{}, 0), "a", "cool", "(", "s", "d", "f", "==", "cool", ")")}, {"0", append(make([]interface{}, 0), int64(0))},
{"a.cool(s.d.f==cool OR t.b.h==frog).caterpillar", append(make([]interface{}, 0), "a", "cool", "(", "s", "d", "f", "==", "cool", "OR", "t", "b", "h", "==", "frog", ")", "caterpillar")}, {"a.cool(s.d.f == cool)", append(make([]interface{}, 0), "a", ".", "cool", ".", "(", "s", ".", "d", ".", "f", " == ", "cool", ")")},
{"a.cool(s.d.f==cool and t.b.h==frog)*", append(make([]interface{}, 0), "a", "cool", "(", "s", "d", "f", "==", "cool", "and", "t", "b", "h", "==", "frog", ")", "*")}, {"a.cool.(s.d.f==cool OR t.b.h==frog).caterpillar", append(make([]interface{}, 0), "a", ".", "cool", ".", "(", "s", ".", "d", ".", "f", "==", "cool", "OR", "t", ".", "b", ".", "h", "==", "frog", ")", ".", "caterpillar")},
{"a.cool(s.d.f==cool and t.b.h==frog).th*", append(make([]interface{}, 0), "a", "cool", "(", "s", "d", "f", "==", "cool", "and", "t", "b", "h", "==", "frog", ")", "th*")}, {"a.cool(s.d.f==cool and t.b.h==frog)*", append(make([]interface{}, 0), "a", ".", "cool", ".", "(", "s", ".", "d", ".", "f", "==", "cool", "and", "t", ".", "b", ".", "h", "==", "frog", ")", ".", "*")},
{"a.cool(s.d.f==cool and t.b.h==frog).th*", append(make([]interface{}, 0), "a", ".", "cool", ".", "(", "s", ".", "d", ".", "f", "==", "cool", "and", "t", ".", "b", ".", "h", "==", "frog", ")", ".", "th*")},
} }
var tokeniser = NewPathTokeniser() var tokeniser = NewPathTokeniser()