yq/pkg/yqlib/expression_tokeniser.go

610 lines
24 KiB
Go
Raw Normal View History

2020-11-03 23:48:43 +00:00
package yqlib
2020-09-17 11:58:01 +00:00
import (
2020-12-17 03:02:54 +00:00
"fmt"
2021-10-24 00:35:40 +00:00
"regexp"
2020-09-17 12:12:56 +00:00
"strconv"
2021-01-13 05:54:28 +00:00
"strings"
2020-09-17 11:58:01 +00:00
lex "github.com/timtadh/lexmachine"
"github.com/timtadh/lexmachine/machines"
)
2020-10-11 00:24:22 +00:00
// skip is a lexer action that yields neither a token nor an error.
func skip(_ *lex.Scanner, _ *machines.Match) (interface{}, error) {
	var (
		noToken interface{}
		noErr   error
	)
	return noToken, noErr
}
2020-09-17 11:58:01 +00:00
// tokenType identifies the kind of a lexed token. Every kind occupies its
// own bit.
type tokenType uint32

// The constants carry the tokenType type explicitly so they cannot be mixed
// up with plain integers; their numeric values are unchanged.
const (
	operationToken       tokenType = 1 << iota // token that carries an Operation
	openBracket                                // "("
	closeBracket                               // ")"
	openCollect                                // "["
	closeCollect                               // "]"
	openCollectObject                          // "{"
	closeCollectObject                         // "}"
	traverseArrayCollect                       // ".["
)
type token struct {
TokenType tokenType
2020-11-19 05:45:05 +00:00
Operation *Operation
2021-05-09 05:36:33 +00:00
AssignOperation *Operation // e.g. tag (GetTag) op becomes AssignTag if '=' follows it
CheckForPostTraverse bool // e.g. [1]cat should really be [1].cat
Match *machines.Match // match that created this token
2020-09-20 12:40:09 +00:00
2020-09-17 11:58:01 +00:00
}
2021-02-03 06:11:47 +00:00
func (t *token) toString(detail bool) string {
2021-01-11 22:55:55 +00:00
if t.TokenType == operationToken {
2021-02-03 06:11:47 +00:00
if detail {
return fmt.Sprintf("%v (%v)", t.Operation.toString(), t.Operation.OperationType.Precedence)
}
2020-10-20 04:33:20 +00:00
return t.Operation.toString()
2021-01-11 22:55:55 +00:00
} else if t.TokenType == openBracket {
2020-10-20 04:33:20 +00:00
return "("
2021-01-11 22:55:55 +00:00
} else if t.TokenType == closeBracket {
2020-10-20 04:33:20 +00:00
return ")"
2021-01-11 22:55:55 +00:00
} else if t.TokenType == openCollect {
2020-10-20 04:33:20 +00:00
return "["
2021-01-11 22:55:55 +00:00
} else if t.TokenType == closeCollect {
2020-10-20 04:33:20 +00:00
return "]"
2021-01-11 22:55:55 +00:00
} else if t.TokenType == openCollectObject {
2020-10-21 01:54:58 +00:00
return "{"
2021-01-11 22:55:55 +00:00
} else if t.TokenType == closeCollectObject {
2020-10-21 01:54:58 +00:00
return "}"
2021-01-11 22:55:55 +00:00
} else if t.TokenType == traverseArrayCollect {
2020-12-26 10:37:08 +00:00
return ".["
2020-10-20 04:33:20 +00:00
} else {
2020-11-13 03:07:11 +00:00
return "NFI"
2020-10-20 04:33:20 +00:00
}
}
2020-10-11 00:24:22 +00:00
func pathToken(wrapped bool) lex.Action {
2020-09-17 11:58:01 +00:00
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
2020-10-11 00:24:22 +00:00
value := string(m.Bytes)
2021-05-09 05:12:50 +00:00
prefs := traversePreferences{}
if value[len(value)-1:] == "?" {
prefs.OptionalTraverse = true
value = value[:len(value)-1]
}
2020-11-13 03:07:11 +00:00
value = value[1:]
2020-10-11 00:24:22 +00:00
if wrapped {
value = unwrap(value)
}
2020-11-22 02:50:32 +00:00
log.Debug("PathToken %v", value)
2021-05-09 05:12:50 +00:00
op := &Operation{OperationType: traversePathOpType, Value: value, StringValue: value, Preferences: prefs}
2021-01-11 22:55:55 +00:00
return &token{TokenType: operationToken, Operation: op, CheckForPostTraverse: true}, nil
2020-10-19 09:05:38 +00:00
}
}
func opToken(op *operationType) lex.Action {
2020-11-19 05:45:05 +00:00
return opTokenWithPrefs(op, nil, nil)
2020-11-06 00:23:26 +00:00
}
func opAssignableToken(opType *operationType, assignOpType *operationType) lex.Action {
2020-11-19 05:45:05 +00:00
return opTokenWithPrefs(opType, assignOpType, nil)
}
2021-01-06 09:22:50 +00:00
func assignOpToken(updateAssign bool) lex.Action {
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
log.Debug("assignOpToken %v", string(m.Bytes))
value := string(m.Bytes)
prefs := assignPreferences{DontOverWriteAnchor: true}
op := &Operation{OperationType: assignOpType, Value: assignOpType.Type, StringValue: value, UpdateAssign: updateAssign, Preferences: prefs}
2021-01-11 22:55:55 +00:00
return &token{TokenType: operationToken, Operation: op}, nil
2021-01-06 09:22:50 +00:00
}
}
2022-01-22 05:40:17 +00:00
func multiplyWithPrefs(op *operationType) lex.Action {
2021-01-13 05:54:28 +00:00
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
prefs := multiplyPreferences{}
options := string(m.Bytes)
if strings.Contains(options, "+") {
prefs.AppendArrays = true
}
if strings.Contains(options, "?") {
prefs.TraversePrefs = traversePreferences{DontAutoCreate: true}
}
if strings.Contains(options, "n") {
prefs.AssignPrefs = assignPreferences{OnlyWriteNull: true}
}
if strings.Contains(options, "d") {
prefs.DeepMergeArrays = true
}
2021-10-11 03:46:46 +00:00
prefs.TraversePrefs.DontFollowAlias = true
2022-01-22 05:40:17 +00:00
op := &Operation{OperationType: op, Value: multiplyOpType.Type, StringValue: options, Preferences: prefs}
2021-01-13 05:54:28 +00:00
return &token{TokenType: operationToken, Operation: op}, nil
}
}
func opTokenWithPrefs(op *operationType, assignOpType *operationType, preferences interface{}) lex.Action {
2020-10-11 00:24:22 +00:00
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
2020-11-22 02:50:32 +00:00
log.Debug("opTokenWithPrefs %v", string(m.Bytes))
2020-10-11 00:24:22 +00:00
value := string(m.Bytes)
2020-11-06 00:23:26 +00:00
op := &Operation{OperationType: op, Value: op.Type, StringValue: value, Preferences: preferences}
2020-11-19 05:45:05 +00:00
var assign *Operation
if assignOpType != nil {
assign = &Operation{OperationType: assignOpType, Value: assignOpType.Type, StringValue: value, Preferences: preferences}
}
2021-01-11 22:55:55 +00:00
return &token{TokenType: operationToken, Operation: op, AssignOperation: assign}, nil
2020-10-11 00:24:22 +00:00
}
2020-09-17 12:12:56 +00:00
}
2021-10-26 04:42:25 +00:00
// numberParameterPattern captures a parenthesised run of digits, e.g. the
// "3" in "flatten(3)". It is compiled once instead of on every call.
var numberParameterPattern = regexp.MustCompile(`.*\(([0-9]+)\)`)

// extractNumberParamter returns the integer written between parentheses in
// value (for example 3 for "flatten(3)").
//
// An error is returned when value contains no "(digits)" group or when the
// digits do not fit in a 32-bit signed integer.
func extractNumberParamter(value string) (int, error) {
	matches := numberParameterPattern.FindStringSubmatch(value)
	// FindStringSubmatch returns nil when nothing matches; indexing it
	// unguarded would panic.
	if len(matches) < 2 {
		return 0, fmt.Errorf("no numeric parameter found in %q", value)
	}

	number, err := strconv.ParseInt(matches[1], 10, 32)
	if err != nil {
		return 0, err
	}
	return int(number), nil
}
2021-10-24 00:35:40 +00:00
2021-10-26 04:42:25 +00:00
func flattenWithDepth() lex.Action {
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
2021-10-24 00:35:40 +00:00
value := string(m.Bytes)
2021-10-26 04:42:25 +00:00
var depth, errParsingInt = extractNumberParamter(value)
if errParsingInt != nil {
return nil, errParsingInt
}
2021-10-24 00:35:40 +00:00
2021-10-26 04:42:25 +00:00
prefs := flattenPreferences{depth: depth}
op := &Operation{OperationType: flattenOpType, Value: flattenOpType.Type, StringValue: value, Preferences: prefs}
return &token{TokenType: operationToken, Operation: op}, nil
}
}
func encodeWithIndent(outputFormat PrinterOutputFormat) lex.Action {
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
value := string(m.Bytes)
var indent, errParsingInt = extractNumberParamter(value)
2021-10-24 00:35:40 +00:00
if errParsingInt != nil {
return nil, errParsingInt
}
2021-10-26 04:42:25 +00:00
prefs := encoderPreferences{format: outputFormat, indent: indent}
op := &Operation{OperationType: encodeOpType, Value: encodeOpType.Type, StringValue: value, Preferences: prefs}
2021-10-24 00:35:40 +00:00
return &token{TokenType: operationToken, Operation: op}, nil
}
}
2021-01-06 09:22:50 +00:00
func assignAllCommentsOp(updateAssign bool) lex.Action {
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
log.Debug("assignAllCommentsOp %v", string(m.Bytes))
value := string(m.Bytes)
op := &Operation{
OperationType: assignCommentOpType,
Value: assignCommentOpType.Type,
2021-01-06 09:22:50 +00:00
StringValue: value,
UpdateAssign: updateAssign,
Preferences: commentOpPreferences{LineComment: true, HeadComment: true, FootComment: true},
2021-01-06 09:22:50 +00:00
}
2021-01-11 22:55:55 +00:00
return &token{TokenType: operationToken, Operation: op}, nil
2021-01-06 09:22:50 +00:00
}
}
func literalToken(pType tokenType, checkForPost bool) lex.Action {
2020-09-17 12:12:56 +00:00
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
2021-05-09 05:36:33 +00:00
return &token{TokenType: pType, CheckForPostTraverse: checkForPost, Match: m}, nil
2020-09-17 12:12:56 +00:00
}
}
2020-10-11 00:24:22 +00:00
// unwrap strips the first and last byte of value (the surrounding quotes of
// a quoted match). Values shorter than two bytes have nothing to unwrap and
// are returned unchanged instead of panicking on an invalid slice range.
func unwrap(value string) string {
	if len(value) < 2 {
		return value
	}
	return value[1 : len(value)-1]
}
2020-10-16 01:29:26 +00:00
func numberValue() lex.Action {
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
var numberString = string(m.Bytes)
var number, errParsingInt = strconv.ParseInt(numberString, 10, 64)
2020-10-16 01:29:26 +00:00
if errParsingInt != nil {
return nil, errParsingInt
}
2020-10-20 04:33:20 +00:00
2021-01-11 22:55:55 +00:00
return &token{TokenType: operationToken, Operation: createValueOperation(number, numberString)}, nil
2020-10-16 01:29:26 +00:00
}
}
2021-09-02 05:26:44 +00:00
func hexValue() lex.Action {
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
var originalString = string(m.Bytes)
var numberString = originalString[2:]
log.Debugf("numberString: %v", numberString)
var number, errParsingInt = strconv.ParseInt(numberString, 16, 64)
2021-09-02 05:26:44 +00:00
if errParsingInt != nil {
return nil, errParsingInt
}
return &token{TokenType: operationToken, Operation: createValueOperation(number, originalString)}, nil
}
}
2020-10-17 11:39:01 +00:00
func floatValue() lex.Action {
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
var numberString = string(m.Bytes)
var number, errParsingInt = strconv.ParseFloat(numberString, 64)
2020-10-17 11:39:01 +00:00
if errParsingInt != nil {
return nil, errParsingInt
}
2021-01-11 22:55:55 +00:00
return &token{TokenType: operationToken, Operation: createValueOperation(number, numberString)}, nil
2020-10-17 11:39:01 +00:00
}
}
2020-10-16 01:29:26 +00:00
func booleanValue(val bool) lex.Action {
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
2021-01-11 22:55:55 +00:00
return &token{TokenType: operationToken, Operation: createValueOperation(val, string(m.Bytes))}, nil
2020-10-16 01:29:26 +00:00
}
}
func stringValue(wrapped bool) lex.Action {
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
value := string(m.Bytes)
if wrapped {
value = unwrap(value)
}
value = strings.ReplaceAll(value, "\\\"", "\"")
2021-01-11 22:55:55 +00:00
return &token{TokenType: operationToken, Operation: createValueOperation(value, value)}, nil
2020-10-16 01:29:26 +00:00
}
}
2021-02-03 04:51:26 +00:00
func getVariableOpToken() lex.Action {
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
value := string(m.Bytes)
2021-02-15 06:31:12 +00:00
value = value[1:]
2021-02-03 04:51:26 +00:00
getVarOperation := createValueOperation(value, value)
getVarOperation.OperationType = getVariableOpType
return &token{TokenType: operationToken, Operation: getVarOperation, CheckForPostTraverse: true}, nil
}
}
2021-01-09 00:33:39 +00:00
func envOp(strenv bool) lex.Action {
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
value := string(m.Bytes)
preferences := envOpPreferences{}
2021-01-09 00:33:39 +00:00
if strenv {
// strenv( )
2021-01-09 01:06:19 +00:00
value = value[7 : len(value)-1]
preferences.StringValue = true
} else {
2021-01-09 00:33:39 +00:00
//env( )
2021-01-09 01:06:19 +00:00
value = value[4 : len(value)-1]
}
2021-01-09 00:33:39 +00:00
envOperation := createValueOperation(value, value)
envOperation.OperationType = envOpType
2021-01-09 01:06:19 +00:00
envOperation.Preferences = preferences
2021-01-09 00:33:39 +00:00
2021-01-11 22:55:55 +00:00
return &token{TokenType: operationToken, Operation: envOperation}, nil
2021-01-09 00:33:39 +00:00
}
}
2020-10-20 04:40:11 +00:00
func nullValue() lex.Action {
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
2021-01-11 22:55:55 +00:00
return &token{TokenType: operationToken, Operation: createValueOperation(nil, string(m.Bytes))}, nil
2020-10-20 04:40:11 +00:00
}
}
2020-10-16 01:29:26 +00:00
func selfToken() lex.Action {
return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
op := &Operation{OperationType: selfReferenceOpType}
2021-01-11 22:55:55 +00:00
return &token{TokenType: operationToken, Operation: op}, nil
2020-09-17 12:12:56 +00:00
}
}
2020-09-17 11:58:01 +00:00
func initLexer() (*lex.Lexer, error) {
lexer := lex.NewLexer()
2021-01-11 22:55:55 +00:00
lexer.Add([]byte(`\(`), literalToken(openBracket, false))
lexer.Add([]byte(`\)`), literalToken(closeBracket, true))
2020-10-11 00:24:22 +00:00
2021-01-11 22:55:55 +00:00
lexer.Add([]byte(`\.\[`), literalToken(traverseArrayCollect, false))
lexer.Add([]byte(`\.\.`), opTokenWithPrefs(recursiveDescentOpType, nil, recursiveDescentPreferences{RecurseArray: true,
2021-01-13 05:54:28 +00:00
TraversePreferences: traversePreferences{DontFollowAlias: true, IncludeMapKeys: false}}))
2020-12-28 00:24:42 +00:00
lexer.Add([]byte(`\.\.\.`), opTokenWithPrefs(recursiveDescentOpType, nil, recursiveDescentPreferences{RecurseArray: true,
2021-01-13 05:54:28 +00:00
TraversePreferences: traversePreferences{DontFollowAlias: true, IncludeMapKeys: true}}))
2020-10-11 00:24:22 +00:00
lexer.Add([]byte(`,`), opToken(unionOpType))
lexer.Add([]byte(`:\s*`), opToken(createMapOpType))
lexer.Add([]byte(`length`), opToken(lengthOpType))
2021-10-22 01:00:47 +00:00
2022-02-01 03:47:51 +00:00
lexer.Add([]byte(`eval`), opToken(evalOpType))
2021-11-30 23:32:36 +00:00
lexer.Add([]byte(`map`), opToken(mapOpType))
lexer.Add([]byte(`map_values`), opToken(mapValuesOpType))
2021-10-26 04:42:25 +00:00
lexer.Add([]byte(`flatten\([0-9]+\)`), flattenWithDepth())
lexer.Add([]byte(`flatten`), opTokenWithPrefs(flattenOpType, nil, flattenPreferences{depth: -1}))
2021-10-24 00:35:40 +00:00
lexer.Add([]byte(`toyaml\([0-9]+\)`), encodeWithIndent(YamlOutputFormat))
lexer.Add([]byte(`to_yaml\([0-9]+\)`), encodeWithIndent(YamlOutputFormat))
2021-12-21 05:52:54 +00:00
lexer.Add([]byte(`toxml\([0-9]+\)`), encodeWithIndent(XmlOutputFormat))
lexer.Add([]byte(`to_xml\([0-9]+\)`), encodeWithIndent(XmlOutputFormat))
2021-10-24 00:35:40 +00:00
lexer.Add([]byte(`tojson\([0-9]+\)`), encodeWithIndent(JsonOutputFormat))
lexer.Add([]byte(`to_json\([0-9]+\)`), encodeWithIndent(JsonOutputFormat))
lexer.Add([]byte(`toyaml`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: YamlOutputFormat, indent: 2}))
2021-12-21 04:02:07 +00:00
lexer.Add([]byte(`to_yaml`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: YamlOutputFormat, indent: 2}))
2021-12-02 01:11:15 +00:00
// 0 indent doesn't work with yaml.
lexer.Add([]byte(`@yaml`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: YamlOutputFormat, indent: 2}))
2021-12-01 01:08:47 +00:00
2021-10-24 00:35:40 +00:00
lexer.Add([]byte(`tojson`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: JsonOutputFormat, indent: 2}))
lexer.Add([]byte(`to_json`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: JsonOutputFormat, indent: 2}))
2021-12-01 01:08:47 +00:00
lexer.Add([]byte(`@json`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: JsonOutputFormat, indent: 0}))
2021-12-21 04:02:07 +00:00
lexer.Add([]byte(`toprops`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: PropsOutputFormat, indent: 2}))
lexer.Add([]byte(`to_props`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: PropsOutputFormat, indent: 2}))
lexer.Add([]byte(`@props`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: PropsOutputFormat, indent: 2}))
lexer.Add([]byte(`tocsv`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: CsvOutputFormat}))
2021-12-01 01:08:47 +00:00
lexer.Add([]byte(`to_csv`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: CsvOutputFormat}))
lexer.Add([]byte(`@csv`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: CsvOutputFormat}))
2021-12-21 04:02:07 +00:00
lexer.Add([]byte(`totsv`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: TsvOutputFormat}))
2021-12-01 01:08:47 +00:00
lexer.Add([]byte(`to_tsv`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: TsvOutputFormat}))
lexer.Add([]byte(`@tsv`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: TsvOutputFormat}))
2021-12-21 05:52:54 +00:00
lexer.Add([]byte(`toxml`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: XmlOutputFormat}))
lexer.Add([]byte(`to_xml`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: XmlOutputFormat, indent: 2}))
lexer.Add([]byte(`@xml`), opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: XmlOutputFormat, indent: 0}))
2021-12-21 04:02:07 +00:00
lexer.Add([]byte(`fromyaml`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: YamlInputFormat}))
lexer.Add([]byte(`fromjson`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: YamlInputFormat}))
lexer.Add([]byte(`fromxml`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: XmlInputFormat}))
2021-10-22 01:37:47 +00:00
2021-12-21 04:02:07 +00:00
lexer.Add([]byte(`from_yaml`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: YamlInputFormat}))
lexer.Add([]byte(`from_json`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: YamlInputFormat}))
lexer.Add([]byte(`from_xml`), opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: XmlInputFormat}))
2021-10-22 01:37:47 +00:00
lexer.Add([]byte(`sortKeys`), opToken(sortKeysOpType))
2021-12-04 02:54:12 +00:00
lexer.Add([]byte(`sort_keys`), opToken(sortKeysOpType))
2021-12-21 04:02:07 +00:00
lexer.Add([]byte(`load`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewYamlDecoder()}))
2021-12-21 05:52:54 +00:00
lexer.Add([]byte(`xmlload`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewXmlDecoder(XmlPreferences.AttributePrefix, XmlPreferences.ContentName)}))
lexer.Add([]byte(`load_xml`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewXmlDecoder(XmlPreferences.AttributePrefix, XmlPreferences.ContentName)}))
lexer.Add([]byte(`loadxml`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: false, decoder: NewXmlDecoder(XmlPreferences.AttributePrefix, XmlPreferences.ContentName)}))
2021-12-21 04:02:07 +00:00
2021-11-16 04:29:16 +00:00
lexer.Add([]byte(`strload`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: true}))
2021-12-21 04:02:07 +00:00
lexer.Add([]byte(`load_str`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: true}))
lexer.Add([]byte(`loadstr`), opTokenWithPrefs(loadOpType, nil, loadPrefs{loadAsString: true}))
lexer.Add([]byte(`select`), opToken(selectOpType))
lexer.Add([]byte(`has`), opToken(hasOpType))
2021-05-13 23:43:52 +00:00
lexer.Add([]byte(`unique`), opToken(uniqueOpType))
lexer.Add([]byte(`unique_by`), opToken(uniqueByOpType))
2021-10-26 04:07:50 +00:00
lexer.Add([]byte(`group_by`), opToken(groupByOpType))
lexer.Add([]byte(`explode`), opToken(explodeOpType))
lexer.Add([]byte(`or`), opToken(orOpType))
lexer.Add([]byte(`and`), opToken(andOpType))
lexer.Add([]byte(`not`), opToken(notOpType))
2021-02-15 05:06:37 +00:00
lexer.Add([]byte(`ireduce`), opToken(reduceOpType))
2021-02-15 03:27:00 +00:00
lexer.Add([]byte(`;`), opToken(blockOpType))
lexer.Add([]byte(`\/\/`), opToken(alternativeOpType))
2020-11-02 00:20:38 +00:00
lexer.Add([]byte(`documentIndex`), opToken(getDocumentIndexOpType))
lexer.Add([]byte(`di`), opToken(getDocumentIndexOpType))
2021-01-14 03:25:31 +00:00
lexer.Add([]byte(`splitDoc`), opToken(splitDocumentOpType))
2020-11-06 01:11:38 +00:00
2021-01-14 03:46:50 +00:00
lexer.Add([]byte(`join`), opToken(joinStringOpType))
2021-04-15 00:09:41 +00:00
lexer.Add([]byte(`sub`), opToken(subStringOpType))
2021-07-07 12:40:46 +00:00
lexer.Add([]byte(`match`), opToken(matchOpType))
2021-07-11 01:08:18 +00:00
lexer.Add([]byte(`capture`), opToken(captureOpType))
2021-07-09 05:54:56 +00:00
lexer.Add([]byte(`test`), opToken(testOpType))
2021-04-15 00:09:41 +00:00
2021-12-04 02:54:12 +00:00
lexer.Add([]byte(`sort`), opToken(sortOpType))
2021-11-28 02:25:22 +00:00
lexer.Add([]byte(`sort_by`), opToken(sortByOpType))
2021-05-14 04:29:55 +00:00
lexer.Add([]byte(`any`), opToken(anyOpType))
2021-05-14 05:01:44 +00:00
lexer.Add([]byte(`any_c`), opToken(anyConditionOpType))
2021-05-14 04:29:55 +00:00
lexer.Add([]byte(`all`), opToken(allOpType))
2021-05-14 05:01:44 +00:00
lexer.Add([]byte(`all_c`), opToken(allConditionOpType))
2021-09-15 05:18:10 +00:00
lexer.Add([]byte(`contains`), opToken(containsOpType))
2021-05-14 04:29:55 +00:00
2021-01-14 04:05:50 +00:00
lexer.Add([]byte(`split`), opToken(splitStringOpType))
2021-11-23 22:57:35 +00:00
2021-11-23 23:16:48 +00:00
lexer.Add([]byte(`parent`), opToken(getParentOpType))
2021-11-23 22:57:35 +00:00
lexer.Add([]byte(`key`), opToken(getKeyOpType))
2021-01-14 04:45:07 +00:00
lexer.Add([]byte(`keys`), opToken(keysOpType))
2021-01-14 03:46:50 +00:00
lexer.Add([]byte(`style`), opAssignableToken(getStyleOpType, assignStyleOpType))
2020-11-02 00:20:38 +00:00
lexer.Add([]byte(`tag`), opAssignableToken(getTagOpType, assignTagOpType))
lexer.Add([]byte(`anchor`), opAssignableToken(getAnchorOpType, assignAnchorOpType))
lexer.Add([]byte(`alias`), opAssignableToken(getAliasOptype, assignAliasOpType))
lexer.Add([]byte(`filename`), opToken(getFilenameOpType))
lexer.Add([]byte(`fileIndex`), opToken(getFileIndexOpType))
lexer.Add([]byte(`fi`), opToken(getFileIndexOpType))
lexer.Add([]byte(`path`), opToken(getPathOpType))
2021-05-09 03:59:23 +00:00
lexer.Add([]byte(`to_entries`), opToken(toEntriesOpType))
2021-05-09 04:18:25 +00:00
lexer.Add([]byte(`from_entries`), opToken(fromEntriesOpType))
2021-05-09 05:12:50 +00:00
lexer.Add([]byte(`with_entries`), opToken(withEntriesOpType))
2020-11-06 00:45:18 +00:00
2021-09-12 11:52:02 +00:00
lexer.Add([]byte(`with`), opToken(withOpType))
lexer.Add([]byte(`lineComment`), opTokenWithPrefs(getCommentOpType, assignCommentOpType, commentOpPreferences{LineComment: true}))
2020-11-06 00:45:18 +00:00
lexer.Add([]byte(`headComment`), opTokenWithPrefs(getCommentOpType, assignCommentOpType, commentOpPreferences{HeadComment: true}))
2020-11-06 00:45:18 +00:00
lexer.Add([]byte(`footComment`), opTokenWithPrefs(getCommentOpType, assignCommentOpType, commentOpPreferences{FootComment: true}))
2020-11-19 05:45:05 +00:00
2021-01-06 09:22:50 +00:00
lexer.Add([]byte(`comments\s*=`), assignAllCommentsOp(false))
lexer.Add([]byte(`comments\s*\|=`), assignAllCommentsOp(true))
2020-11-06 00:23:26 +00:00
lexer.Add([]byte(`collect`), opToken(collectOpType))
2020-10-11 00:24:22 +00:00
lexer.Add([]byte(`\s*==\s*`), opToken(equalsOpType))
2021-02-04 22:54:03 +00:00
lexer.Add([]byte(`\s*!=\s*`), opToken(notEqualsOpType))
2021-01-06 09:22:50 +00:00
lexer.Add([]byte(`\s*=\s*`), assignOpToken(false))
2020-10-11 00:24:22 +00:00
lexer.Add([]byte(`del`), opToken(deleteChildOpType))
2020-10-11 00:24:22 +00:00
2021-01-06 09:22:50 +00:00
lexer.Add([]byte(`\s*\|=\s*`), assignOpToken(true))
2020-10-16 01:29:26 +00:00
2020-09-17 11:58:01 +00:00
lexer.Add([]byte("( |\t|\n|\r)+"), skip)
2020-10-11 00:24:22 +00:00
2021-05-09 05:12:50 +00:00
lexer.Add([]byte(`\."[^ "]+"\??`), pathToken(true))
2021-09-15 12:24:03 +00:00
lexer.Add([]byte(`\.[^ ;\}\{\:\[\],\|\.\[\(\)=\n]+\??`), pathToken(false))
2020-10-16 01:29:26 +00:00
lexer.Add([]byte(`\.`), selfToken())
lexer.Add([]byte(`\|`), opToken(pipeOpType))
2020-10-16 01:29:26 +00:00
2021-09-02 05:26:44 +00:00
lexer.Add([]byte(`0[xX][0-9A-Fa-f]+`), hexValue())
2020-10-17 11:39:01 +00:00
lexer.Add([]byte(`-?\d+(\.\d+)`), floatValue())
lexer.Add([]byte(`-?[1-9](\.\d+)?[Ee][-+]?\d+`), floatValue())
lexer.Add([]byte(`-?\d+`), numberValue())
2020-10-16 01:29:26 +00:00
lexer.Add([]byte(`[Tt][Rr][Uu][Ee]`), booleanValue(true))
lexer.Add([]byte(`[Ff][Aa][Ll][Ss][Ee]`), booleanValue(false))
2020-10-11 00:24:22 +00:00
2020-10-20 04:40:11 +00:00
lexer.Add([]byte(`[Nn][Uu][Ll][Ll]`), nullValue())
lexer.Add([]byte(`~`), nullValue())
lexer.Add([]byte(`"([^"\\]*(\\.[^"\\]*)*)"`), stringValue(true))
2021-01-09 00:33:39 +00:00
lexer.Add([]byte(`strenv\([^\)]+\)`), envOp(true))
2021-01-09 01:06:19 +00:00
lexer.Add([]byte(`env\([^\)]+\)`), envOp(false))
2020-10-16 01:29:26 +00:00
lexer.Add([]byte(`envsubst`), opToken(envsubstOpType))
2021-01-11 22:55:55 +00:00
lexer.Add([]byte(`\[`), literalToken(openCollect, false))
2021-05-09 05:36:33 +00:00
lexer.Add([]byte(`\]\??`), literalToken(closeCollect, true))
2021-01-11 22:55:55 +00:00
lexer.Add([]byte(`\{`), literalToken(openCollectObject, false))
lexer.Add([]byte(`\}`), literalToken(closeCollectObject, true))
2022-01-22 05:40:17 +00:00
lexer.Add([]byte(`\*=[\+|\?dn]*`), multiplyWithPrefs(multiplyAssignOpType))
lexer.Add([]byte(`\*[\+|\?dn]*`), multiplyWithPrefs(multiplyOpType))
lexer.Add([]byte(`\+`), opToken(addOpType))
lexer.Add([]byte(`\+=`), opToken(addAssignOpType))
2022-01-22 05:40:17 +00:00
2021-03-24 21:12:01 +00:00
lexer.Add([]byte(`\-`), opToken(subtractOpType))
lexer.Add([]byte(`\-=`), opToken(subtractAssignOpType))
2021-02-03 04:51:26 +00:00
lexer.Add([]byte(`\$[a-zA-Z_-0-9]+`), getVariableOpToken())
2021-09-12 06:55:55 +00:00
lexer.Add([]byte(`as`), opTokenWithPrefs(assignVariableOpType, nil, assignVarPreferences{}))
lexer.Add([]byte(`ref`), opTokenWithPrefs(assignVariableOpType, nil, assignVarPreferences{IsReference: true}))
2020-10-16 01:29:26 +00:00
2021-02-25 05:47:55 +00:00
err := lexer.CompileNFA()
2020-09-17 11:58:01 +00:00
if err != nil {
return nil, err
}
return lexer, nil
}
2021-01-12 23:18:53 +00:00
type expressionTokeniser interface {
Tokenise(expression string) ([]*token, error)
2020-09-17 11:58:01 +00:00
}
2021-01-12 23:18:53 +00:00
type expressionTokeniserImpl struct {
2020-09-17 11:58:01 +00:00
lexer *lex.Lexer
}
2021-01-12 23:18:53 +00:00
func newExpressionTokeniser() expressionTokeniser {
2020-09-17 11:58:01 +00:00
var lexer, err = initLexer()
if err != nil {
panic(err)
}
2021-01-12 23:18:53 +00:00
return &expressionTokeniserImpl{lexer}
2020-09-17 11:58:01 +00:00
}
2021-01-12 23:18:53 +00:00
func (p *expressionTokeniserImpl) Tokenise(expression string) ([]*token, error) {
scanner, err := p.lexer.Scanner([]byte(expression))
2020-09-17 11:58:01 +00:00
if err != nil {
2021-11-22 06:43:38 +00:00
return nil, fmt.Errorf("Parsing expression: %w", err)
2020-09-17 11:58:01 +00:00
}
var tokens []*token
2020-09-17 11:58:01 +00:00
for tok, err, eof := scanner.Next(); !eof; tok, err, eof = scanner.Next() {
if tok != nil {
currentToken := tok.(*token)
2021-02-03 06:11:47 +00:00
log.Debugf("Tokenising %v", currentToken.toString(true))
tokens = append(tokens, currentToken)
2020-09-17 11:58:01 +00:00
}
if err != nil {
2021-11-22 06:43:38 +00:00
return nil, fmt.Errorf("Parsing expression: %w", err)
2020-09-17 11:58:01 +00:00
}
}
var postProcessedTokens = make([]*token, 0)
2020-09-24 00:52:45 +00:00
2020-11-19 05:45:05 +00:00
skipNextToken := false
2020-10-11 00:24:22 +00:00
for index := range tokens {
2020-11-19 05:45:05 +00:00
if skipNextToken {
skipNextToken = false
} else {
postProcessedTokens, skipNextToken = p.handleToken(tokens, index, postProcessedTokens)
2020-09-24 00:52:45 +00:00
}
}
2020-09-17 11:58:01 +00:00
2020-09-24 00:52:45 +00:00
return postProcessedTokens, nil
2020-09-17 11:58:01 +00:00
}
2021-01-12 23:18:53 +00:00
func (p *expressionTokeniserImpl) handleToken(tokens []*token, index int, postProcessedTokens []*token) (tokensAccum []*token, skipNextToken bool) {
skipNextToken = false
currentToken := tokens[index]
2021-05-21 04:18:24 +00:00
log.Debug("processing %v", currentToken.toString(true))
2021-01-11 22:55:55 +00:00
if currentToken.TokenType == traverseArrayCollect {
2021-10-18 00:43:06 +00:00
// `.[exp]`` works by creating a traversal array of [self, exp] and piping that into the traverse array operator
//need to put a traverse array then a collect currentToken
// do this by adding traverse then converting currentToken to collect
2021-10-18 00:43:06 +00:00
log.Debug(" adding self")
op := &Operation{OperationType: selfReferenceOpType, StringValue: "SELF"}
postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: op})
2021-05-21 04:18:24 +00:00
log.Debug(" adding traverse array")
2021-10-18 00:43:06 +00:00
op = &Operation{OperationType: traverseArrayOpType, StringValue: "TRAVERSE_ARRAY"}
2021-01-11 22:55:55 +00:00
postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: op})
2020-12-26 10:37:08 +00:00
2021-01-11 22:55:55 +00:00
currentToken = &token{TokenType: openCollect}
}
if index != len(tokens)-1 && currentToken.AssignOperation != nil &&
2021-01-11 22:55:55 +00:00
tokens[index+1].TokenType == operationToken &&
tokens[index+1].Operation.OperationType == assignOpType {
2021-05-21 04:18:24 +00:00
log.Debug(" its an update assign")
currentToken.Operation = currentToken.AssignOperation
currentToken.Operation.UpdateAssign = tokens[index+1].Operation.UpdateAssign
skipNextToken = true
}
2021-05-21 04:18:24 +00:00
log.Debug(" adding token to the fixed list")
postProcessedTokens = append(postProcessedTokens, currentToken)
2021-02-03 06:20:54 +00:00
if index != len(tokens)-1 &&
((currentToken.TokenType == openCollect && tokens[index+1].TokenType == closeCollect) ||
(currentToken.TokenType == openCollectObject && tokens[index+1].TokenType == closeCollectObject)) {
2021-05-21 04:18:24 +00:00
log.Debug(" adding empty")
2021-02-03 06:20:54 +00:00
op := &Operation{OperationType: emptyOpType, StringValue: "EMPTY"}
postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: op})
}
if index != len(tokens)-1 && currentToken.CheckForPostTraverse &&
2021-10-18 00:43:06 +00:00
((tokens[index+1].TokenType == operationToken && (tokens[index+1].Operation.OperationType == traversePathOpType)) ||
(tokens[index+1].TokenType == traverseArrayCollect)) {
2021-05-21 04:18:24 +00:00
log.Debug(" adding pipe because the next thing is traverse")
2021-10-18 00:43:06 +00:00
op := &Operation{OperationType: shortPipeOpType, Value: "PIPE", StringValue: "."}
2021-01-11 22:55:55 +00:00
postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: op})
}
if index != len(tokens)-1 && currentToken.CheckForPostTraverse &&
2021-01-11 22:55:55 +00:00
tokens[index+1].TokenType == openCollect {
2020-12-26 10:37:08 +00:00
2021-05-21 04:18:24 +00:00
log.Debug(" adding traverArray because next is opencollect")
2021-02-03 04:51:26 +00:00
op := &Operation{OperationType: traverseArrayOpType}
2021-01-11 22:55:55 +00:00
postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: op})
2020-12-26 10:37:08 +00:00
}
return postProcessedTokens, skipNextToken
}