yq/pkg/yqlib/lexer_participle.go

565 lines
19 KiB
Go
Raw Normal View History

2022-07-13 04:45:28 +00:00
package yqlib
import (
"strconv"
"strings"
"github.com/alecthomas/participle/v2/lexer"
)
var participleYqRules = []*participleYqRule{
{"LINE_COMMENT", `line_?comment|lineComment`, opTokenWithPrefs(getCommentOpType, assignCommentOpType, commentOpPreferences{LineComment: true}), 0},
{"HEAD_COMMENT", `head_?comment|headComment`, opTokenWithPrefs(getCommentOpType, assignCommentOpType, commentOpPreferences{HeadComment: true}), 0},
{"FOOT_COMMENT", `foot_?comment|footComment`, opTokenWithPrefs(getCommentOpType, assignCommentOpType, commentOpPreferences{FootComment: true}), 0},
{"OpenBracket", `\(`, literalToken(openBracket, false), 0},
{"CloseBracket", `\)`, literalToken(closeBracket, true), 0},
{"OpenTraverseArrayCollect", `\.\[`, literalToken(traverseArrayCollect, false), 0},
{"OpenCollect", `\[`, literalToken(openCollect, false), 0},
{"CloseCollect", `\]\??`, literalToken(closeCollect, true), 0},
{"OpenCollectObject", `\{`, literalToken(openCollectObject, false), 0},
{"CloseCollectObject", `\}`, literalToken(closeCollectObject, true), 0},
{"RecursiveDecentIncludingKeys", `\.\.\.`, recursiveDecentOpToken(true), 0},
{"RecursiveDecent", `\.\.`, recursiveDecentOpToken(false), 0},
{"GetVariable", `\$[a-zA-Z_\-0-9]+`, getVariableOpToken(), 0},
{"AsignAsVariable", `as`, opTokenWithPrefs(assignVariableOpType, nil, assignVarPreferences{}), 0},
{"AsignRefVariable", `ref`, opTokenWithPrefs(assignVariableOpType, nil, assignVarPreferences{IsReference: true}), 0},
{"CreateMap", `:\s*`, opToken(createMapOpType), 0},
simpleOp("length", lengthOpType),
simpleOp("line", lineOpType),
simpleOp("column", columnOpType),
simpleOp("eval", evalOpType),
{"MapValues", `map_?values`, opToken(mapValuesOpType), 0},
simpleOp("map", mapOpType),
simpleOp("pick", pickOpType),
{"FlattenWithDepth", `flatten\([0-9]+\)`, flattenWithDepth(), 0},
{"Flatten", `flatten`, opTokenWithPrefs(flattenOpType, nil, flattenPreferences{depth: -1}), 0},
simpleOp("format_datetime", formatDateTimeOpType),
simpleOp("now", nowOpType),
simpleOp("tz", tzOpType),
2023-02-02 01:42:36 +00:00
simpleOp("from_?unix", fromUnixOpType),
2023-02-02 01:56:16 +00:00
simpleOp("to_?unix", toUnixOpType),
2022-07-13 04:45:28 +00:00
simpleOp("with_dtf", withDtFormatOpType),
simpleOp("error", errorOpType),
simpleOp("sortKeys", sortKeysOpType),
simpleOp("sort_?keys", sortKeysOpType),
2022-11-08 02:40:00 +00:00
{"ArrayToMap", "array_?to_?map", expressionOpToken(`(.[] | select(. != null) ) as $i ireduce({}; .[$i | key] = $i)`), 0},
2022-07-13 04:45:28 +00:00
{"YamlEncodeWithIndent", `to_?yaml\([0-9]+\)`, encodeParseIndent(YamlOutputFormat), 0},
{"XMLEncodeWithIndent", `to_?xml\([0-9]+\)`, encodeParseIndent(XMLOutputFormat), 0},
{"JSONEncodeWithIndent", `to_?json\([0-9]+\)`, encodeParseIndent(JSONOutputFormat), 0},
{"YamlDecode", `from_?yaml|@yamld|from_?json|@jsond`, decodeOp(YamlInputFormat), 0},
{"YamlEncode", `to_?yaml|@yaml`, encodeWithIndent(YamlOutputFormat, 2), 0},
{"JSONEncode", `to_?json`, encodeWithIndent(JSONOutputFormat, 2), 0},
{"JSONEncodeNoIndent", `@json`, encodeWithIndent(JSONOutputFormat, 0), 0},
{"PropertiesDecode", `from_?props|@propsd`, decodeOp(PropertiesInputFormat), 0},
{"PropsEncode", `to_?props|@props`, encodeWithIndent(PropsOutputFormat, 2), 0},
{"XmlDecode", `from_?xml|@xmld`, decodeOp(XMLInputFormat), 0},
{"XMLEncode", `to_?xml`, encodeWithIndent(XMLOutputFormat, 2), 0},
{"XMLEncodeNoIndent", `@xml`, encodeWithIndent(XMLOutputFormat, 0), 0},
{"CSVDecode", `from_?csv|@csvd`, decodeOp(CSVObjectInputFormat), 0},
2022-07-13 04:45:28 +00:00
{"CSVEncode", `to_?csv|@csv`, encodeWithIndent(CSVOutputFormat, 0), 0},
{"TSVDecode", `from_?tsv|@tsvd`, decodeOp(TSVObjectInputFormat), 0},
2022-07-13 04:45:28 +00:00
{"TSVEncode", `to_?tsv|@tsv`, encodeWithIndent(TSVOutputFormat, 0), 0},
{"Base64d", `@base64d`, decodeOp(Base64InputFormat), 0},
{"Base64", `@base64`, encodeWithIndent(Base64OutputFormat, 0), 0},
2023-01-25 00:18:48 +00:00
{"Urid", `@urid`, decodeOp(UriInputFormat), 0},
{"Uri", `@uri`, encodeWithIndent(UriOutputFormat, 0), 0},
2023-02-02 01:22:52 +00:00
{"SH", `@sh`, encodeWithIndent(ShOutputFormat, 0), 0},
2023-01-23 00:37:18 +00:00
2022-10-25 03:27:16 +00:00
{"LoadXML", `load_?xml|xml_?load`, loadOp(NewXMLDecoder(ConfiguredXMLPreferences), false), 0},
2022-07-13 04:45:28 +00:00
{"LoadBase64", `load_?base64`, loadOp(NewBase64Decoder(), false), 0},
{"LoadProperties", `load_?props`, loadOp(NewPropertiesDecoder(), false), 0},
{"LoadString", `load_?str|str_?load`, loadOp(nil, true), 0},
{"LoadYaml", `load`, loadOp(NewYamlDecoder(LoadYamlPreferences), false), 0},
2022-07-13 04:45:28 +00:00
{"SplitDocument", `splitDoc|split_?doc`, opToken(splitDocumentOpType), 0},
simpleOp("select", selectOpType),
simpleOp("has", hasOpType),
simpleOp("unique_?by", uniqueByOpType),
simpleOp("unique", uniqueOpType),
simpleOp("group_?by", groupByOpType),
simpleOp("explode", explodeOpType),
simpleOp("or", orOpType),
simpleOp("and", andOpType),
simpleOp("not", notOpType),
simpleOp("ireduce", reduceOpType),
simpleOp("join", joinStringOpType),
simpleOp("sub", subStringOpType),
simpleOp("match", matchOpType),
simpleOp("capture", captureOpType),
simpleOp("test", testOpType),
simpleOp("sort_?by", sortByOpType),
simpleOp("sort", sortOpType),
simpleOp("reverse", reverseOpType),
simpleOp("any_c", anyConditionOpType),
simpleOp("any", anyOpType),
simpleOp("all_c", allConditionOpType),
simpleOp("all", allOpType),
simpleOp("contains", containsOpType),
simpleOp("split", splitStringOpType),
simpleOp("parent", getParentOpType),
simpleOp("keys", keysOpType),
simpleOp("key", getKeyOpType),
2022-09-30 00:27:35 +00:00
simpleOp("is_?key", isKeyOpType),
2022-07-13 04:45:28 +00:00
simpleOp("file_?name|fileName", getFilenameOpType),
simpleOp("file_?index|fileIndex|fi", getFileIndexOpType),
simpleOp("path", getPathOpType),
2022-10-05 03:12:08 +00:00
simpleOp("set_?path", setPathOpType),
2022-10-05 09:09:53 +00:00
simpleOp("del_?paths", delPathsOpType),
2022-07-13 04:45:28 +00:00
simpleOp("to_?entries|toEntries", toEntriesOpType),
simpleOp("from_?entries|fromEntries", fromEntriesOpType),
simpleOp("with_?entries|withEntries", withEntriesOpType),
simpleOp("with", withOpType),
simpleOp("collect", collectOpType),
simpleOp("del", deleteChildOpType),
assignableOp("style", getStyleOpType, assignStyleOpType),
assignableOp("tag|type", getTagOpType, assignTagOpType),
assignableOp("anchor", getAnchorOpType, assignAnchorOpType),
assignableOp("alias", getAliasOpType, assignAliasOpType),
{"ALL_COMMENTS", `comments\s*=`, assignAllCommentsOp(false), 0},
{"ALL_COMMENTS_ASSIGN_RELATIVE", `comments\s*\|=`, assignAllCommentsOp(true), 0},
{"Block", `;`, opToken(blockOpType), 0},
{"Alternative", `\/\/`, opToken(alternativeOpType), 0},
{"DocumentIndex", `documentIndex|document_?index|di`, opToken(getDocumentIndexOpType), 0},
{"Uppercase", `upcase|ascii_?upcase`, opTokenWithPrefs(changeCaseOpType, nil, changeCasePrefs{ToUpperCase: true}), 0},
{"Downcase", `downcase|ascii_?downcase`, opTokenWithPrefs(changeCaseOpType, nil, changeCasePrefs{ToUpperCase: false}), 0},
2022-08-08 03:35:57 +00:00
simpleOp("trim", trimOpType),
2022-07-13 04:45:28 +00:00
{"HexValue", `0[xX][0-9A-Fa-f]+`, hexValue(), 0},
{"FloatValueScientific", `-?[1-9](\.\d+)?[Ee][-+]?\d+`, floatValue(), 0},
{"FloatValue", `-?\d+(\.\d+)`, floatValue(), 0},
{"NumberValue", `-?\d+`, numberValue(), 0},
{"TrueBooleanValue", `[Tt][Rr][Uu][Ee]`, booleanValue(true), 0},
{"FalseBooleanValue", `[Ff][Aa][Ll][Ss][Ee]`, booleanValue(false), 0},
{"NullValue", `[Nn][Uu][Ll][Ll]|~`, nullValue(), 0},
{"QuotedStringValue", `"([^"\\]*(\\.[^"\\]*)*)"`, stringValue(), 0},
{"StrEnvOp", `strenv\([^\)]+\)`, envOp(true), 0},
{"EnvOp", `env\([^\)]+\)`, envOp(false), 0},
{"EnvSubstWithOptions", `envsubst\((ne|nu|ff| |,)+\)`, envSubstWithOptions(), 0},
simpleOp("envsubst", envsubstOpType),
{"Equals", `\s*==\s*`, opToken(equalsOpType), 0},
{"NotEquals", `\s*!=\s*`, opToken(notEqualsOpType), 0},
{"GreaterThanEquals", `\s*>=\s*`, opTokenWithPrefs(compareOpType, nil, compareTypePref{OrEqual: true, Greater: true}), 0},
{"LessThanEquals", `\s*<=\s*`, opTokenWithPrefs(compareOpType, nil, compareTypePref{OrEqual: true, Greater: false}), 0},
{"GreaterThan", `\s*>\s*`, opTokenWithPrefs(compareOpType, nil, compareTypePref{OrEqual: false, Greater: true}), 0},
{"LessThan", `\s*<\s*`, opTokenWithPrefs(compareOpType, nil, compareTypePref{OrEqual: false, Greater: false}), 0},
{"AssignRelative", `\|=[c]*`, assignOpToken(true), 0},
{"Assign", `=[c]*`, assignOpToken(false), 0},
2022-07-13 04:45:28 +00:00
{`whitespace`, `[ \t\n]+`, nil, 0},
{"WrappedPathElement", `\."[^ "]+"\??`, pathToken(true), 0},
{"PathElement", `\.[^ ;\}\{\:\[\],\|\.\[\(\)=\n]+\??`, pathToken(false), 0},
{"Pipe", `\|`, opToken(pipeOpType), 0},
{"Self", `\.`, opToken(selfReferenceOpType), 0},
{"Union", `,`, opToken(unionOpType), 0},
{"MultiplyAssign", `\*=[\+|\?cdn]*`, multiplyWithPrefs(multiplyAssignOpType), 0},
{"Multiply", `\*[\+|\?cdn]*`, multiplyWithPrefs(multiplyOpType), 0},
2022-07-13 04:45:28 +00:00
{"AddAssign", `\+=`, opToken(addAssignOpType), 0},
{"Add", `\+`, opToken(addOpType), 0},
{"SubtractAssign", `\-=`, opToken(subtractAssignOpType), 0},
{"Subtract", `\-`, opToken(subtractOpType), 0},
}
// yqAction converts a raw token produced by the participle lexer into a yq
// token, or returns an error when the matched text cannot be converted.
type yqAction func(lexer.Token) (*token, error)
// participleYqRule ties one lexer rule to the action that builds its yq token.
type participleYqRule struct {
// Name is the rule name handed to the participle lexer.
Name string
// Pattern is the regular expression the rule matches.
Pattern string
// CreateYqToken builds the yq token for a match; Tokenise skips rules where
// it is nil.
CreateYqToken yqAction
// ParticipleTokenType is the token type the compiled lexer assigned to Name;
// newParticipleLexer fills it in.
ParticipleTokenType lexer.TokenType
}
// participleLexer tokenises yq expressions using a compiled participle lexer
// definition.
type participleLexer struct {
// lexerDefinition is the compiled definition used by Tokenise to lex strings.
lexerDefinition lexer.StringDefinition
}
// simpleOp builds the lexer rule for an operator that carries no preferences.
// name is used verbatim as the rule's regex pattern; the rule's name is the
// same text with its first character upper-cased (for example "length" becomes
// "Length"). name must not be empty.
func simpleOp(name string, opType *operationType) *participleYqRule {
	// Capitalise the first character. The previous code capitalised name[1]
	// instead, yielding names such as "Eength" and panicking on a
	// one-character name.
	ruleName := strings.ToUpper(name[:1]) + name[1:]
	return &participleYqRule{ruleName, name, opToken(opType), 0}
}
// assignableOp builds the lexer rule for an operator that has both a "get" form
// (opType) and an "assign" form (assignOpType). name is used verbatim as the
// rule's regex pattern; the rule's name is the same text with its first
// character upper-cased (for example "style" becomes "Style"). name must not be
// empty.
func assignableOp(name string, opType *operationType, assignOpType *operationType) *participleYqRule {
	// Capitalise the first character. The previous code capitalised name[1]
	// instead, yielding names such as "Ttyle" and panicking on a
	// one-character name.
	ruleName := strings.ToUpper(name[:1]) + name[1:]
	return &participleYqRule{ruleName, name, opTokenWithPrefs(opType, assignOpType, nil), 0}
}
// newParticipleLexer compiles participleYqRules into a participle lexer
// definition and returns a tokeniser backed by it. As a side effect, every
// rule's ParticipleTokenType is set to the token type the compiled definition
// reports for that rule's name.
func newParticipleLexer() expressionTokeniser {
	rules := make([]lexer.SimpleRule, 0, len(participleYqRules))
	for _, rule := range participleYqRules {
		rules = append(rules, lexer.SimpleRule{Name: rule.Name, Pattern: rule.Pattern})
	}

	definition := lexer.MustSimple(rules)

	// Remember which token type belongs to which rule so raw tokens can later
	// be matched back to their rule by type.
	tokenTypes := definition.Symbols()
	for idx := range participleYqRules {
		rule := participleYqRules[idx]
		rule.ParticipleTokenType = tokenTypes[rule.Name]
	}

	return &participleLexer{lexerDefinition: definition}
}
// pathToken returns the action for path elements such as `.a` or `."a b"`.
// A trailing "?" is stripped and recorded as an optional traverse, the leading
// character is dropped, and - when wrapped is true - the remainder is passed
// through unwrap. The resulting token is flagged CheckForPostTraverse.
func pathToken(wrapped bool) yqAction {
	return func(rawToken lexer.Token) (*token, error) {
		path := rawToken.Value
		prefs := traversePreferences{}

		// A trailing "?" marks the traversal as optional.
		if last := len(path) - 1; path[last:] == "?" {
			prefs.OptionalTraverse = true
			path = path[:last]
		}

		// Drop the leading character (the "." required by both path patterns).
		path = path[1:]
		if wrapped {
			path = unwrap(path)
		}
		log.Debug("PathToken %v", path)

		traverse := &Operation{
			OperationType: traversePathOpType,
			Value:         path,
			StringValue:   path,
			Preferences:   prefs,
		}
		return &token{TokenType: operationToken, Operation: traverse, CheckForPostTraverse: true}, nil
	}
}
// recursiveDecentOpToken returns the action for the recursive descent
// operators. Arrays are always recursed into and aliases are never followed;
// includeMapKeys controls whether map keys are included.
func recursiveDecentOpToken(includeMapKeys bool) yqAction {
	traversal := traversePreferences{
		DontFollowAlias: true,
		IncludeMapKeys:  includeMapKeys,
	}
	return opTokenWithPrefs(
		recursiveDescentOpType,
		nil,
		recursiveDescentPreferences{RecurseArray: true, TraversePreferences: traversal},
	)
}
// opTokenWithPrefs returns an action producing an operation token of opType
// carrying the given preferences. The operation's Value is opType.Type and its
// StringValue is the matched text. When assignOpType is non-nil the token also
// gets an AssignOperation built the same way from assignOpType; otherwise
// AssignOperation is left nil.
func opTokenWithPrefs(opType *operationType, assignOpType *operationType, preferences interface{}) yqAction {
	return func(rawToken lexer.Token) (*token, error) {
		matched := rawToken.Value

		result := &token{
			TokenType: operationToken,
			Operation: &Operation{
				OperationType: opType,
				Value:         opType.Type,
				StringValue:   matched,
				Preferences:   preferences,
			},
		}
		if assignOpType != nil {
			result.AssignOperation = &Operation{
				OperationType: assignOpType,
				Value:         assignOpType.Type,
				StringValue:   matched,
				Preferences:   preferences,
			}
		}
		return result, nil
	}
}
2022-11-08 02:40:00 +00:00
// expressionOpToken returns an action producing an expressionOpType operation
// whose preferences carry the given expression string. The matched text itself
// is not used.
func expressionOpToken(expression string) yqAction {
	return func(rawToken lexer.Token) (*token, error) {
		op := &Operation{
			OperationType: expressionOpType,
			Preferences:   expressionOpPreferences{expression: expression},
		}
		return &token{TokenType: operationToken, Operation: op}, nil
	}
}
2022-07-13 04:45:28 +00:00
// flattenWithDepth returns the action for `flatten(N)`: the number is pulled
// out of the matched text with extractNumberParameter and stored as the flatten
// depth. An extraction error is returned to the caller.
func flattenWithDepth() yqAction {
	return func(rawToken lexer.Token) (*token, error) {
		matched := rawToken.Value

		depth, err := extractNumberParameter(matched)
		if err != nil {
			return nil, err
		}

		flatten := &Operation{
			OperationType: flattenOpType,
			Value:         flattenOpType.Type,
			StringValue:   matched,
			Preferences:   flattenPreferences{depth: depth},
		}
		return &token{TokenType: operationToken, Operation: flatten}, nil
	}
}
// assignAllCommentsOp returns the action for `comments =` / `comments |=`.
// The produced operation assigns line, head and foot comments at once;
// updateAssign is copied into the operation's UpdateAssign field.
func assignAllCommentsOp(updateAssign bool) yqAction {
	allComments := commentOpPreferences{LineComment: true, HeadComment: true, FootComment: true}

	return func(rawToken lexer.Token) (*token, error) {
		log.Debug("assignAllCommentsOp %v", rawToken.Value)

		assign := &Operation{
			OperationType: assignCommentOpType,
			Value:         assignCommentOpType.Type,
			StringValue:   rawToken.Value,
			UpdateAssign:  updateAssign,
			Preferences:   allComments,
		}
		return &token{TokenType: operationToken, Operation: assign}, nil
	}
}
func assignOpToken(updateAssign bool) yqAction {
return func(rawToken lexer.Token) (*token, error) {
log.Debug("assignOpToken %v", rawToken.Value)
value := rawToken.Value
prefs := assignPreferences{DontOverWriteAnchor: true}
if strings.Contains(value, "c") {
prefs.ClobberCustomTags = true
}
2022-07-13 04:45:28 +00:00
op := &Operation{OperationType: assignOpType, Value: assignOpType.Type, StringValue: value, UpdateAssign: updateAssign, Preferences: prefs}
return &token{TokenType: operationToken, Operation: op}, nil
}
}
// booleanValue returns an action producing a value operation holding val, with
// the matched text kept as its string form.
func booleanValue(val bool) yqAction {
	return func(rawToken lexer.Token) (*token, error) {
		literal := createValueOperation(val, rawToken.Value)
		return &token{TokenType: operationToken, Operation: literal}, nil
	}
}
// nullValue returns an action producing a value operation holding nil, with the
// matched text kept as its string form.
func nullValue() yqAction {
	return func(rawToken lexer.Token) (*token, error) {
		literal := createValueOperation(nil, rawToken.Value)
		return &token{TokenType: operationToken, Operation: literal}, nil
	}
}
func stringValue() yqAction {
return func(rawToken lexer.Token) (*token, error) {
log.Debug("rawTokenvalue: %v", rawToken.Value)
2022-07-13 04:45:28 +00:00
value := unwrap(rawToken.Value)
log.Debug("unwrapped: %v", value)
2022-07-13 04:45:28 +00:00
value = strings.ReplaceAll(value, "\\\"", "\"")
value = strings.ReplaceAll(value, "\\n", "\n")
log.Debug("replaced: %v", value)
2022-07-13 04:45:28 +00:00
return &token{TokenType: operationToken, Operation: createValueOperation(value, value)}, nil
}
}
// envOp returns the action for `env(NAME)` and, when strenv is true,
// `strenv(NAME)`. The text between the opening "env(" / "strenv(" and the final
// character is used as both the value and string value of an envOpType
// operation; strenv additionally sets the StringValue preference.
func envOp(strenv bool) yqAction {
	return func(rawToken lexer.Token) (*token, error) {
		matched := rawToken.Value
		prefs := envOpPreferences{}

		// Number of leading bytes to skip: the function name plus "(".
		skip := len("env(")
		if strenv {
			skip = len("strenv(")
			prefs.StringValue = true
		}
		// The final byte (the closing bracket) is dropped as well.
		name := matched[skip : len(matched)-1]

		op := createValueOperation(name, name)
		op.OperationType = envOpType
		op.Preferences = prefs
		return &token{TokenType: operationToken, Operation: op}, nil
	}
}
// envSubstWithOptions returns the action for `envsubst(...)` with options. The
// options "ne", "nu" and "ff" are detected with hasOptionParameter and stored
// as NoEmpty, NoUnset and FailFast.
//
// NOTE: every call of the action rewrites the Type field of the package-level
// envsubstOpType to "ENVSUBST" plus "_NO_EMPTY" / "_NO_UNSET" suffixes, so the
// change is visible to everything else that reads envsubstOpType.
func envSubstWithOptions() yqAction {
	return func(rawToken lexer.Token) (*token, error) {
		matched := rawToken.Value
		prefs := envOpPreferences{
			NoEmpty:  hasOptionParameter(matched, "ne"),
			NoUnset:  hasOptionParameter(matched, "nu"),
			FailFast: hasOptionParameter(matched, "ff"),
		}

		envsubstOpType.Type = "ENVSUBST"
		if prefs.NoEmpty {
			envsubstOpType.Type += "_NO_EMPTY"
		}
		if prefs.NoUnset {
			envsubstOpType.Type += "_NO_UNSET"
		}

		subst := &Operation{
			OperationType: envsubstOpType,
			Value:         envsubstOpType.Type,
			StringValue:   matched,
			Preferences:   prefs,
		}
		return &token{TokenType: operationToken, Operation: subst}, nil
	}
}
func multiplyWithPrefs(op *operationType) yqAction {
return func(rawToken lexer.Token) (*token, error) {
prefs := multiplyPreferences{}
prefs.AssignPrefs = assignPreferences{}
2022-07-13 04:45:28 +00:00
options := rawToken.Value
if strings.Contains(options, "+") {
prefs.AppendArrays = true
}
if strings.Contains(options, "?") {
prefs.TraversePrefs = traversePreferences{DontAutoCreate: true}
}
if strings.Contains(options, "n") {
prefs.AssignPrefs.OnlyWriteNull = true
2022-07-13 04:45:28 +00:00
}
if strings.Contains(options, "d") {
prefs.DeepMergeArrays = true
}
if strings.Contains(options, "c") {
prefs.AssignPrefs.ClobberCustomTags = true
}
2022-07-13 04:45:28 +00:00
prefs.TraversePrefs.DontFollowAlias = true
op := &Operation{OperationType: op, Value: multiplyOpType.Type, StringValue: options, Preferences: prefs}
return &token{TokenType: operationToken, Operation: op}, nil
}
}
// getVariableOpToken returns the action for variable references. The first
// character of the matched text is dropped and the rest is used as the value of
// a getVariableOpType operation. The token is flagged CheckForPostTraverse.
func getVariableOpToken() yqAction {
	return func(rawToken lexer.Token) (*token, error) {
		// Skip the leading character (the "$" required by the GetVariable pattern).
		name := rawToken.Value[1:]

		lookup := createValueOperation(name, name)
		lookup.OperationType = getVariableOpType
		return &token{TokenType: operationToken, Operation: lookup, CheckForPostTraverse: true}, nil
	}
}
// hexValue returns the action for hexadecimal literals. The first two
// characters of the match are skipped and the remainder is parsed as a base-16
// 64-bit integer; the full matched text is kept as the string form. A parse
// error is returned to the caller.
func hexValue() yqAction {
	return func(rawToken lexer.Token) (*token, error) {
		literal := rawToken.Value
		// Skip the two-character "0x" / "0X" prefix.
		digits := literal[2:]
		log.Debugf("numberString: %v", digits)

		parsed, err := strconv.ParseInt(digits, 16, 64)
		if err != nil {
			return nil, err
		}
		return &token{TokenType: operationToken, Operation: createValueOperation(parsed, literal)}, nil
	}
}
// floatValue returns the action for floating point literals: the matched text
// is parsed as a 64-bit float and kept as the string form. A parse error is
// returned to the caller.
func floatValue() yqAction {
	return func(rawToken lexer.Token) (*token, error) {
		literal := rawToken.Value

		parsed, err := strconv.ParseFloat(literal, 64)
		if err != nil {
			return nil, err
		}
		return &token{TokenType: operationToken, Operation: createValueOperation(parsed, literal)}, nil
	}
}
// numberValue returns the action for integer literals: the matched text is
// parsed as a base-10 64-bit integer and kept as the string form. A parse error
// is returned to the caller.
func numberValue() yqAction {
	return func(rawToken lexer.Token) (*token, error) {
		literal := rawToken.Value

		parsed, err := strconv.ParseInt(literal, 10, 64)
		if err != nil {
			return nil, err
		}
		return &token{TokenType: operationToken, Operation: createValueOperation(parsed, literal)}, nil
	}
}
// encodeParseIndent returns the action for encoders written with an explicit
// indent, e.g. `to_yaml(4)`. The number is pulled out of the matched text with
// extractNumberParameter and stored as the indent of an encode operation for
// outputFormat. An extraction error is returned to the caller.
func encodeParseIndent(outputFormat PrinterOutputFormat) yqAction {
	return func(rawToken lexer.Token) (*token, error) {
		matched := rawToken.Value

		indent, err := extractNumberParameter(matched)
		if err != nil {
			return nil, err
		}

		encode := &Operation{
			OperationType: encodeOpType,
			Value:         encodeOpType.Type,
			StringValue:   matched,
			Preferences:   encoderPreferences{format: outputFormat, indent: indent},
		}
		return &token{TokenType: operationToken, Operation: encode}, nil
	}
}
// encodeWithIndent returns the action for an encode operation with a fixed
// output format and indent.
func encodeWithIndent(outputFormat PrinterOutputFormat, indent int) yqAction {
	return opTokenWithPrefs(encodeOpType, nil, encoderPreferences{format: outputFormat, indent: indent})
}
// decodeOp returns the action for a decode operation reading inputFormat.
func decodeOp(inputFormat InputFormat) yqAction {
	return opTokenWithPrefs(decodeOpType, nil, decoderPreferences{format: inputFormat})
}
// loadOp returns the action for a load operation whose preferences carry the
// given decoder and loadAsString flag.
func loadOp(decoder Decoder, loadAsString bool) yqAction {
	return opTokenWithPrefs(loadOpType, nil, loadPrefs{decoder: decoder, loadAsString: loadAsString})
}
// opToken returns the action for an operation that has neither an assign form
// nor preferences.
func opToken(op *operationType) yqAction {
	var noAssign *operationType
	return opTokenWithPrefs(op, noAssign, nil)
}
// literalToken returns an action producing a token of type tt that carries no
// operation. The matched text is stored in Match and checkForPost in
// CheckForPostTraverse.
func literalToken(tt tokenType, checkForPost bool) yqAction {
	return func(rawToken lexer.Token) (*token, error) {
		literal := &token{
			TokenType:            tt,
			CheckForPostTraverse: checkForPost,
			Match:                rawToken.Value,
		}
		return literal, nil
	}
}
// getYqDefinition returns the first rule in participleYqRules whose token type
// equals rawToken's type. When no rule matches it returns an empty rule, whose
// CreateYqToken is nil.
func (p *participleLexer) getYqDefinition(rawToken lexer.Token) *participleYqRule {
	for idx := range participleYqRules {
		if candidate := participleYqRules[idx]; candidate.ParticipleTokenType == rawToken.Type {
			return candidate
		}
	}
	return &participleYqRule{}
}
// Tokenise lexes expression and converts every raw token into a yq token using
// the action of its matching rule. Raw tokens whose rule has no action are
// dropped. On reaching EOF the collected tokens are passed through
// postProcessTokens and returned. Any lexing or conversion error aborts
// tokenising and is returned with a nil slice.
func (p *participleLexer) Tokenise(expression string) ([]*token, error) {
	rawLexer, err := p.lexerDefinition.LexString("", expression)
	if err != nil {
		return nil, err
	}

	tokens := make([]*token, 0)
	for {
		rawToken, nextErr := rawLexer.Next()
		if nextErr != nil {
			return nil, nextErr
		}
		if rawToken.Type == lexer.EOF {
			return postProcessTokens(tokens), nil
		}

		createYqToken := p.getYqDefinition(rawToken).CreateYqToken
		if createYqToken == nil {
			// Nothing to emit for this rule.
			continue
		}

		yqToken, createErr := createYqToken(rawToken)
		if createErr != nil {
			return nil, createErr
		}
		tokens = append(tokens, yqToken)
	}
}