diff --git a/pkg/yqlib/doc/String Operators.md b/pkg/yqlib/doc/String Operators.md index 581b2149..088bc5a9 100644 --- a/pkg/yqlib/doc/String Operators.md +++ b/pkg/yqlib/doc/String Operators.md @@ -1,6 +1,81 @@ # String Operators -## Match with names capture groups +## RegEx +This uses Golang's native regex functions under the hood - See https://github.com/google/re2/wiki/Syntax for the supported syntax. + +## Join strings +Given a sample.yml file of: +```yaml +- cat +- meow +- 1 +- null +- true +``` +then +```bash +yq eval 'join("; ")' sample.yml +``` +will output +```yaml +cat; meow; 1; ; true +``` + +## Match string +Given a sample.yml file of: +```yaml +cat +``` +then +```bash +yq eval 'match("at")' sample.yml +``` +will output +```yaml +string: at +offset: 1 +length: 2 +captures: [] +``` + +## Match string, case insensitive +Given a sample.yml file of: +```yaml +cAt +``` +then +```bash +yq eval 'match("(?i)at")' sample.yml +``` +will output +```yaml +string: At +offset: 1 +length: 2 +captures: [] +``` + +## Match with capture groups +Given a sample.yml file of: +```yaml +a cat +``` +then +```bash +yq eval 'match("c(.t)")' sample.yml +``` +will output +```yaml +string: cat +offset: 2 +length: 3 +captures: + - string: at + offset: 3 + length: 2 +``` + +## Match with named capture groups Given a sample.yml file of: ```yaml a cat @@ -21,3 +96,109 @@ captures: name: cool ``` +## Match without global flag +Given a sample.yml file of: +```yaml +cat cat +``` +then +```bash +yq eval 'match("cat")' sample.yml +``` +will output +```yaml +string: cat +offset: 0 +length: 3 +captures: [] +``` + +## Match with global flag +Given a sample.yml file of: +```yaml +cat cat +``` +then +```bash +yq eval 'match("cat"; "g")' sample.yml +``` +will output +```yaml +string: cat +offset: 0 +length: 3 +captures: [] +string: cat +offset: 4 +length: 3 +captures: [] +``` + +## Substitute / Replace string +This uses golang regex, described [here](https://github.com/google/re2/wiki/Syntax) 
+Note the use of `|=` to run in context of the current string value. + +Given a sample.yml file of: +```yaml +a: dogs are great +``` +then +```bash +yq eval '.a |= sub("dogs", "cats")' sample.yml +``` +will output +```yaml +a: cats are great +``` + +## Substitute / Replace string with regex +This uses golang regex, described [here](https://github.com/google/re2/wiki/Syntax) +Note the use of `|=` to run in context of the current string value. + +Given a sample.yml file of: +```yaml +a: cat +b: heat +``` +then +```bash +yq eval '.[] |= sub("(a)", "${1}r")' sample.yml +``` +will output +```yaml +a: cart +b: heart +``` + +## Split strings +Given a sample.yml file of: +```yaml +cat; meow; 1; ; true +``` +then +```bash +yq eval 'split("; ")' sample.yml +``` +will output +```yaml +- cat +- meow +- "1" +- "" +- "true" +``` + +## Split strings one match +Given a sample.yml file of: +```yaml +word +``` +then +```bash +yq eval 'split("; ")' sample.yml +``` +will output +```yaml +- word +``` + diff --git a/pkg/yqlib/doc/headers/String Operators.md b/pkg/yqlib/doc/headers/String Operators.md index 54c3cb1c..5976cc5c 100644 --- a/pkg/yqlib/doc/headers/String Operators.md +++ b/pkg/yqlib/doc/headers/String Operators.md @@ -1 +1,4 @@ # String Operators + +## RegEx +This uses Golang's native regex functions under the hood - See https://github.com/google/re2/wiki/Syntax for the supported syntax. 
diff --git a/pkg/yqlib/operator_strings.go b/pkg/yqlib/operator_strings.go index 911e207e..a363a9ce 100644 --- a/pkg/yqlib/operator_strings.go +++ b/pkg/yqlib/operator_strings.go @@ -92,13 +92,25 @@ func addMatch(original []*yaml.Node, match string, offset int, name string) []*y return newContent } -func match(regEx *regexp.Regexp, candidate *CandidateNode, value string, results *list.List) { +type matchPreferences struct { + Global bool +} + +func match(matchPrefs matchPreferences, regEx *regexp.Regexp, candidate *CandidateNode, value string, results *list.List) { subNames := regEx.SubexpNames() log.Debugf("subNames %v", subNames) - allMatches := regEx.FindAllStringSubmatch(value, -1) - allIndices := regEx.FindAllStringSubmatchIndex(value, -1) + var allMatches [][]string + var allIndices [][]int + + if matchPrefs.Global { + allMatches = regEx.FindAllStringSubmatch(value, -1) + allIndices = regEx.FindAllStringSubmatchIndex(value, -1) + } else { + allMatches = [][]string{regEx.FindStringSubmatch(value)} + allIndices = [][]int{regEx.FindStringSubmatchIndex(value)} + } for i, matches := range allMatches { capturesNode := &yaml.Node{Kind: yaml.SequenceNode} @@ -121,13 +133,38 @@ func match(regEx *regexp.Regexp, candidate *CandidateNode, value string, results } -func matchOperator(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) { - //rhs block operator - //lhs of block = regex +func extractMatchArguments(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (string, matchPreferences, error) { + regExExpNode := expressionNode.Rhs - regExNodes, err := d.GetMatchingNodes(context.ReadOnlyClone(), expressionNode.Rhs) + matchPrefs := matchPreferences{} + + // we got given parameters e.g. 
match(exp; params) + if expressionNode.Rhs.Operation.OperationType == blockOpType { + block := expressionNode.Rhs + regExExpNode = block.Lhs + replacementNodes, err := d.GetMatchingNodes(context, block.Rhs) + if err != nil { + return "", matchPrefs, err + } + paramText := "" + if replacementNodes.MatchingNodes.Front() != nil { + paramText = replacementNodes.MatchingNodes.Front().Value.(*CandidateNode).Node.Value + } + if strings.Contains(paramText, "g") { + paramText = strings.ReplaceAll(paramText, "g", "") + matchPrefs.Global = true + } + if strings.Contains(paramText, "i") { + return "", matchPrefs, fmt.Errorf(`'i' is not a valid option for match. To ignore case, use an expression like match("(?i)cat")`) + } + if len(paramText) > 0 { + return "", matchPrefs, fmt.Errorf(`Unrecognised match params '%v', please see docs at https://mikefarah.gitbook.io/yq/operators/string-operators`, paramText) + } + } + + regExNodes, err := d.GetMatchingNodes(context.ReadOnlyClone(), regExExpNode) if err != nil { - return Context{}, err + return "", matchPrefs, err } log.Debug(NodesToString(regExNodes.MatchingNodes)) regExStr := "" @@ -135,6 +172,14 @@ func matchOperator(d *dataTreeNavigator, context Context, expressionNode *Expres regExStr = regExNodes.MatchingNodes.Front().Value.(*CandidateNode).Node.Value } log.Debug("regEx %v", regExStr) + return regExStr, matchPrefs, nil +} + +func matchOperator(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) { + regExStr, matchPrefs, err := extractMatchArguments(d, context, expressionNode) + if err != nil { + return Context{}, err + } regEx, err := regexp.Compile(regExStr) if err != nil { @@ -147,10 +192,10 @@ func matchOperator(d *dataTreeNavigator, context Context, expressionNode *Expres candidate := el.Value.(*CandidateNode) node := unwrapDoc(candidate.Node) if node.Tag != "!!str" { - return Context{}, fmt.Errorf("cannot substitute with %v, can only substitute strings. 
Hint: Most often you'll want to use '|=' over '=' for this operation.", node.Tag) + return Context{}, fmt.Errorf("cannot match with %v, can only match strings. Hint: Most often you'll want to use '|=' over '=' for this operation", node.Tag) } - match(regEx, candidate, node.Value, results) + match(matchPrefs, regEx, candidate, node.Value, results) } return context.ChildContext(results), nil diff --git a/pkg/yqlib/operator_strings_test.go b/pkg/yqlib/operator_strings_test.go index 655dc2ec..0199ce52 100644 --- a/pkg/yqlib/operator_strings_test.go +++ b/pkg/yqlib/operator_strings_test.go @@ -5,93 +5,110 @@ import ( ) var stringsOperatorScenarios = []expressionScenario{ - // { - // description: "Join strings", - // document: `[cat, meow, 1, null, true]`, - // expression: `join("; ")`, - // expected: []string{ - // "D0, P[], (!!str)::cat; meow; 1; ; true\n", - // }, - // }, - // { - // description: "Match string", - // document: `cat`, - // expression: `match("at")`, - // expected: []string{ - // "D0, P[], ()::string: at\noffset: 1\nlength: 2\ncaptures: []\n", - // }, - // }, - // { - // description: "Match string, case insensitive", - // document: `cAt`, - // expression: `match("(?i)at")`, - // expected: []string{ - // "D0, P[], ()::string: At\noffset: 1\nlength: 2\ncaptures: []\n", - // }, - // }, - // { - // description: "Match with capture groups", - // document: `a cat`, - // expression: `match("c(.t)")`, - // expected: []string{ - // "D0, P[], ()::string: cat\noffset: 2\nlength: 3\ncaptures:\n - string: at\n offset: 3\n length: 2\n", - // }, - // }, { - description: "Match with names capture groups", + description: "Join strings", + document: `[cat, meow, 1, null, true]`, + expression: `join("; ")`, + expected: []string{ + "D0, P[], (!!str)::cat; meow; 1; ; true\n", + }, + }, + { + description: "Match string", + document: `cat`, + expression: `match("at")`, + expected: []string{ + "D0, P[], ()::string: at\noffset: 1\nlength: 2\ncaptures: []\n", + }, + }, + { + 
description: "Match string, case insensitive", + document: `cAt`, + expression: `match("(?i)at")`, + expected: []string{ + "D0, P[], ()::string: At\noffset: 1\nlength: 2\ncaptures: []\n", + }, + }, + { + description: "Match with capture groups", + document: `a cat`, + expression: `match("c(.t)")`, + expected: []string{ + "D0, P[], ()::string: cat\noffset: 2\nlength: 3\ncaptures:\n - string: at\n offset: 3\n length: 2\n", + }, + }, + { + description: "Match with named capture groups", document: `a cat`, expression: `match("c(?P<cool>.t)")`, expected: []string{ "D0, P[], ()::string: cat\noffset: 2\nlength: 3\ncaptures:\n - string: at\n offset: 3\n length: 2\n name: cool\n", }, }, - // { - // description: "Substitute / Replace string", - // subdescription: "This uses golang regex, described [here](https://github.com/google/re2/wiki/Syntax)\nNote the use of `|=` to run in context of the current string value.", - // document: `a: dogs are great`, - // expression: `.a |= sub("dogs", "cats")`, - // expected: []string{ - // "D0, P[], (doc)::a: cats are great\n", - // }, - // }, - // { - // description: "Substitute / Replace string with regex", - // subdescription: "This uses golang regex, described [here](https://github.com/google/re2/wiki/Syntax)\nNote the use of `|=` to run in context of the current string value.", - // document: "a: cat\nb: heat", - // expression: `.[] |= sub("(a)", "${1}r")`, - // expected: []string{ - // "D0, P[], (doc)::a: cart\nb: heart\n", - // }, - // }, - // { - // description: "Split strings", - // document: `"cat; meow; 1; ; true"`, - // expression: `split("; ")`, - // expected: []string{ - // "D0, P[], (!!seq)::- cat\n- meow\n- \"1\"\n- \"\"\n- \"true\"\n", - // }, - // }, - // { - // description: "Split strings one match", - // document: `"word"`, - // expression: `split("; ")`, - // expected: []string{ - // "D0, P[], (!!seq)::- word\n", - // }, - // }, - // { - // skipDoc: true, - // document: `""`, - // expression: `split("; ")`, - // expected: 
[]string{ - // "D0, P[], (!!seq)::[]\n", // dont actually want this, just not to error - // }, - // }, - // { - // skipDoc: true, - // expression: `split("; ")`, - // expected: []string{}, - // }, + { + description: "Match without global flag", + document: `cat cat`, + expression: `match("cat")`, + expected: []string{ + "D0, P[], ()::string: cat\noffset: 0\nlength: 3\ncaptures: []\n", + }, + }, + { + description: "Match with global flag", + document: `cat cat`, + expression: `match("cat"; "g")`, + expected: []string{ + "D0, P[], ()::string: cat\noffset: 0\nlength: 3\ncaptures: []\n", + "D0, P[], ()::string: cat\noffset: 4\nlength: 3\ncaptures: []\n", + }, + }, + { + description: "Substitute / Replace string", + subdescription: "This uses golang regex, described [here](https://github.com/google/re2/wiki/Syntax)\nNote the use of `|=` to run in context of the current string value.", + document: `a: dogs are great`, + expression: `.a |= sub("dogs", "cats")`, + expected: []string{ + "D0, P[], (doc)::a: cats are great\n", + }, + }, + { + description: "Substitute / Replace string with regex", + subdescription: "This uses golang regex, described [here](https://github.com/google/re2/wiki/Syntax)\nNote the use of `|=` to run in context of the current string value.", + document: "a: cat\nb: heat", + expression: `.[] |= sub("(a)", "${1}r")`, + expected: []string{ + "D0, P[], (doc)::a: cart\nb: heart\n", + }, + }, + { + description: "Split strings", + document: `"cat; meow; 1; ; true"`, + expression: `split("; ")`, + expected: []string{ + "D0, P[], (!!seq)::- cat\n- meow\n- \"1\"\n- \"\"\n- \"true\"\n", + }, + }, + { + description: "Split strings one match", + document: `"word"`, + expression: `split("; ")`, + expected: []string{ + "D0, P[], (!!seq)::- word\n", + }, + }, + { + skipDoc: true, + document: `""`, + expression: `split("; ")`, + expected: []string{ + "D0, P[], (!!seq)::[]\n", // dont actually want this, just not to error + }, + }, + { + skipDoc: true, + expression: 
`split("; ")`, + expected: []string{}, + }, } func TestStringsOperatorScenarios(t *testing.T) {