mirror of
https://github.com/mikefarah/yq.git
synced 2025-01-23 22:25:42 +00:00
Added capture regex operator
This commit is contained in:
parent
3394feee0d
commit
5c0a5bd9d3
@ -24,76 +24,110 @@ cat; meow; 1; ; true
|
|||||||
## Match string
|
## Match string
|
||||||
Given a sample.yml file of:
|
Given a sample.yml file of:
|
||||||
```yaml
|
```yaml
|
||||||
cat
|
foo bar foo
|
||||||
```
|
```
|
||||||
then
|
then
|
||||||
```bash
|
```bash
|
||||||
yq eval 'match("at")' sample.yml
|
yq eval 'match("foo")' sample.yml
|
||||||
```
|
```
|
||||||
will output
|
will output
|
||||||
```yaml
|
```yaml
|
||||||
string: at
|
string: foo
|
||||||
offset: 1
|
offset: 0
|
||||||
length: 2
|
length: 3
|
||||||
captures: []
|
captures: []
|
||||||
```
|
```
|
||||||
|
|
||||||
## Match string, case insensitive
|
## Match string, case insensitive
|
||||||
Given a sample.yml file of:
|
Given a sample.yml file of:
|
||||||
```yaml
|
```yaml
|
||||||
cAt
|
foo bar FOO
|
||||||
```
|
```
|
||||||
then
|
then
|
||||||
```bash
|
```bash
|
||||||
yq eval 'match("(?i)at")' sample.yml
|
yq eval 'match("(?i)foo"; "g")' sample.yml
|
||||||
```
|
```
|
||||||
will output
|
will output
|
||||||
```yaml
|
```yaml
|
||||||
string: At
|
string: foo
|
||||||
offset: 1
|
offset: 0
|
||||||
length: 2
|
length: 3
|
||||||
|
captures: []
|
||||||
|
string: FOO
|
||||||
|
offset: 8
|
||||||
|
length: 3
|
||||||
captures: []
|
captures: []
|
||||||
```
|
```
|
||||||
|
|
||||||
## Match with capture groups
|
## Match with capture groups
|
||||||
Given a sample.yml file of:
|
Given a sample.yml file of:
|
||||||
```yaml
|
```yaml
|
||||||
a cat
|
abc abc
|
||||||
```
|
```
|
||||||
then
|
then
|
||||||
```bash
|
```bash
|
||||||
yq eval 'match("c(.t)")' sample.yml
|
yq eval 'match("(abc)+"; "g")' sample.yml
|
||||||
```
|
```
|
||||||
will output
|
will output
|
||||||
```yaml
|
```yaml
|
||||||
string: cat
|
string: abc
|
||||||
offset: 2
|
offset: 0
|
||||||
length: 3
|
length: 3
|
||||||
captures:
|
captures:
|
||||||
- string: at
|
- string: abc
|
||||||
offset: 3
|
offset: 0
|
||||||
length: 2
|
length: 3
|
||||||
|
string: abc
|
||||||
|
offset: 4
|
||||||
|
length: 3
|
||||||
|
captures:
|
||||||
|
- string: abc
|
||||||
|
offset: 4
|
||||||
|
length: 3
|
||||||
```
|
```
|
||||||
|
|
||||||
## Match with named capture groups
|
## Match with named capture groups
|
||||||
Given a sample.yml file of:
|
Given a sample.yml file of:
|
||||||
```yaml
|
```yaml
|
||||||
a cat
|
foo bar foo foo  foo
|
||||||
```
|
```
|
||||||
then
|
then
|
||||||
```bash
|
```bash
|
||||||
yq eval 'match("c(?P<cool>.t)")' sample.yml
|
yq eval 'match("foo (?P<bar123>bar)? foo"; "g")' sample.yml
|
||||||
```
|
```
|
||||||
will output
|
will output
|
||||||
```yaml
|
```yaml
|
||||||
string: cat
|
string: foo bar foo
|
||||||
offset: 2
|
offset: 0
|
||||||
length: 3
|
length: 11
|
||||||
captures:
|
captures:
|
||||||
- string: at
|
- string: bar
|
||||||
offset: 3
|
offset: 4
|
||||||
length: 2
|
length: 3
|
||||||
name: cool
|
name: bar123
|
||||||
|
string: foo  foo
|
||||||
|
offset: 12
|
||||||
|
length: 8
|
||||||
|
captures:
|
||||||
|
- string: null
|
||||||
|
offset: -1
|
||||||
|
length: 0
|
||||||
|
name: bar123
|
||||||
|
```
|
||||||
|
|
||||||
|
## Capture named groups into a map
|
||||||
|
Given a sample.yml file of:
|
||||||
|
```yaml
|
||||||
|
xyzzy-14
|
||||||
|
```
|
||||||
|
then
|
||||||
|
```bash
|
||||||
|
yq eval 'capture("(?P<a>[a-z]+)-(?P<n>[0-9]+)")' sample.yml
|
||||||
|
```
|
||||||
|
will output
|
||||||
|
```yaml
|
||||||
|
a: xyzzy
|
||||||
|
n: "14"
|
||||||
```
|
```
|
||||||
|
|
||||||
## Match without global flag
|
## Match without global flag
|
||||||
|
@ -277,6 +277,7 @@ func initLexer() (*lex.Lexer, error) {
|
|||||||
lexer.Add([]byte(`join`), opToken(joinStringOpType))
|
lexer.Add([]byte(`join`), opToken(joinStringOpType))
|
||||||
lexer.Add([]byte(`sub`), opToken(subStringOpType))
|
lexer.Add([]byte(`sub`), opToken(subStringOpType))
|
||||||
lexer.Add([]byte(`match`), opToken(matchOpType))
|
lexer.Add([]byte(`match`), opToken(matchOpType))
|
||||||
|
lexer.Add([]byte(`capture`), opToken(captureOpType))
|
||||||
lexer.Add([]byte(`test`), opToken(testOpType))
|
lexer.Add([]byte(`test`), opToken(testOpType))
|
||||||
|
|
||||||
lexer.Add([]byte(`any`), opToken(anyOpType))
|
lexer.Add([]byte(`any`), opToken(anyOpType))
|
||||||
|
@ -84,7 +84,8 @@ var sortKeysOpType = &operationType{Type: "SORT_KEYS", NumArgs: 1, Precedence: 5
|
|||||||
var joinStringOpType = &operationType{Type: "JOIN", NumArgs: 1, Precedence: 50, Handler: joinStringOperator}
|
var joinStringOpType = &operationType{Type: "JOIN", NumArgs: 1, Precedence: 50, Handler: joinStringOperator}
|
||||||
var subStringOpType = &operationType{Type: "SUBSTR", NumArgs: 1, Precedence: 50, Handler: substituteStringOperator}
|
var subStringOpType = &operationType{Type: "SUBSTR", NumArgs: 1, Precedence: 50, Handler: substituteStringOperator}
|
||||||
var matchOpType = &operationType{Type: "MATCH", NumArgs: 1, Precedence: 50, Handler: matchOperator}
|
var matchOpType = &operationType{Type: "MATCH", NumArgs: 1, Precedence: 50, Handler: matchOperator}
|
||||||
var testOpType = &operationType{Type: "MATCH", NumArgs: 1, Precedence: 50, Handler: testOperator}
|
var captureOpType = &operationType{Type: "CAPTURE", NumArgs: 1, Precedence: 50, Handler: captureOperator}
|
||||||
|
var testOpType = &operationType{Type: "TEST", NumArgs: 1, Precedence: 50, Handler: testOperator}
|
||||||
var splitStringOpType = &operationType{Type: "SPLIT", NumArgs: 1, Precedence: 50, Handler: splitStringOperator}
|
var splitStringOpType = &operationType{Type: "SPLIT", NumArgs: 1, Precedence: 50, Handler: splitStringOperator}
|
||||||
|
|
||||||
var keysOpType = &operationType{Type: "KEYS", NumArgs: 0, Precedence: 50, Handler: keysOperator}
|
var keysOpType = &operationType{Type: "KEYS", NumArgs: 0, Precedence: 50, Handler: keysOperator}
|
||||||
|
@ -75,9 +75,22 @@ func substituteStringOperator(d *dataTreeNavigator, context Context, expressionN
|
|||||||
}
|
}
|
||||||
|
|
||||||
func addMatch(original []*yaml.Node, match string, offset int, name string) []*yaml.Node {
|
func addMatch(original []*yaml.Node, match string, offset int, name string) []*yaml.Node {
|
||||||
|
|
||||||
newContent := append(original,
|
newContent := append(original,
|
||||||
createScalarNode("string", "string"),
|
createScalarNode("string", "string"))
|
||||||
|
|
||||||
|
if offset < 0 {
|
||||||
|
// offset of -1 means there was no match, force a null value like jq
|
||||||
|
newContent = append(newContent,
|
||||||
|
createScalarNode(nil, "null"),
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
newContent = append(newContent,
|
||||||
createScalarNode(match, match),
|
createScalarNode(match, match),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
newContent = append(newContent,
|
||||||
createScalarNode("offset", "offset"),
|
createScalarNode("offset", "offset"),
|
||||||
createScalarNode(offset, fmt.Sprintf("%v", offset)),
|
createScalarNode(offset, fmt.Sprintf("%v", offset)),
|
||||||
createScalarNode("length", "length"),
|
createScalarNode("length", "length"),
|
||||||
@ -96,11 +109,7 @@ type matchPreferences struct {
|
|||||||
Global bool
|
Global bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func match(matchPrefs matchPreferences, regEx *regexp.Regexp, candidate *CandidateNode, value string, results *list.List) {
|
func getMatches(matchPrefs matchPreferences, regEx *regexp.Regexp, value string) ([][]string, [][]int) {
|
||||||
|
|
||||||
subNames := regEx.SubexpNames()
|
|
||||||
log.Debugf("subNames %v", subNames)
|
|
||||||
|
|
||||||
var allMatches [][]string
|
var allMatches [][]string
|
||||||
var allIndices [][]int
|
var allIndices [][]int
|
||||||
|
|
||||||
@ -113,6 +122,12 @@ func match(matchPrefs matchPreferences, regEx *regexp.Regexp, candidate *Candida
|
|||||||
}
|
}
|
||||||
|
|
||||||
log.Debug("allMatches, %v", allMatches)
|
log.Debug("allMatches, %v", allMatches)
|
||||||
|
return allMatches, allIndices
|
||||||
|
}
|
||||||
|
|
||||||
|
func match(matchPrefs matchPreferences, regEx *regexp.Regexp, candidate *CandidateNode, value string, results *list.List) {
|
||||||
|
subNames := regEx.SubexpNames()
|
||||||
|
allMatches, allIndices := getMatches(matchPrefs, regEx, value)
|
||||||
|
|
||||||
// if all matches just has an empty array in it,
|
// if all matches just has an empty array in it,
|
||||||
// then nothing matched
|
// then nothing matched
|
||||||
@ -141,6 +156,43 @@ func match(matchPrefs matchPreferences, regEx *regexp.Regexp, candidate *Candida
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func capture(matchPrefs matchPreferences, regEx *regexp.Regexp, candidate *CandidateNode, value string, results *list.List) {
|
||||||
|
subNames := regEx.SubexpNames()
|
||||||
|
allMatches, allIndices := getMatches(matchPrefs, regEx, value)
|
||||||
|
|
||||||
|
// if all matches just has an empty array in it,
|
||||||
|
// then nothing matched
|
||||||
|
if len(allMatches) > 0 && len(allMatches[0]) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, matches := range allMatches {
|
||||||
|
capturesNode := &yaml.Node{Kind: yaml.MappingNode}
|
||||||
|
|
||||||
|
_, submatches := matches[0], matches[1:]
|
||||||
|
for j, submatch := range submatches {
|
||||||
|
capturesNode.Content = append(capturesNode.Content,
|
||||||
|
createScalarNode(subNames[j+1], subNames[j+1]))
|
||||||
|
|
||||||
|
offset := allIndices[i][2+j*2]
|
||||||
|
// offset of -1 means there was no match, force a null value like jq
|
||||||
|
if offset < 0 {
|
||||||
|
capturesNode.Content = append(capturesNode.Content,
|
||||||
|
createScalarNode(nil, "null"),
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
capturesNode.Content = append(capturesNode.Content,
|
||||||
|
createScalarNode(submatch, submatch),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
results.PushBack(candidate.CreateChild(nil, capturesNode))
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
func extractMatchArguments(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (*regexp.Regexp, matchPreferences, error) {
|
func extractMatchArguments(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (*regexp.Regexp, matchPreferences, error) {
|
||||||
regExExpNode := expressionNode.Rhs
|
regExExpNode := expressionNode.Rhs
|
||||||
|
|
||||||
@ -205,6 +257,27 @@ func matchOperator(d *dataTreeNavigator, context Context, expressionNode *Expres
|
|||||||
return context.ChildContext(results), nil
|
return context.ChildContext(results), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func captureOperator(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) {
|
||||||
|
regEx, matchPrefs, err := extractMatchArguments(d, context, expressionNode)
|
||||||
|
if err != nil {
|
||||||
|
return Context{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var results = list.New()
|
||||||
|
|
||||||
|
for el := context.MatchingNodes.Front(); el != nil; el = el.Next() {
|
||||||
|
candidate := el.Value.(*CandidateNode)
|
||||||
|
node := unwrapDoc(candidate.Node)
|
||||||
|
if node.Tag != "!!str" {
|
||||||
|
return Context{}, fmt.Errorf("cannot match with %v, can only match strings. Hint: Most often you'll want to use '|=' over '=' for this operation", node.Tag)
|
||||||
|
}
|
||||||
|
capture(matchPrefs, regEx, candidate, node.Value, results)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return context.ChildContext(results), nil
|
||||||
|
}
|
||||||
|
|
||||||
func testOperator(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) {
|
func testOperator(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) {
|
||||||
regEx, _, err := extractMatchArguments(d, context, expressionNode)
|
regEx, _, err := extractMatchArguments(d, context, expressionNode)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -15,34 +15,54 @@ var stringsOperatorScenarios = []expressionScenario{
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "Match string",
|
description: "Match string",
|
||||||
document: `cat`,
|
document: `foo bar foo`,
|
||||||
expression: `match("at")`,
|
expression: `match("foo")`,
|
||||||
expected: []string{
|
expected: []string{
|
||||||
"D0, P[], ()::string: at\noffset: 1\nlength: 2\ncaptures: []\n",
|
"D0, P[], ()::string: foo\noffset: 0\nlength: 3\ncaptures: []\n",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "Match string, case insensitive",
|
description: "Match string, case insensitive",
|
||||||
document: `cAt`,
|
document: `foo bar FOO`,
|
||||||
expression: `match("(?i)at")`,
|
expression: `match("(?i)foo"; "g")`,
|
||||||
expected: []string{
|
expected: []string{
|
||||||
"D0, P[], ()::string: At\noffset: 1\nlength: 2\ncaptures: []\n",
|
"D0, P[], ()::string: foo\noffset: 0\nlength: 3\ncaptures: []\n",
|
||||||
|
"D0, P[], ()::string: FOO\noffset: 8\nlength: 3\ncaptures: []\n",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "Match with capture groups",
|
description: "Match with capture groups",
|
||||||
document: `a cat`,
|
document: `abc abc`,
|
||||||
expression: `match("c(.t)")`,
|
expression: `match("(abc)+"; "g")`,
|
||||||
expected: []string{
|
expected: []string{
|
||||||
"D0, P[], ()::string: cat\noffset: 2\nlength: 3\ncaptures:\n - string: at\n offset: 3\n length: 2\n",
|
"D0, P[], ()::string: abc\noffset: 0\nlength: 3\ncaptures:\n - string: abc\n offset: 0\n length: 3\n",
|
||||||
|
"D0, P[], ()::string: abc\noffset: 4\nlength: 3\ncaptures:\n - string: abc\n offset: 4\n length: 3\n",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "Match with named capture groups",
|
description: "Match with named capture groups",
|
||||||
document: `a cat`,
|
document: `foo bar foo foo foo`,
|
||||||
expression: `match("c(?P<cool>.t)")`,
|
expression: `match("foo (?P<bar123>bar)? foo"; "g")`,
|
||||||
expected: []string{
|
expected: []string{
|
||||||
"D0, P[], ()::string: cat\noffset: 2\nlength: 3\ncaptures:\n - string: at\n offset: 3\n length: 2\n name: cool\n",
|
"D0, P[], ()::string: foo bar foo\noffset: 0\nlength: 11\ncaptures:\n - string: bar\n offset: 4\n length: 3\n name: bar123\n",
|
||||||
|
"D0, P[], ()::string: foo foo\noffset: 12\nlength: 8\ncaptures:\n - string: null\n offset: -1\n length: 0\n name: bar123\n",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "Capture named groups into a map",
|
||||||
|
document: `xyzzy-14`,
|
||||||
|
expression: `capture("(?P<a>[a-z]+)-(?P<n>[0-9]+)")`,
|
||||||
|
expected: []string{
|
||||||
|
"D0, P[], ()::a: xyzzy\nn: \"14\"\n",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
skipDoc: true,
|
||||||
|
description: "Capture named groups into a map, with null",
|
||||||
|
document: `xyzzy-14`,
|
||||||
|
expression: `capture("(?P<a>[a-z]+)-(?P<n>[0-9]+)(?P<bar123>bar)?")`,
|
||||||
|
expected: []string{
|
||||||
|
"D0, P[], ()::a: xyzzy\nn: \"14\"\nbar123: null\n",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user