mirror of
https://github.com/mikefarah/yq.git
synced 2025-01-23 14:16:10 +00:00
Added capture regex operator
This commit is contained in:
parent
3394feee0d
commit
5c0a5bd9d3
@ -24,76 +24,110 @@ cat; meow; 1; ; true
|
||||
## Match string
|
||||
Given a sample.yml file of:
|
||||
```yaml
|
||||
cat
|
||||
foo bar foo
|
||||
```
|
||||
then
|
||||
```bash
|
||||
yq eval 'match("at")' sample.yml
|
||||
yq eval 'match("foo")' sample.yml
|
||||
```
|
||||
will output
|
||||
```yaml
|
||||
string: at
|
||||
offset: 1
|
||||
length: 2
|
||||
string: foo
|
||||
offset: 0
|
||||
length: 3
|
||||
captures: []
|
||||
```
|
||||
|
||||
## Match string, case insensitive
|
||||
Given a sample.yml file of:
|
||||
```yaml
|
||||
cAt
|
||||
foo bar FOO
|
||||
```
|
||||
then
|
||||
```bash
|
||||
yq eval 'match("(?i)at")' sample.yml
|
||||
yq eval 'match("(?i)foo"; "g")' sample.yml
|
||||
```
|
||||
will output
|
||||
```yaml
|
||||
string: At
|
||||
offset: 1
|
||||
length: 2
|
||||
string: foo
|
||||
offset: 0
|
||||
length: 3
|
||||
captures: []
|
||||
string: FOO
|
||||
offset: 8
|
||||
length: 3
|
||||
captures: []
|
||||
```
|
||||
|
||||
## Match with capture groups
|
||||
Given a sample.yml file of:
|
||||
```yaml
|
||||
a cat
|
||||
abc abc
|
||||
```
|
||||
then
|
||||
```bash
|
||||
yq eval 'match("c(.t)")' sample.yml
|
||||
yq eval 'match("(abc)+"; "g")' sample.yml
|
||||
```
|
||||
will output
|
||||
```yaml
|
||||
string: cat
|
||||
offset: 2
|
||||
string: abc
|
||||
offset: 0
|
||||
length: 3
|
||||
captures:
|
||||
- string: at
|
||||
offset: 3
|
||||
length: 2
|
||||
- string: abc
|
||||
offset: 0
|
||||
length: 3
|
||||
string: abc
|
||||
offset: 4
|
||||
length: 3
|
||||
captures:
|
||||
- string: abc
|
||||
offset: 4
|
||||
length: 3
|
||||
```
|
||||
|
||||
## Match with named capture groups
|
||||
Given a sample.yml file of:
|
||||
```yaml
|
||||
a cat
|
||||
foo bar foo foo foo
|
||||
```
|
||||
then
|
||||
```bash
|
||||
yq eval 'match("c(?P<cool>.t)")' sample.yml
|
||||
yq eval 'match("foo (?P<bar123>bar)? foo"; "g")' sample.yml
|
||||
```
|
||||
will output
|
||||
```yaml
|
||||
string: cat
|
||||
offset: 2
|
||||
length: 3
|
||||
string: foo bar foo
|
||||
offset: 0
|
||||
length: 11
|
||||
captures:
|
||||
- string: at
|
||||
offset: 3
|
||||
length: 2
|
||||
name: cool
|
||||
- string: bar
|
||||
offset: 4
|
||||
length: 3
|
||||
name: bar123
|
||||
string: foo foo
|
||||
offset: 12
|
||||
length: 8
|
||||
captures:
|
||||
- string: null
|
||||
offset: -1
|
||||
length: 0
|
||||
name: bar123
|
||||
```
|
||||
|
||||
## Capture named groups into a map
|
||||
Given a sample.yml file of:
|
||||
```yaml
|
||||
xyzzy-14
|
||||
```
|
||||
then
|
||||
```bash
|
||||
yq eval 'capture("(?P<a>[a-z]+)-(?P<n>[0-9]+)")' sample.yml
|
||||
```
|
||||
will output
|
||||
```yaml
|
||||
a: xyzzy
|
||||
n: "14"
|
||||
```
|
||||
|
||||
## Match without global flag
|
||||
|
@ -277,6 +277,7 @@ func initLexer() (*lex.Lexer, error) {
|
||||
lexer.Add([]byte(`join`), opToken(joinStringOpType))
|
||||
lexer.Add([]byte(`sub`), opToken(subStringOpType))
|
||||
lexer.Add([]byte(`match`), opToken(matchOpType))
|
||||
lexer.Add([]byte(`capture`), opToken(captureOpType))
|
||||
lexer.Add([]byte(`test`), opToken(testOpType))
|
||||
|
||||
lexer.Add([]byte(`any`), opToken(anyOpType))
|
||||
|
@ -84,7 +84,8 @@ var sortKeysOpType = &operationType{Type: "SORT_KEYS", NumArgs: 1, Precedence: 5
|
||||
var joinStringOpType = &operationType{Type: "JOIN", NumArgs: 1, Precedence: 50, Handler: joinStringOperator}
|
||||
var subStringOpType = &operationType{Type: "SUBSTR", NumArgs: 1, Precedence: 50, Handler: substituteStringOperator}
|
||||
var matchOpType = &operationType{Type: "MATCH", NumArgs: 1, Precedence: 50, Handler: matchOperator}
|
||||
var testOpType = &operationType{Type: "MATCH", NumArgs: 1, Precedence: 50, Handler: testOperator}
|
||||
var captureOpType = &operationType{Type: "CAPTURE", NumArgs: 1, Precedence: 50, Handler: captureOperator}
|
||||
var testOpType = &operationType{Type: "TEST", NumArgs: 1, Precedence: 50, Handler: testOperator}
|
||||
var splitStringOpType = &operationType{Type: "SPLIT", NumArgs: 1, Precedence: 50, Handler: splitStringOperator}
|
||||
|
||||
var keysOpType = &operationType{Type: "KEYS", NumArgs: 0, Precedence: 50, Handler: keysOperator}
|
||||
|
@ -75,9 +75,22 @@ func substituteStringOperator(d *dataTreeNavigator, context Context, expressionN
|
||||
}
|
||||
|
||||
func addMatch(original []*yaml.Node, match string, offset int, name string) []*yaml.Node {
|
||||
|
||||
newContent := append(original,
|
||||
createScalarNode("string", "string"),
|
||||
createScalarNode("string", "string"))
|
||||
|
||||
if offset < 0 {
|
||||
// offset of -1 means there was no match, force a null value like jq
|
||||
newContent = append(newContent,
|
||||
createScalarNode(nil, "null"),
|
||||
)
|
||||
} else {
|
||||
newContent = append(newContent,
|
||||
createScalarNode(match, match),
|
||||
)
|
||||
}
|
||||
|
||||
newContent = append(newContent,
|
||||
createScalarNode("offset", "offset"),
|
||||
createScalarNode(offset, fmt.Sprintf("%v", offset)),
|
||||
createScalarNode("length", "length"),
|
||||
@ -96,11 +109,7 @@ type matchPreferences struct {
|
||||
Global bool
|
||||
}
|
||||
|
||||
func match(matchPrefs matchPreferences, regEx *regexp.Regexp, candidate *CandidateNode, value string, results *list.List) {
|
||||
|
||||
subNames := regEx.SubexpNames()
|
||||
log.Debugf("subNames %v", subNames)
|
||||
|
||||
func getMatches(matchPrefs matchPreferences, regEx *regexp.Regexp, value string) ([][]string, [][]int) {
|
||||
var allMatches [][]string
|
||||
var allIndices [][]int
|
||||
|
||||
@ -113,6 +122,12 @@ func match(matchPrefs matchPreferences, regEx *regexp.Regexp, candidate *Candida
|
||||
}
|
||||
|
||||
log.Debug("allMatches, %v", allMatches)
|
||||
return allMatches, allIndices
|
||||
}
|
||||
|
||||
func match(matchPrefs matchPreferences, regEx *regexp.Regexp, candidate *CandidateNode, value string, results *list.List) {
|
||||
subNames := regEx.SubexpNames()
|
||||
allMatches, allIndices := getMatches(matchPrefs, regEx, value)
|
||||
|
||||
// if all matches just has an empty array in it,
|
||||
// then nothing matched
|
||||
@ -141,6 +156,43 @@ func match(matchPrefs matchPreferences, regEx *regexp.Regexp, candidate *Candida
|
||||
|
||||
}
|
||||
|
||||
func capture(matchPrefs matchPreferences, regEx *regexp.Regexp, candidate *CandidateNode, value string, results *list.List) {
|
||||
subNames := regEx.SubexpNames()
|
||||
allMatches, allIndices := getMatches(matchPrefs, regEx, value)
|
||||
|
||||
// if all matches just has an empty array in it,
|
||||
// then nothing matched
|
||||
if len(allMatches) > 0 && len(allMatches[0]) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
for i, matches := range allMatches {
|
||||
capturesNode := &yaml.Node{Kind: yaml.MappingNode}
|
||||
|
||||
_, submatches := matches[0], matches[1:]
|
||||
for j, submatch := range submatches {
|
||||
capturesNode.Content = append(capturesNode.Content,
|
||||
createScalarNode(subNames[j+1], subNames[j+1]))
|
||||
|
||||
offset := allIndices[i][2+j*2]
|
||||
// offset of -1 means there was no match, force a null value like jq
|
||||
if offset < 0 {
|
||||
capturesNode.Content = append(capturesNode.Content,
|
||||
createScalarNode(nil, "null"),
|
||||
)
|
||||
} else {
|
||||
capturesNode.Content = append(capturesNode.Content,
|
||||
createScalarNode(submatch, submatch),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
results.PushBack(candidate.CreateChild(nil, capturesNode))
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func extractMatchArguments(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (*regexp.Regexp, matchPreferences, error) {
|
||||
regExExpNode := expressionNode.Rhs
|
||||
|
||||
@ -205,6 +257,27 @@ func matchOperator(d *dataTreeNavigator, context Context, expressionNode *Expres
|
||||
return context.ChildContext(results), nil
|
||||
}
|
||||
|
||||
func captureOperator(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) {
|
||||
regEx, matchPrefs, err := extractMatchArguments(d, context, expressionNode)
|
||||
if err != nil {
|
||||
return Context{}, err
|
||||
}
|
||||
|
||||
var results = list.New()
|
||||
|
||||
for el := context.MatchingNodes.Front(); el != nil; el = el.Next() {
|
||||
candidate := el.Value.(*CandidateNode)
|
||||
node := unwrapDoc(candidate.Node)
|
||||
if node.Tag != "!!str" {
|
||||
return Context{}, fmt.Errorf("cannot match with %v, can only match strings. Hint: Most often you'll want to use '|=' over '=' for this operation", node.Tag)
|
||||
}
|
||||
capture(matchPrefs, regEx, candidate, node.Value, results)
|
||||
|
||||
}
|
||||
|
||||
return context.ChildContext(results), nil
|
||||
}
|
||||
|
||||
func testOperator(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) {
|
||||
regEx, _, err := extractMatchArguments(d, context, expressionNode)
|
||||
if err != nil {
|
||||
|
@ -15,34 +15,54 @@ var stringsOperatorScenarios = []expressionScenario{
|
||||
},
|
||||
{
|
||||
description: "Match string",
|
||||
document: `cat`,
|
||||
expression: `match("at")`,
|
||||
document: `foo bar foo`,
|
||||
expression: `match("foo")`,
|
||||
expected: []string{
|
||||
"D0, P[], ()::string: at\noffset: 1\nlength: 2\ncaptures: []\n",
|
||||
"D0, P[], ()::string: foo\noffset: 0\nlength: 3\ncaptures: []\n",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Match string, case insensitive",
|
||||
document: `cAt`,
|
||||
expression: `match("(?i)at")`,
|
||||
document: `foo bar FOO`,
|
||||
expression: `match("(?i)foo"; "g")`,
|
||||
expected: []string{
|
||||
"D0, P[], ()::string: At\noffset: 1\nlength: 2\ncaptures: []\n",
|
||||
"D0, P[], ()::string: foo\noffset: 0\nlength: 3\ncaptures: []\n",
|
||||
"D0, P[], ()::string: FOO\noffset: 8\nlength: 3\ncaptures: []\n",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Match with capture groups",
|
||||
document: `a cat`,
|
||||
expression: `match("c(.t)")`,
|
||||
document: `abc abc`,
|
||||
expression: `match("(abc)+"; "g")`,
|
||||
expected: []string{
|
||||
"D0, P[], ()::string: cat\noffset: 2\nlength: 3\ncaptures:\n - string: at\n offset: 3\n length: 2\n",
|
||||
"D0, P[], ()::string: abc\noffset: 0\nlength: 3\ncaptures:\n - string: abc\n offset: 0\n length: 3\n",
|
||||
"D0, P[], ()::string: abc\noffset: 4\nlength: 3\ncaptures:\n - string: abc\n offset: 4\n length: 3\n",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Match with named capture groups",
|
||||
document: `a cat`,
|
||||
expression: `match("c(?P<cool>.t)")`,
|
||||
document: `foo bar foo foo foo`,
|
||||
expression: `match("foo (?P<bar123>bar)? foo"; "g")`,
|
||||
expected: []string{
|
||||
"D0, P[], ()::string: cat\noffset: 2\nlength: 3\ncaptures:\n - string: at\n offset: 3\n length: 2\n name: cool\n",
|
||||
"D0, P[], ()::string: foo bar foo\noffset: 0\nlength: 11\ncaptures:\n - string: bar\n offset: 4\n length: 3\n name: bar123\n",
|
||||
"D0, P[], ()::string: foo foo\noffset: 12\nlength: 8\ncaptures:\n - string: null\n offset: -1\n length: 0\n name: bar123\n",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Capture named groups into a map",
|
||||
document: `xyzzy-14`,
|
||||
expression: `capture("(?P<a>[a-z]+)-(?P<n>[0-9]+)")`,
|
||||
expected: []string{
|
||||
"D0, P[], ()::a: xyzzy\nn: \"14\"\n",
|
||||
},
|
||||
},
|
||||
{
|
||||
skipDoc: true,
|
||||
description: "Capture named groups into a map, with null",
|
||||
document: `xyzzy-14`,
|
||||
expression: `capture("(?P<a>[a-z]+)-(?P<n>[0-9]+)(?P<bar123>bar)?")`,
|
||||
expected: []string{
|
||||
"D0, P[], ()::a: xyzzy\nn: \"14\"\nbar123: null\n",
|
||||
},
|
||||
},
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user