mirror of
https://github.com/mikefarah/yq.git
synced 2026-07-05 20:15:36 +00:00
Add string slicing support (#2639)
* Initial plan * Add string slicing support to yq Agent-Logs-Url: https://github.com/mikefarah/yq/sessions/a8525fbb-77a7-4bb0-a3a7-b24f99ae8710 Co-authored-by: mikefarah <1151925+mikefarah@users.noreply.github.com> * Fix sliceStringNode signature and fix test descriptions/expressions Agent-Logs-Url: https://github.com/mikefarah/yq/sessions/58726b13-68ae-4f93-971f-eb70459edcf4 Co-authored-by: mikefarah <1151925+mikefarah@users.noreply.github.com> * Update pkg/yqlib/operator_slice.go Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Fix array slice out-of-bounds panic with very negative indices Agent-Logs-Url: https://github.com/mikefarah/yq/sessions/7c146762-d251-45fd-8555-2488f59fc57b Co-authored-by: mikefarah <1151925+mikefarah@users.noreply.github.com> * S2-S4: tighten lexer condition, fix doc header, add Unicode example Agent-Logs-Url: https://github.com/mikefarah/yq/sessions/ec06083e-e20a-45d2-bf7e-4e1fa7be1073 Co-authored-by: mikefarah <1151925+mikefarah@users.noreply.github.com> * Fix spelling: multibyte -> multi-byte in Unicode test subdescription Agent-Logs-Url: https://github.com/mikefarah/yq/sessions/6e7b304b-5b52-4e89-8bad-ba22813305c7 Co-authored-by: mikefarah <1151925+mikefarah@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: mikefarah <1151925+mikefarah@users.noreply.github.com> Co-authored-by: Mike Farah <mikefarah@gmail.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
parent
68f0322ba3
commit
3b2423e871
@ -1,5 +1,5 @@
|
|||||||
# Slice/Splice Array
|
# Slice Array or String
|
||||||
|
|
||||||
The slice array operator takes an array as input and returns a subarray. Like the `jq` equivalent, `.[10:15]` will return an array of length 5, starting from index 10 inclusive, up to index 15 exclusive. Negative numbers count backwards from the end of the array.
|
The slice operator works on both arrays and strings. Like the `jq` equivalent, `.[10:15]` will return a subarray (or substring) of length 5, starting from index 10 inclusive, up to index 15 exclusive. Negative numbers count backwards from the end of the array or string. For strings, indices count characters (Unicode code points) rather than bytes, so multi-byte characters are never split.
|
||||||
|
|
||||||
You may leave out the first or second number, which will refer to the start or end of the array respectively.
|
You may leave out the first or second number, which will refer to the start or end of the array or string respectively.
|
||||||
|
|||||||
@ -1,8 +1,8 @@
|
|||||||
# Slice/Splice Array
|
# Slice Array or String
|
||||||
|
|
||||||
The slice array operator takes an array as input and returns a subarray. Like the `jq` equivalent, `.[10:15]` will return an array of length 5, starting from index 10 inclusive, up to index 15 exclusive. Negative numbers count backwards from the end of the array.
|
The slice operator works on both arrays and strings. Like the `jq` equivalent, `.[10:15]` will return a subarray (or substring) of length 5, starting from index 10 inclusive, up to index 15 exclusive. Negative numbers count backwards from the end of the array or string. For strings, indices count characters (Unicode code points) rather than bytes, so multi-byte characters are never split.
|
||||||
|
|
||||||
You may leave out the first or second number, which will refer to the start or end of the array respectively.
|
You may leave out the first or second number, which will refer to the start or end of the array or string respectively.
|
||||||
|
|
||||||
## Slicing arrays
|
## Slicing arrays
|
||||||
Given a sample.yml file of:
|
Given a sample.yml file of:
|
||||||
@ -103,3 +103,81 @@ will output
|
|||||||
- cow
|
- cow
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Slicing strings
|
||||||
|
Given a sample.yml file of:
|
||||||
|
```yaml
|
||||||
|
country: Australia
|
||||||
|
```
|
||||||
|
then
|
||||||
|
```bash
|
||||||
|
yq '.country[0:5]' sample.yml
|
||||||
|
```
|
||||||
|
will output
|
||||||
|
```yaml
|
||||||
|
Austr
|
||||||
|
```
|
||||||
|
|
||||||
|
## Slicing strings - without the second number
|
||||||
|
Finishes at the end of the string
|
||||||
|
|
||||||
|
Given a sample.yml file of:
|
||||||
|
```yaml
|
||||||
|
country: Australia
|
||||||
|
```
|
||||||
|
then
|
||||||
|
```bash
|
||||||
|
yq '.country[5:]' sample.yml
|
||||||
|
```
|
||||||
|
will output
|
||||||
|
```yaml
|
||||||
|
alia
|
||||||
|
```
|
||||||
|
|
||||||
|
## Slicing strings - without the first number
|
||||||
|
Starts from the start of the string
|
||||||
|
|
||||||
|
Given a sample.yml file of:
|
||||||
|
```yaml
|
||||||
|
country: Australia
|
||||||
|
```
|
||||||
|
then
|
||||||
|
```bash
|
||||||
|
yq '.country[:5]' sample.yml
|
||||||
|
```
|
||||||
|
will output
|
||||||
|
```yaml
|
||||||
|
Austr
|
||||||
|
```
|
||||||
|
|
||||||
|
## Slicing strings - use negative numbers to count backwards from the end
|
||||||
|
Negative indices count from the end of the string
|
||||||
|
|
||||||
|
Given a sample.yml file of:
|
||||||
|
```yaml
|
||||||
|
country: Australia
|
||||||
|
```
|
||||||
|
then
|
||||||
|
```bash
|
||||||
|
yq '.country[-5:]' sample.yml
|
||||||
|
```
|
||||||
|
will output
|
||||||
|
```yaml
|
||||||
|
ralia
|
||||||
|
```
|
||||||
|
|
||||||
|
## Slicing strings - Unicode
|
||||||
|
Indices are rune-based, so multi-byte characters are handled correctly
|
||||||
|
|
||||||
|
Given a sample.yml file of:
|
||||||
|
```yaml
|
||||||
|
greeting: héllo
|
||||||
|
```
|
||||||
|
then
|
||||||
|
```bash
|
||||||
|
yq '.greeting[1:3]' sample.yml
|
||||||
|
```
|
||||||
|
will output
|
||||||
|
```yaml
|
||||||
|
él
|
||||||
|
```
|
||||||
|
|
||||||
|
|||||||
@ -131,6 +131,11 @@ func handleToken(tokens []*token, index int, postProcessedTokens []*token) (toke
|
|||||||
log.Debugf("previous token is : traverseArrayOpType")
|
log.Debugf("previous token is : traverseArrayOpType")
|
||||||
// need to put the number 0 before this token, as that is implied
|
// need to put the number 0 before this token, as that is implied
|
||||||
postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: createValueOperation(0, "0")})
|
postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: createValueOperation(0, "0")})
|
||||||
|
} else if index >= 2 && tokens[index-1].TokenType == openCollect &&
|
||||||
|
(tokens[index-2].TokenType == operationToken || tokens[index-2].TokenType == closeCollect || tokens[index-2].TokenType == closeCollectObject) {
|
||||||
|
log.Debugf("previous token is : openCollect following a traversal, implying 0 start")
|
||||||
|
// need to put the number 0 before this token, as that is implied
|
||||||
|
postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: createValueOperation(0, "0")})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -31,6 +31,24 @@ func clampSliceIndex(index, length int) int {
|
|||||||
return index
|
return index
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func sliceStringNode(lhsNode *CandidateNode, firstNumber int, secondNumber int) *CandidateNode {
|
||||||
|
runes := []rune(lhsNode.Value)
|
||||||
|
length := len(runes)
|
||||||
|
|
||||||
|
relativeFirstNumber := clampSliceIndex(firstNumber, length)
|
||||||
|
relativeSecondNumber := clampSliceIndex(secondNumber, length)
|
||||||
|
if relativeSecondNumber < relativeFirstNumber {
|
||||||
|
relativeSecondNumber = relativeFirstNumber
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Debugf("sliceStringNode: slice from %v to %v", relativeFirstNumber, relativeSecondNumber)
|
||||||
|
|
||||||
|
slicedString := string(runes[relativeFirstNumber:relativeSecondNumber])
|
||||||
|
replacement := lhsNode.CreateReplacement(ScalarNode, lhsNode.Tag, slicedString)
|
||||||
|
replacement.Style = lhsNode.Style
|
||||||
|
return replacement
|
||||||
|
}
|
||||||
|
|
||||||
func sliceArrayOperator(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) {
|
func sliceArrayOperator(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) {
|
||||||
|
|
||||||
log.Debug("slice array operator!")
|
log.Debug("slice array operator!")
|
||||||
@ -43,16 +61,21 @@ func sliceArrayOperator(d *dataTreeNavigator, context Context, expressionNode *E
|
|||||||
lhsNode := el.Value.(*CandidateNode)
|
lhsNode := el.Value.(*CandidateNode)
|
||||||
|
|
||||||
firstNumber, err := getSliceNumber(d, context, lhsNode, expressionNode.LHS)
|
firstNumber, err := getSliceNumber(d, context, lhsNode, expressionNode.LHS)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return Context{}, err
|
return Context{}, err
|
||||||
}
|
}
|
||||||
relativeFirstNumber := clampSliceIndex(firstNumber, len(lhsNode.Content))
|
|
||||||
|
|
||||||
secondNumber, err := getSliceNumber(d, context, lhsNode, expressionNode.RHS)
|
secondNumber, err := getSliceNumber(d, context, lhsNode, expressionNode.RHS)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return Context{}, err
|
return Context{}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if lhsNode.Kind == ScalarNode && lhsNode.guessTagFromCustomType() == "!!str" {
|
||||||
|
results.PushBack(sliceStringNode(lhsNode, firstNumber, secondNumber))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
relativeFirstNumber := clampSliceIndex(firstNumber, len(lhsNode.Content))
|
||||||
relativeSecondNumber := clampSliceIndex(secondNumber, len(lhsNode.Content))
|
relativeSecondNumber := clampSliceIndex(secondNumber, len(lhsNode.Content))
|
||||||
|
|
||||||
log.Debugf("calculateIndicesToTraverse: slice from %v to %v", relativeFirstNumber, relativeSecondNumber)
|
log.Debugf("calculateIndicesToTraverse: slice from %v to %v", relativeFirstNumber, relativeSecondNumber)
|
||||||
|
|||||||
@ -129,6 +129,84 @@ var sliceArrayScenarios = []expressionScenario{
|
|||||||
"D0, P[], (!!seq)::[]\n",
|
"D0, P[], (!!seq)::[]\n",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
description: "Slicing strings",
|
||||||
|
document: `country: Australia`,
|
||||||
|
expression: `.country[0:5]`,
|
||||||
|
expected: []string{
|
||||||
|
"D0, P[country], (!!str)::Austr\n",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "Slicing strings - without the second number",
|
||||||
|
subdescription: "Finishes at the end of the string",
|
||||||
|
document: `country: Australia`,
|
||||||
|
expression: `.country[5:]`,
|
||||||
|
expected: []string{
|
||||||
|
"D0, P[country], (!!str)::alia\n",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "Slicing strings - without the first number",
|
||||||
|
subdescription: "Starts from the start of the string",
|
||||||
|
document: `country: Australia`,
|
||||||
|
expression: `.country[:5]`,
|
||||||
|
expected: []string{
|
||||||
|
"D0, P[country], (!!str)::Austr\n",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "Slicing strings - use negative numbers to count backwards from the end",
|
||||||
|
subdescription: "Negative indices count from the end of the string",
|
||||||
|
document: `country: Australia`,
|
||||||
|
expression: `.country[-5:]`,
|
||||||
|
expected: []string{
|
||||||
|
"D0, P[country], (!!str)::ralia\n",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
skipDoc: true,
|
||||||
|
document: `country: Australia`,
|
||||||
|
expression: `.country[1:-1]`,
|
||||||
|
expected: []string{
|
||||||
|
"D0, P[country], (!!str)::ustrali\n",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
skipDoc: true,
|
||||||
|
document: `country: Australia`,
|
||||||
|
expression: `.country[:]`,
|
||||||
|
expected: []string{
|
||||||
|
"D0, P[country], (!!str)::Australia\n",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
skipDoc: true,
|
||||||
|
description: "second index beyond string length clamps",
|
||||||
|
document: `country: Australia`,
|
||||||
|
expression: `.country[:100]`,
|
||||||
|
expected: []string{
|
||||||
|
"D0, P[country], (!!str)::Australia\n",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
skipDoc: true,
|
||||||
|
description: "first index beyond string length returns empty string",
|
||||||
|
document: `country: Australia`,
|
||||||
|
expression: `.country[100:]`,
|
||||||
|
expected: []string{
|
||||||
|
"D0, P[country], (!!str)::\n",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "Slicing strings - Unicode",
|
||||||
|
subdescription: "Indices are rune-based, so multi-byte characters are handled correctly",
|
||||||
|
document: `greeting: héllo`,
|
||||||
|
expression: `.greeting[1:3]`,
|
||||||
|
expected: []string{
|
||||||
|
"D0, P[greeting], (!!str)::él\n",
|
||||||
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestSliceOperatorScenarios(t *testing.T) {
|
func TestSliceOperatorScenarios(t *testing.T) {
|
||||||
|
|||||||
@ -99,7 +99,11 @@ func traverseArrayOperator(d *dataTreeNavigator, context Context, expressionNode
|
|||||||
log.Debugf("--traverseArrayOperator")
|
log.Debugf("--traverseArrayOperator")
|
||||||
|
|
||||||
if expressionNode.RHS != nil && expressionNode.RHS.RHS != nil && expressionNode.RHS.RHS.Operation.OperationType == createMapOpType {
|
if expressionNode.RHS != nil && expressionNode.RHS.RHS != nil && expressionNode.RHS.RHS.Operation.OperationType == createMapOpType {
|
||||||
return sliceArrayOperator(d, context, expressionNode.RHS.RHS)
|
lhsContext, err := d.GetMatchingNodes(context, expressionNode.LHS)
|
||||||
|
if err != nil {
|
||||||
|
return Context{}, err
|
||||||
|
}
|
||||||
|
return sliceArrayOperator(d, lhsContext, expressionNode.RHS.RHS)
|
||||||
}
|
}
|
||||||
|
|
||||||
lhs, err := d.GetMatchingNodes(context, expressionNode.LHS)
|
lhs, err := d.GetMatchingNodes(context, expressionNode.LHS)
|
||||||
|
|||||||
@ -299,3 +299,8 @@ Ffile
|
|||||||
Fquery
|
Fquery
|
||||||
coverpkg
|
coverpkg
|
||||||
gsub
|
gsub
|
||||||
|
ralia
|
||||||
|
Austr
|
||||||
|
ustrali
|
||||||
|
héllo
|
||||||
|
alia
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user