Add string slicing support (#2639)

* Initial plan

* Add string slicing support to yq

Agent-Logs-Url: https://github.com/mikefarah/yq/sessions/a8525fbb-77a7-4bb0-a3a7-b24f99ae8710

Co-authored-by: mikefarah <1151925+mikefarah@users.noreply.github.com>

* Fix sliceStringNode signature and fix test descriptions/expressions

Agent-Logs-Url: https://github.com/mikefarah/yq/sessions/58726b13-68ae-4f93-971f-eb70459edcf4

Co-authored-by: mikefarah <1151925+mikefarah@users.noreply.github.com>

* Update pkg/yqlib/operator_slice.go

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Fix array slice out-of-bounds panic with very negative indices

Agent-Logs-Url: https://github.com/mikefarah/yq/sessions/7c146762-d251-45fd-8555-2488f59fc57b

Co-authored-by: mikefarah <1151925+mikefarah@users.noreply.github.com>

* S2-S4: tighten lexer condition, fix doc header, add Unicode example

Agent-Logs-Url: https://github.com/mikefarah/yq/sessions/ec06083e-e20a-45d2-bf7e-4e1fa7be1073

Co-authored-by: mikefarah <1151925+mikefarah@users.noreply.github.com>

* Fix spelling: multibyte -> multi-byte in Unicode test subdescription

Agent-Logs-Url: https://github.com/mikefarah/yq/sessions/6e7b304b-5b52-4e89-8bad-ba22813305c7

Co-authored-by: mikefarah <1151925+mikefarah@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: mikefarah <1151925+mikefarah@users.noreply.github.com>
Co-authored-by: Mike Farah <mikefarah@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
Copilot 2026-04-06 19:29:07 +10:00 committed by GitHub
parent 68f0322ba3
commit 3b2423e871
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 203 additions and 10 deletions

View File

@ -1,5 +1,5 @@
# Slice/Splice Array
# Slice Array or String
The slice array operator takes an array as input and returns a subarray. Like the `jq` equivalent, `.[10:15]` will return an array of length 5, starting from index 10 inclusive, up to index 15 exclusive. Negative numbers count backwards from the end of the array.
The slice operator works on both arrays and strings. Like the `jq` equivalent, `.[10:15]` will return a subarray (or substring) of length 5, starting from index 10 inclusive, up to index 15 exclusive. Negative numbers count backwards from the end of the array or string.
You may leave out the first or second number, which will refer to the start or end of the array respectively.
You may leave out the first or second number, which will refer to the start or end of the array or string respectively.

View File

@ -1,8 +1,8 @@
# Slice/Splice Array
# Slice Array or String
The slice array operator takes an array as input and returns a subarray. Like the `jq` equivalent, `.[10:15]` will return an array of length 5, starting from index 10 inclusive, up to index 15 exclusive. Negative numbers count backwards from the end of the array.
The slice operator works on both arrays and strings. Like the `jq` equivalent, `.[10:15]` will return a subarray (or substring) of length 5, starting from index 10 inclusive, up to index 15 exclusive. Negative numbers count backwards from the end of the array or string.
You may leave out the first or second number, which will refer to the start or end of the array respectively.
You may leave out the first or second number, which will refer to the start or end of the array or string respectively.
## Slicing arrays
Given a sample.yml file of:
@ -103,3 +103,81 @@ will output
- cow
```
## Slicing strings
Given a sample.yml file of:
```yaml
country: Australia
```
then
```bash
yq '.country[0:5]' sample.yml
```
will output
```yaml
Austr
```
## Slicing strings - without the second number
Finishes at the end of the string
Given a sample.yml file of:
```yaml
country: Australia
```
then
```bash
yq '.country[5:]' sample.yml
```
will output
```yaml
alia
```
## Slicing strings - without the first number
Starts from the start of the string
Given a sample.yml file of:
```yaml
country: Australia
```
then
```bash
yq '.country[:5]' sample.yml
```
will output
```yaml
Austr
```
## Slicing strings - use negative numbers to count backwards from the end
Negative indices count from the end of the string
Given a sample.yml file of:
```yaml
country: Australia
```
then
```bash
yq '.country[-5:]' sample.yml
```
will output
```yaml
ralia
```
## Slicing strings - Unicode
Indices are rune-based, so multi-byte characters are handled correctly
Given a sample.yml file of:
```yaml
greeting: héllo
```
then
```bash
yq '.greeting[1:3]' sample.yml
```
will output
```yaml
él
```

View File

@ -131,6 +131,11 @@ func handleToken(tokens []*token, index int, postProcessedTokens []*token) (toke
log.Debugf("previous token is : traverseArrayOpType")
// need to put the number 0 before this token, as that is implied
postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: createValueOperation(0, "0")})
} else if index >= 2 && tokens[index-1].TokenType == openCollect &&
(tokens[index-2].TokenType == operationToken || tokens[index-2].TokenType == closeCollect || tokens[index-2].TokenType == closeCollectObject) {
log.Debugf("previous token is : openCollect following a traversal, implying 0 start")
// need to put the number 0 before this token, as that is implied
postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: createValueOperation(0, "0")})
}
}

View File

@ -31,6 +31,24 @@ func clampSliceIndex(index, length int) int {
return index
}
func sliceStringNode(lhsNode *CandidateNode, firstNumber int, secondNumber int) *CandidateNode {
runes := []rune(lhsNode.Value)
length := len(runes)
relativeFirstNumber := clampSliceIndex(firstNumber, length)
relativeSecondNumber := clampSliceIndex(secondNumber, length)
if relativeSecondNumber < relativeFirstNumber {
relativeSecondNumber = relativeFirstNumber
}
log.Debugf("sliceStringNode: slice from %v to %v", relativeFirstNumber, relativeSecondNumber)
slicedString := string(runes[relativeFirstNumber:relativeSecondNumber])
replacement := lhsNode.CreateReplacement(ScalarNode, lhsNode.Tag, slicedString)
replacement.Style = lhsNode.Style
return replacement
}
func sliceArrayOperator(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) {
log.Debug("slice array operator!")
@ -43,16 +61,21 @@ func sliceArrayOperator(d *dataTreeNavigator, context Context, expressionNode *E
lhsNode := el.Value.(*CandidateNode)
firstNumber, err := getSliceNumber(d, context, lhsNode, expressionNode.LHS)
if err != nil {
return Context{}, err
}
relativeFirstNumber := clampSliceIndex(firstNumber, len(lhsNode.Content))
secondNumber, err := getSliceNumber(d, context, lhsNode, expressionNode.RHS)
if err != nil {
return Context{}, err
}
if lhsNode.Kind == ScalarNode && lhsNode.guessTagFromCustomType() == "!!str" {
results.PushBack(sliceStringNode(lhsNode, firstNumber, secondNumber))
continue
}
relativeFirstNumber := clampSliceIndex(firstNumber, len(lhsNode.Content))
relativeSecondNumber := clampSliceIndex(secondNumber, len(lhsNode.Content))
log.Debugf("calculateIndicesToTraverse: slice from %v to %v", relativeFirstNumber, relativeSecondNumber)

View File

@ -129,6 +129,84 @@ var sliceArrayScenarios = []expressionScenario{
"D0, P[], (!!seq)::[]\n",
},
},
{
description: "Slicing strings",
document: `country: Australia`,
expression: `.country[0:5]`,
expected: []string{
"D0, P[country], (!!str)::Austr\n",
},
},
{
description: "Slicing strings - without the second number",
subdescription: "Finishes at the end of the string",
document: `country: Australia`,
expression: `.country[5:]`,
expected: []string{
"D0, P[country], (!!str)::alia\n",
},
},
{
description: "Slicing strings - without the first number",
subdescription: "Starts from the start of the string",
document: `country: Australia`,
expression: `.country[:5]`,
expected: []string{
"D0, P[country], (!!str)::Austr\n",
},
},
{
description: "Slicing strings - use negative numbers to count backwards from the end",
subdescription: "Negative indices count from the end of the string",
document: `country: Australia`,
expression: `.country[-5:]`,
expected: []string{
"D0, P[country], (!!str)::ralia\n",
},
},
{
skipDoc: true,
document: `country: Australia`,
expression: `.country[1:-1]`,
expected: []string{
"D0, P[country], (!!str)::ustrali\n",
},
},
{
skipDoc: true,
document: `country: Australia`,
expression: `.country[:]`,
expected: []string{
"D0, P[country], (!!str)::Australia\n",
},
},
{
skipDoc: true,
description: "second index beyond string length clamps",
document: `country: Australia`,
expression: `.country[:100]`,
expected: []string{
"D0, P[country], (!!str)::Australia\n",
},
},
{
skipDoc: true,
description: "first index beyond string length returns empty string",
document: `country: Australia`,
expression: `.country[100:]`,
expected: []string{
"D0, P[country], (!!str)::\n",
},
},
{
description: "Slicing strings - Unicode",
subdescription: "Indices are rune-based, so multi-byte characters are handled correctly",
document: `greeting: héllo`,
expression: `.greeting[1:3]`,
expected: []string{
"D0, P[greeting], (!!str)::él\n",
},
},
}
func TestSliceOperatorScenarios(t *testing.T) {

View File

@ -99,7 +99,11 @@ func traverseArrayOperator(d *dataTreeNavigator, context Context, expressionNode
log.Debugf("--traverseArrayOperator")
if expressionNode.RHS != nil && expressionNode.RHS.RHS != nil && expressionNode.RHS.RHS.Operation.OperationType == createMapOpType {
return sliceArrayOperator(d, context, expressionNode.RHS.RHS)
lhsContext, err := d.GetMatchingNodes(context, expressionNode.LHS)
if err != nil {
return Context{}, err
}
return sliceArrayOperator(d, lhsContext, expressionNode.RHS.RHS)
}
lhs, err := d.GetMatchingNodes(context, expressionNode.LHS)

View File

@ -298,4 +298,9 @@ subsubarray
Ffile
Fquery
coverpkg
gsub
gsub
ralia
Austr
ustrali
héllo
alia