Strip whitespace when decoding base64 #2507

This commit is contained in:
Mike Farah 2025-11-15 14:10:46 +11:00
parent e056b91a00
commit 258b84a05e
4 changed files with 385 additions and 23 deletions

269
pkg/yqlib/base64_test.go Normal file
View File

@ -0,0 +1,269 @@
//go:build !yq_nobase64
package yqlib
import (
"bufio"
"fmt"
"testing"
"github.com/mikefarah/yq/v4/test"
)
// Fixtures shared by the base64 scenarios. Each "Encoded" value is the base64
// text and the matching "Decoded" value is the string it represents.
const (
	// Plain ASCII string.
	base64EncodedSimple = "YSBzcGVjaWFsIHN0cmluZw=="
	base64DecodedSimple = "a special string"

	// NOTE: despite the "Decoded" in its name, this is the *encoded* simple
	// value wrapped in leading/trailing whitespace; it is used as decoder input.
	base64DecodedSimpleExtraSpaces = "\n " + base64EncodedSimple + " \n"

	// String containing a multi-byte UTF-8 character.
	base64EncodedUTF8 = "V29ya3Mgd2l0aCBVVEYtMTYg8J+Yig=="
	base64DecodedUTF8 = "Works with UTF-16 😊"

	// A small YAML document, including its trailing newline.
	base64EncodedYaml = "YTogYXBwbGUK"
	base64DecodedYaml = "a: apple\n"

	// Empty input and output.
	base64EncodedEmpty = ""
	base64DecodedEmpty = ""

	// "cats" encoded without and with its trailing "==" padding.
	base64MissingPadding        = "Y2F0cw"
	base64DecodedMissingPadding = "cats"
	base64EncodedCats           = "Y2F0cw=="
	base64DecodedCats           = "cats"
)
// base64Scenarios is the table of cases executed by testBase64Scenario.
// scenarioType selects the pipeline: "decode" (base64 in, YAML out), "encode"
// (YAML in, base64 out), "roundtrip" (base64 in, base64 out) and
// "encode-error" (YAML in, base64 out, expecting expectedError).
// Entries with skipDoc set are ignored by documentBase64Scenario.
var base64Scenarios = []formatScenario{
// --- decode scenarios: expected output carries a trailing newline ---
{
skipDoc: true,
description: "empty decode",
input: base64EncodedEmpty,
expected: base64DecodedEmpty + "\n",
scenarioType: "decode",
},
{
skipDoc: true,
description: "simple decode",
input: base64EncodedSimple,
expected: base64DecodedSimple + "\n",
scenarioType: "decode",
},
{
description: "Decode base64: simple",
subdescription: "Decoded data is assumed to be a string.",
input: base64EncodedSimple,
expected: base64DecodedSimple + "\n",
scenarioType: "decode",
},
{
description: "Decode base64: UTF-8",
subdescription: "Base64 decoding supports UTF-8 encoded strings.",
input: base64EncodedUTF8,
expected: base64DecodedUTF8 + "\n",
scenarioType: "decode",
},
{
// Input lacks the trailing "==" padding.
skipDoc: true,
description: "decode missing padding",
input: base64MissingPadding,
expected: base64DecodedMissingPadding + "\n",
scenarioType: "decode",
},
{
// The input constant holds the encoded value surrounded by whitespace,
// despite the "Decoded" in its name.
description: "Decode with extra spaces",
subdescription: "Extra leading/trailing whitespace is stripped",
input: base64DecodedSimpleExtraSpaces,
expected: base64DecodedSimple + "\n",
scenarioType: "decode",
},
{
skipDoc: true,
description: "decode with padding",
input: base64EncodedCats,
expected: base64DecodedCats + "\n",
scenarioType: "decode",
},
{
skipDoc: true,
description: "decode yaml document",
input: base64EncodedYaml,
expected: base64DecodedYaml + "\n",
scenarioType: "decode",
},
// --- encode scenarios: input is a YAML string, expected output has no trailing newline ---
{
description: "Encode base64: string",
input: "\"" + base64DecodedSimple + "\"",
expected: base64EncodedSimple,
scenarioType: "encode",
},
{
description: "Encode base64: string from document",
subdescription: "Extract a string field and encode it to base64.",
input: "coolData: \"" + base64DecodedSimple + "\"",
expression: ".coolData",
expected: base64EncodedSimple,
scenarioType: "encode",
},
{
skipDoc: true,
description: "encode empty string",
input: "\"\"",
expected: "",
scenarioType: "encode",
},
{
skipDoc: true,
description: "encode UTF-8 string",
input: "\"" + base64DecodedUTF8 + "\"",
expected: base64EncodedUTF8,
scenarioType: "encode",
},
{
skipDoc: true,
description: "encode cats",
input: "\"" + base64DecodedCats + "\"",
expected: base64EncodedCats,
scenarioType: "encode",
},
// --- roundtrip scenarios: decode base64, then encode it again ---
{
description: "Roundtrip: simple",
skipDoc: true,
input: base64EncodedSimple,
expected: base64EncodedSimple,
scenarioType: "roundtrip",
},
{
description: "Roundtrip: UTF-8",
skipDoc: true,
input: base64EncodedUTF8,
expected: base64EncodedUTF8,
scenarioType: "roundtrip",
},
{
// Unpadded input is expected to come back in its padded form.
description: "Roundtrip: missing padding",
skipDoc: true,
input: base64MissingPadding,
expected: base64EncodedCats,
scenarioType: "roundtrip",
},
{
description: "Roundtrip: empty",
skipDoc: true,
input: base64EncodedEmpty,
expected: base64EncodedEmpty,
scenarioType: "roundtrip",
},
// --- encode-error scenarios: non-string YAML nodes are expected to be rejected ---
{
description: "Encode error: non-string",
skipDoc: true,
input: "123",
expectedError: "cannot encode !!int as base64, can only operate on strings",
scenarioType: "encode-error",
},
{
description: "Encode error: array",
skipDoc: true,
input: "[1, 2, 3]",
expectedError: "cannot encode !!seq as base64, can only operate on strings",
scenarioType: "encode-error",
},
{
description: "Encode error: map",
skipDoc: true,
input: "{b: c}",
expectedError: "cannot encode !!map as base64, can only operate on strings",
scenarioType: "encode-error",
},
}
// testBase64Scenario runs a single scenario through the decoder/encoder pair
// selected by its scenarioType and checks the outcome against the scenario's
// expected output (or expectedError for "encode-error"). An unknown
// scenarioType panics.
func testBase64Scenario(t *testing.T, s formatScenario) {
	switch s.scenarioType {
	case "", "decode":
		// Decode base64 and render the result as YAML with a 4-space indent.
		prefs := ConfiguredYamlPreferences.Copy()
		prefs.Indent = 4
		actual := mustProcessFormatScenario(s, NewBase64Decoder(), NewYamlEncoder(prefs))
		test.AssertResultWithContext(t, s.expected, actual, s.description)

	case "encode":
		actual := mustProcessFormatScenario(s, NewYamlDecoder(ConfiguredYamlPreferences), NewBase64Encoder())
		test.AssertResultWithContext(t, s.expected, actual, s.description)

	case "roundtrip":
		actual := mustProcessFormatScenario(s, NewBase64Decoder(), NewBase64Encoder())
		test.AssertResultWithContext(t, s.expected, actual, s.description)

	case "encode-error":
		result, err := processFormatScenario(s, NewYamlDecoder(ConfiguredYamlPreferences), NewBase64Encoder())
		if err != nil {
			test.AssertResultComplexWithContext(t, s.expectedError, err.Error(), s.description)
			return
		}
		// The encode was supposed to fail.
		t.Errorf("Expected error '%v' but it worked: %v", s.expectedError, result)

	default:
		panic(fmt.Sprintf("unhandled scenario type %q", s.scenarioType))
	}
}
// documentBase64Scenario writes the documentation section for one scenario to
// w. Scenarios flagged skipDoc produce no output; "decode" (or empty) and
// "encode" scenario types are dispatched to their writers, and any other type
// panics.
func documentBase64Scenario(_ *testing.T, w *bufio.Writer, i interface{}) {
	scenario := i.(formatScenario)
	if scenario.skipDoc {
		return
	}

	switch kind := scenario.scenarioType; kind {
	case "encode":
		documentBase64EncodeScenario(w, scenario)
	case "", "decode":
		documentBase64DecodeScenario(w, scenario)
	default:
		panic(fmt.Sprintf("unhandled scenario type %q", kind))
	}
}
// documentBase64DecodeScenario writes a markdown section for a decode
// scenario: heading, optional sub-description, the sample input, the yq
// command line, and the YAML produced by decoding the input.
func documentBase64DecodeScenario(w *bufio.Writer, s formatScenario) {
	// An empty expression is documented as the identity expression.
	expr := s.expression
	if expr == "" {
		expr = "."
	}
	write := func(text string) { writeOrPanic(w, text) }

	write(fmt.Sprintf("## %v\n", s.description))
	if sub := s.subdescription; sub != "" {
		write(sub)
		write("\n\n")
	}

	write("Given a sample.txt file of:\n")
	write(fmt.Sprintf("```\n%v\n```\n", s.input))
	write("then\n")
	write(fmt.Sprintf("```bash\nyq -p=base64 -oy '%v' sample.txt\n```\n", expr))
	write("will output\n")

	decoded := mustProcessFormatScenario(s, NewBase64Decoder(), NewYamlEncoder(ConfiguredYamlPreferences))
	write(fmt.Sprintf("```yaml\n%v```\n\n", decoded))
}
// documentBase64EncodeScenario writes a markdown section for an encode
// scenario: heading, optional sub-description, the sample YAML input, the yq
// command line, and the base64 text produced by encoding the input.
func documentBase64EncodeScenario(w *bufio.Writer, s formatScenario) {
	writeOrPanic(w, fmt.Sprintf("## %v\n", s.description))
	if s.subdescription != "" {
		writeOrPanic(w, s.subdescription)
		writeOrPanic(w, "\n\n")
	}
	writeOrPanic(w, "Given a sample.yml file of:\n")
	writeOrPanic(w, fmt.Sprintf("```yaml\n%v\n```\n", s.input))
	writeOrPanic(w, "then\n")

	// An empty expression is documented as the identity expression.
	expression := s.expression
	if expression == "" {
		expression = "."
	}
	writeOrPanic(w, fmt.Sprintf("```bash\nyq -o=base64 '%v' sample.yml\n```\n", expression))
	writeOrPanic(w, "will output\n")

	// The encode scenarios expect base64 output without a trailing newline, so
	// one is added here; otherwise the closing fence lands on the same line as
	// the encoded text and the markdown code block is never closed.
	encoded := mustProcessFormatScenario(s, NewYamlDecoder(ConfiguredYamlPreferences), NewBase64Encoder())
	writeOrPanic(w, fmt.Sprintf("```\n%v\n```\n\n", encoded))
}
// TestBase64Scenarios executes every entry of base64Scenarios and then hands
// the same entries to documentScenarios to produce the "base64" usage doc.
func TestBase64Scenarios(t *testing.T) {
	docScenarios := make([]interface{}, len(base64Scenarios))
	for idx := range base64Scenarios {
		scenario := base64Scenarios[idx]
		testBase64Scenario(t, scenario)
		// documentScenarios takes untyped values, so box each scenario.
		docScenarios[idx] = scenario
	}
	documentScenarios(t, "usage", "base64", docScenarios, documentBase64Scenario)
}

View File

@ -9,28 +9,6 @@ import (
"strings"
)
// base64Padder wraps a reader and, once the wrapped reader is exhausted,
// appends '=' characters so that the total number of bytes handed to the
// caller is a multiple of four. count tracks the bytes returned so far.
type base64Padder struct {
	count int
	io.Reader
}

// pad writes the '=' characters still missing to reach a multiple of four
// into buf and returns how many were written. If buf is too small to hold
// them all, the remainder is produced by a later call.
func (c *base64Padder) pad(buf []byte) (int, error) {
	pad := strings.Repeat("=", (4 - c.count%4))
	n, err := strings.NewReader(pad).Read(buf)
	c.count += n
	return n, err
}

// Read forwards to the wrapped reader and, at end of input, emits any padding
// that is still required before reporting io.EOF.
func (c *base64Padder) Read(buf []byte) (int, error) {
	n, err := c.Reader.Read(buf)
	c.count += n
	if err == io.EOF && c.count%4 != 0 {
		if n > 0 {
			// A reader may return its final bytes together with io.EOF.
			// Hand those bytes back first; padding into buf now would
			// overwrite them. The next call sees (0, io.EOF) and pads.
			return n, nil
		}
		return c.pad(buf)
	}
	return n, err
}
type base64Decoder struct {
reader io.Reader
finished bool
@ -43,7 +21,25 @@ func NewBase64Decoder() Decoder {
}
func (dec *base64Decoder) Init(reader io.Reader) error {
dec.reader = &base64Padder{Reader: reader}
// Read all data from the reader and strip leading/trailing whitespace
// This is necessary because base64 decoding needs to see the complete input
// to handle padding correctly, and we need to strip whitespace before decoding.
buf := new(bytes.Buffer)
if _, err := buf.ReadFrom(reader); err != nil {
return err
}
// Strip leading and trailing whitespace
stripped := strings.TrimSpace(buf.String())
// Add padding if needed (base64 strings should be a multiple of 4 characters)
padLen := len(stripped) % 4
if padLen > 0 {
stripped += strings.Repeat("=", 4-padLen)
}
// Create a new reader from the stripped and padded data
dec.reader = strings.NewReader(stripped)
dec.readAnything = false
dec.finished = false
return nil

View File

@ -0,0 +1,88 @@
# Base64
Encode and decode to and from Base64.
Base64 assumes [RFC4648](https://rfc-editor.org/rfc/rfc4648.html) encoding. Encoding and decoding both assume that the content is a UTF-8 string and not binary content.
See below for examples
## Decode base64: simple
Decoded data is assumed to be a string.
Given a sample.txt file of:
```
YSBzcGVjaWFsIHN0cmluZw==
```
then
```bash
yq -p=base64 -oy '.' sample.txt
```
will output
```yaml
a special string
```
## Decode base64: UTF-8
Base64 decoding supports UTF-8 encoded strings.
Given a sample.txt file of:
```
V29ya3Mgd2l0aCBVVEYtMTYg8J+Yig==
```
then
```bash
yq -p=base64 -oy '.' sample.txt
```
will output
```yaml
Works with UTF-16 😊
```
## Decode with extra spaces
Extra leading/trailing whitespace is stripped
Given a sample.txt file of:
```
YSBzcGVjaWFsIHN0cmluZw==
```
then
```bash
yq -p=base64 -oy '.' sample.txt
```
will output
```yaml
a special string
```
## Encode base64: string
Given a sample.yml file of:
```yaml
"a special string"
```
then
```bash
yq -o=base64 '.' sample.yml
```
will output
```
YSBzcGVjaWFsIHN0cmluZw==
```
## Encode base64: string from document
Extract a string field and encode it to base64.
Given a sample.yml file of:
```yaml
coolData: "a special string"
```
then
```bash
yq -o=base64 '.coolData' sample.yml
```
will output
```
YSBzcGVjaWFsIHN0cmluZw==
```

View File

@ -0,0 +1,9 @@
# Base64
Encode and decode to and from Base64.
Base64 assumes [RFC4648](https://rfc-editor.org/rfc/rfc4648.html) encoding. Encoding and decoding both assume that the content is a UTF-8 string and not binary content.
See below for examples