Fixing handling of CRLF #2352

This commit is contained in:
Mike Farah 2025-12-06 19:08:37 +11:00
parent 2072808def
commit eb3d0e63e3
3 changed files with 112 additions and 48 deletions

View File

@ -11,6 +11,13 @@ import (
yaml "go.yaml.in/yaml/v4"
)
// Precompiled patterns used to classify lines of leading content in a YAML
// stream. They are compiled once at package scope so callers do not pay the
// compilation cost on every invocation.
var (
	// separatorLineRe matches a line consisting solely of a '---' document
	// separator, allowing surrounding whitespace.
	separatorLineRe = regexp.MustCompile(`^\s*---\s*$`)

	// separatorPrefixRe matches a '---' separator at the start of a line
	// together with the whitespace that follows it (e.g. the "--- " in
	// "--- cat").
	separatorPrefixRe = regexp.MustCompile(`^\s*---\s+`)

	// commentLineRe matches a line whose first non-whitespace character is '#'.
	commentLineRe = regexp.MustCompile(`^\s*#`)

	// yamlDirectiveLineRe matches a '%YAML' directive line, allowing leading
	// whitespace.
	yamlDirectiveLineRe = regexp.MustCompile(`^\s*%YAML`)
)
type yamlDecoder struct {
decoder yaml.Decoder
@ -33,51 +40,72 @@ func NewYamlDecoder(prefs YamlPreferences) Decoder {
}
func (dec *yamlDecoder) processReadStream(reader *bufio.Reader) (io.Reader, string, error) {
var commentLineRegEx = regexp.MustCompile(`^\s*#`)
var yamlDirectiveLineRegEx = regexp.MustCompile(`^\s*%YA`)
var sb strings.Builder
for {
peekBytes, err := reader.Peek(4)
if errors.Is(err, io.EOF) {
// EOF are handled else where..
return reader, sb.String(), nil
} else if err != nil {
return reader, sb.String(), err
} else if string(peekBytes[0]) == "\n" {
_, err := reader.ReadString('\n')
sb.WriteString("\n")
if errors.Is(err, io.EOF) {
return reader, sb.String(), nil
} else if err != nil {
return reader, sb.String(), err
}
} else if string(peekBytes) == "--- " {
_, err := reader.ReadString(' ')
sb.WriteString("$yqDocSeparator$\n")
if errors.Is(err, io.EOF) {
return reader, sb.String(), nil
} else if err != nil {
return reader, sb.String(), err
}
} else if string(peekBytes) == "---\n" {
_, err := reader.ReadString('\n')
sb.WriteString("$yqDocSeparator$\n")
if errors.Is(err, io.EOF) {
return reader, sb.String(), nil
} else if err != nil {
return reader, sb.String(), err
}
} else if commentLineRegEx.MatchString(string(peekBytes)) || yamlDirectiveLineRegEx.MatchString(string(peekBytes)) {
line, err := reader.ReadString('\n')
sb.WriteString(line)
if errors.Is(err, io.EOF) {
return reader, sb.String(), nil
} else if err != nil {
return reader, sb.String(), err
}
} else {
line, err := reader.ReadString('\n')
if errors.Is(err, io.EOF) && line == "" {
// no more data
return reader, sb.String(), nil
}
if err != nil && !errors.Is(err, io.EOF) {
return reader, sb.String(), err
}
// Determine newline style and strip it for inspection
newline := ""
if strings.HasSuffix(line, "\r\n") {
newline = "\r\n"
line = strings.TrimSuffix(line, "\r\n")
} else if strings.HasSuffix(line, "\n") {
newline = "\n"
line = strings.TrimSuffix(line, "\n")
}
trimmed := strings.TrimSpace(line)
// Document separator: exact line '---' or a '--- ' prefix followed by content
if separatorLineRe.MatchString(trimmed) {
sb.WriteString("$yqDocSeparator$")
sb.WriteString(newline)
if errors.Is(err, io.EOF) {
return reader, sb.String(), nil
}
continue
}
// Handle lines that start with '--- ' followed by more content (e.g. '--- cat')
if separatorPrefixRe.MatchString(line) {
match := separatorPrefixRe.FindString(line)
remainder := line[len(match):]
// normalize separator newline: if original had none, default to LF
sepNewline := newline
if sepNewline == "" {
sepNewline = "\n"
}
sb.WriteString("$yqDocSeparator$")
sb.WriteString(sepNewline)
// push the remainder back onto the reader and continue processing
reader = bufio.NewReader(io.MultiReader(strings.NewReader(remainder), reader))
if errors.Is(err, io.EOF) && remainder == "" {
return reader, sb.String(), nil
}
continue
}
// Comments, YAML directives, and blank lines are leading content
if commentLineRe.MatchString(line) || yamlDirectiveLineRe.MatchString(line) || trimmed == "" {
sb.WriteString(line)
sb.WriteString(newline)
if errors.Is(err, io.EOF) {
return reader, sb.String(), nil
}
continue
}
// First non-leading line: push it back onto a reader and return
originalLine := line + newline
return io.MultiReader(strings.NewReader(originalLine), reader), sb.String(), nil
}
}

View File

@ -5,7 +5,6 @@ import (
"bytes"
"errors"
"io"
"regexp"
"strings"
"github.com/fatih/color"
@ -37,7 +36,8 @@ func (ye *yamlEncoder) PrintDocumentSeparator(writer io.Writer) error {
func (ye *yamlEncoder) PrintLeadingContent(writer io.Writer, content string) error {
reader := bufio.NewReader(strings.NewReader(content))
var commentLineRegEx = regexp.MustCompile(`^\s*#`)
// reuse precompiled package-level regex
// (declared in decoder_yaml.go)
for {
@ -46,13 +46,19 @@ func (ye *yamlEncoder) PrintLeadingContent(writer io.Writer, content string) err
return errReading
}
if strings.Contains(readline, "$yqDocSeparator$") {
if err := ye.PrintDocumentSeparator(writer); err != nil {
return err
// Preserve the original line ending (CRLF or LF)
lineEnding := "\n"
if strings.HasSuffix(readline, "\r\n") {
lineEnding = "\r\n"
}
if ye.prefs.PrintDocSeparators {
if err := writeString(writer, "---"+lineEnding); err != nil {
return err
}
}
} else {
if len(readline) > 0 && readline != "\n" && readline[0] != '%' && !commentLineRegEx.MatchString(readline) {
if len(readline) > 0 && readline != "\n" && readline[0] != '%' && !commentLineRe.MatchString(readline) {
readline = "# " + readline
}
if ye.prefs.ColorsEnabled && strings.TrimSpace(readline) != "" {
@ -79,10 +85,15 @@ func (ye *yamlEncoder) PrintLeadingContent(writer io.Writer, content string) err
func (ye *yamlEncoder) Encode(writer io.Writer, node *CandidateNode) error {
log.Debug("encoderYaml - going to print %v", NodeToString(node))
// Detect line ending style from LeadingContent
lineEnding := "\n"
if strings.Contains(node.LeadingContent, "\r\n") {
lineEnding = "\r\n"
}
if node.Kind == ScalarNode && ye.prefs.UnwrapScalar {
valueToPrint := node.Value
if node.LeadingContent == "" || valueToPrint != "" {
valueToPrint = valueToPrint + "\n"
valueToPrint = valueToPrint + lineEnding
}
return writeString(writer, valueToPrint)
}

View File

@ -13,6 +13,31 @@ var yamlFormatScenarios = []formatScenario{
input: "--- cat",
expected: "---\ncat\n",
},
{
description: "CRLF doc separator",
skipDoc: true,
input: "---\r\ncat\r\n",
expected: "---\r\ncat\r\n",
},
{
description: "yaml directive preserved (LF)",
skipDoc: true,
input: "%YAML 1.1\n---\ncat\n",
expected: "%YAML 1.1\n---\ncat\n",
},
{
description: "yaml directive preserved (CRLF)",
skipDoc: true,
input: "%YAML 1.1\r\n---\r\ncat\r\n",
expected: "%YAML 1.1\r\n---\r\ncat\r\n",
},
{
description: "comment only no trailing newline",
skipDoc: true,
input: "# hello",
expected: "# hello\n",
},
{
description: "scalar with doc separator",
skipDoc: true,