From dd259b495721c8be4d48ac487fa2eae047bc193b Mon Sep 17 00:00:00 2001 From: Peter Matseykanets Date: Thu, 14 Oct 2021 01:13:19 -0400 Subject: [PATCH] Make deepMatch report in linear time The current implementation of deepMatch() has exponential runtime. Given a long enough input and a pattern with multiple wildcards, it takes a long time to complete, if it completes at all, which can potentially be used maliciously to cause a denial of service (CPU and memory consumption). E.g. running this in the root of this repository time yq eval '.jobs.publishDocker.steps.[] | select (.run == "****outputs")' .github/workflows/release.yml gives on my laptop 25.11s user 0.06s system 99% cpu 25.182 total Whereas the updated implementation gives 0.01s user 0.01s system 36% cpu 0.049 total There are numerous similar CVEs reported for glob evaluation in different shells/ftp-servers/libraries. The replacement implementation with linear runtime is shamelessly taken verbatim from the brilliant article by Russ Cox https://research.swtch.com/glob --- pkg/yqlib/matchKeyString.go | 58 ++++++++++++++++++++++---------- pkg/yqlib/matchKeyString_test.go | 40 ++++++++++++++++++++++ 2 files changed, 81 insertions(+), 17 deletions(-) create mode 100644 pkg/yqlib/matchKeyString_test.go diff --git a/pkg/yqlib/matchKeyString.go b/pkg/yqlib/matchKeyString.go index d89a90f9..bfea235e 100644 --- a/pkg/yqlib/matchKeyString.go +++ b/pkg/yqlib/matchKeyString.go @@ -9,26 +9,50 @@ func matchKey(name string, pattern string) (matched bool) { log.Debug("wild!") return true } - return deepMatch([]rune(name), []rune(pattern)) + return deepMatch(name, pattern) } -func deepMatch(str, pattern []rune) bool { - for len(pattern) > 0 { - switch pattern[0] { - default: - if len(str) == 0 || str[0] != pattern[0] { - return false +// deepMatch reports whether the name matches the pattern in linear time. 
+// Source https://research.swtch.com/glob +func deepMatch(name, pattern string) bool { + px := 0 + nx := 0 + nextPx := 0 + nextNx := 0 + for px < len(pattern) || nx < len(name) { + if px < len(pattern) { + c := pattern[px] + switch c { + default: // ordinary character + if nx < len(name) && name[nx] == c { + px++ + nx++ + continue + } + case '?': // single-character wildcard + if nx < len(name) { + px++ + nx++ + continue + } + case '*': // zero-or-more-character wildcard + // Try to match at nx. + // If that doesn't work out, + // restart at nx+1 next. + nextPx = px + nextNx = nx + 1 + px++ + continue } - case '?': - if len(str) == 0 { - return false - } - case '*': - return deepMatch(str, pattern[1:]) || - (len(str) > 0 && deepMatch(str[1:], pattern)) } - str = str[1:] - pattern = pattern[1:] + // Mismatch. Maybe restart. + if 0 < nextNx && nextNx <= len(name) { + px = nextPx + nx = nextNx + continue + } + return false } - return len(str) == 0 && len(pattern) == 0 + // Matched all of pattern to all of name. Success. 
+ return true } diff --git a/pkg/yqlib/matchKeyString_test.go b/pkg/yqlib/matchKeyString_test.go new file mode 100644 index 00000000..3cacb4a5 --- /dev/null +++ b/pkg/yqlib/matchKeyString_test.go @@ -0,0 +1,40 @@ +package yqlib + +import ( + "strings" + "testing" +) + +func TestDeepMatch(t *testing.T) { + var tests = []struct { + name string + pattern string + ok bool + }{ + {"", "", true}, + {"", "x", false}, + {"x", "", false}, + {"abc", "abc", true}, + {"abc", "*", true}, + {"abc", "*c", true}, + {"abc", "*b", false}, + {"abc", "a*", true}, + {"abc", "b*", false}, + {"a", "a*", true}, + {"a", "*a", true}, + {"axbxcxdxe", "a*b*c*d*e*", true}, + {"axbxcxdxexxx", "a*b*c*d*e*", true}, + {"abxbbxdbxebxczzx", "a*b?c*x", true}, + {"abxbbxdbxebxczzy", "a*b?c*x", false}, + {strings.Repeat("a", 100), "a*a*a*a*b", false}, + {"xxx", "*x", true}, + } + + for _, tt := range tests { + t.Run(tt.name+" "+tt.pattern, func(t *testing.T) { + if want, got := tt.ok, deepMatch(tt.name, tt.pattern); want != got { + t.Errorf("Expected %v got %v", want, got) + } + }) + } +}