Make deepMatch report in linear time

The current implementation of deepMatch() has exponential runtime.
Given a long enough input and a pattern with multiple wildcards,
it can take a very long time to complete, if it completes at all. This can
potentially be exploited to cause a denial of service (CPU and memory consumption).

E.g. running this in the root of this repository
time yq eval '.jobs.publishDocker.steps.[] | select (.run == "****outputs")' .github/workflows/release.yml
gives on my laptop
25.11s user 0.06s system 99% cpu 25.182 total

Whereas the updated implementation gives
0.01s user 0.01s system 36% cpu 0.049 total

There are numerous similar CVEs reported for glob evaluation in
different shells/ftp-servers/libraries.

The replacement implementation, which has linear runtime, is shamelessly taken
verbatim from the brilliant article by Russ Cox: https://research.swtch.com/glob
This commit is contained in:
Peter Matseykanets 2021-10-14 01:13:19 -04:00 committed by Mike Farah
parent 2da2001651
commit dd259b4957
2 changed files with 81 additions and 17 deletions

View File

@ -9,26 +9,50 @@ func matchKey(name string, pattern string) (matched bool) {
log.Debug("wild!")
return true
}
return deepMatch([]rune(name), []rune(pattern))
return deepMatch(name, pattern)
}
// deepMatch reports whether name matches the glob pattern.
//
// Pattern syntax:
//   - '*' matches any run of zero or more characters;
//   - '?' matches exactly one character;
//   - any other character matches itself.
//
// The whole of name must be matched by the whole of pattern.
//
// The matcher is iterative and remembers a single restart point (the most
// recently seen '*'), so it never explores the exponential number of
// alternatives a naive recursive matcher would. The technique is described in
// https://research.swtch.com/glob
func deepMatch(name, pattern string) bool {
	// Compare rune by rune rather than byte by byte so that '?' consumes one
	// whole character even when that character is multi-byte in UTF-8.
	nameRunes := []rune(name)
	patRunes := []rune(pattern)

	px, nx := 0, 0         // current positions in patRunes / nameRunes
	nextPx, nextNx := 0, 0 // restart point; nextNx == 0 means "no '*' seen yet"

	for px < len(patRunes) || nx < len(nameRunes) {
		if px < len(patRunes) {
			c := patRunes[px]
			switch c {
			default: // ordinary character
				if nx < len(nameRunes) && nameRunes[nx] == c {
					px++
					nx++
					continue
				}
			case '?': // single-character wildcard
				if nx < len(nameRunes) {
					px++
					nx++
					continue
				}
			case '*': // zero-or-more-character wildcard
				// First try letting '*' match nothing at nx. If the rest of
				// the pattern later fails, resume here with '*' having
				// swallowed one more character of name.
				nextPx = px
				nextNx = nx + 1
				px++
				continue
			}
		}
		// Mismatch (or pattern exhausted with name left over). Restart from
		// the last '*' if there is one and name still has a character for it
		// to absorb.
		if 0 < nextNx && nextNx <= len(nameRunes) {
			px = nextPx
			nx = nextNx
			continue
		}
		return false
	}
	// Matched all of pattern to all of name. Success.
	return true
}

View File

@ -0,0 +1,40 @@
package yqlib
import (
"strings"
"testing"
)
// TestDeepMatch checks deepMatch against a table of name/pattern pairs
// covering empty inputs, literal matches, leading/trailing/multiple '*'
// wildcards, the '?' wildcard, and a many-wildcard pattern over a long
// repetitive name.
func TestDeepMatch(t *testing.T) {
	var tests = []struct {
		name    string
		pattern string
		ok      bool
	}{
		{"", "", true},
		{"", "x", false},
		{"x", "", false},
		{"abc", "abc", true},
		{"abc", "*", true},
		{"abc", "*c", true},
		{"abc", "*b", false},
		{"abc", "a*", true},
		{"abc", "b*", false},
		{"a", "a*", true},
		{"a", "*a", true},
		{"axbxcxdxe", "a*b*c*d*e*", true},
		{"axbxcxdxexxx", "a*b*c*d*e*", true},
		{"abxbbxdbxebxczzx", "a*b?c*x", true},
		{"abxbbxdbxebxczzy", "a*b?c*x", false},
		{strings.Repeat("a", 100), "a*a*a*a*b", false},
		{"xxx", "*x", true},
	}
	for _, tt := range tests {
		t.Run(tt.name+" "+tt.pattern, func(t *testing.T) {
			// Report the call and its inputs so a failure is self-explanatory
			// without decoding the (sanitised) subtest name.
			if got := deepMatch(tt.name, tt.pattern); got != tt.ok {
				t.Errorf("deepMatch(%q, %q) = %v, want %v", tt.name, tt.pattern, got, tt.ok)
			}
		})
	}
}