From 94a1c4830bacb672ed105fe35e6b6601cd521af3 Mon Sep 17 00:00:00 2001 From: Alberto Carretero Date: Mon, 16 Dec 2024 12:04:24 +0100 Subject: [PATCH] feat: optimize common case of GlobPath (#180) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Results of the benchmark on my machine (using hyperfine with 10 rounds). Basically, HEAD takes 5.341s on my machine compared to 18.366s for main. $ bash benchmark.sh HEAD main Creating rev: e2ee603c7396b33038e47352c0722b5b1202fbfe (main) Creating rev: 19829794e6454f78334c9a67b74d8abb9bc66b25 (HEAD) Benchmark 1: ./19829794e6454f78334c9a67b74d8abb9bc66b25 info --release ../chisel-releases/ubuntu-24.04 'python3.12_core' Time (mean ± σ): 5.341 s ± 0.115 s [User: 5.528 s, System: 0.028 s] Range (min … max): 5.239 s … 5.635 s 10 runs Benchmark 2: ./e2ee603c7396b33038e47352c0722b5b1202fbfe info --release ../chisel-releases/ubuntu-24.04 'python3.12_core' Time (mean ± σ): 18.366 s ± 0.139 s [User: 19.677 s, System: 0.103 s] Range (min … max): 18.219 s … 18.675 s 10 runs Summary ./19829794e6454f78334c9a67b74d8abb9bc66b25 info --release ../chisel-releases/ubuntu-24.04 'python3.12_core' ran 3.44 ± 0.08 times faster than ./e2ee603c7396b33038e47352c0722b5b1202fbfe info --release ../chisel-releases/ubuntu-24.04 'python3.12_core' --- internal/strdist/strdist.go | 43 +++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/internal/strdist/strdist.go b/internal/strdist/strdist.go index f205bfcf..d5b640ef 100644 --- a/internal/strdist/strdist.go +++ b/internal/strdist/strdist.go @@ -105,6 +105,15 @@ func Distance(a, b string, f CostFunc, cut int64) int64 { // * - Any zero or more characters, except for / // ** - Any zero or more characters, including / func GlobPath(a, b string) bool { + if !wildcardPrefixMatch(a, b) { + // Fast path. + return false + } + if !wildcardSuffixMatch(a, b) { + // Fast path. 
+		return false
+	}
+
 	a = strings.ReplaceAll(a, "**", "⁑")
 	b = strings.ReplaceAll(b, "**", "⁑")
 	return Distance(a, b, globCost, 1) == 0
@@ -125,3 +134,37 @@ func globCost(ar, br rune) Cost {
 	}
 	return Cost{SwapAB: 1, DeleteA: 1, InsertB: 1}
 }
+
+// wildcardPrefixMatch reports whether the literal prefixes of a and b agree
+// over the length of the shorter one. The literal prefix is the part before
+// the first '*' or '?', or the whole string when it contains no wildcard.
+func wildcardPrefixMatch(a, b string) bool {
+	ai := strings.IndexAny(a, "*?")
+	bi := strings.IndexAny(b, "*?")
+	if ai == -1 {
+		ai = len(a)
+	}
+	if bi == -1 {
+		bi = len(b)
+	}
+	mini := min(ai, bi)
+	return a[:mini] == b[:mini]
+}
+
+// wildcardSuffixMatch reports whether the literal suffixes of a and b agree
+// over the length of the shorter one. The literal suffix is the part after
+// the last '*' or '?', or the whole string when it contains no wildcard.
+func wildcardSuffixMatch(a, b string) bool {
+	ai := strings.LastIndexAny(a, "*?")
+	la := len(a) // No wildcard: the whole string is the literal suffix.
+	if ai != -1 {
+		la = len(a) - ai - 1
+	}
+	lb := len(b) // Same rule as for a.
+	bi := strings.LastIndexAny(b, "*?")
+	if bi != -1 {
+		lb = len(b) - bi - 1
+	}
+	minl := min(la, lb)
+	return a[len(a)-minl:] == b[len(b)-minl:]
+}