Skip to content

Commit

Permalink
feat: optimize common case of GlobPath (canonical#180)
Browse files Browse the repository at this point in the history
Results of the benchmark on my machine (using hyperfine with 10 rounds).

Basically, HEAD takes 5.341s on my machine compared to 18.366s for main.

$ bash benchmark.sh HEAD main 
Creating rev: e2ee603 (main)
Creating rev: 1982979 (HEAD)
Benchmark 1: ./19829794e6454f78334c9a67b74d8abb9bc66b25 info --release ../chisel-releases/ubuntu-24.04 'python3.12_core'
  Time (mean ± σ):      5.341 s ±  0.115 s    [User: 5.528 s, System: 0.028 s]
  Range (min … max):    5.239 s …  5.635 s    10 runs
 
Benchmark 2: ./e2ee603c7396b33038e47352c0722b5b1202fbfe info --release ../chisel-releases/ubuntu-24.04 'python3.12_core'
  Time (mean ± σ):     18.366 s ±  0.139 s    [User: 19.677 s, System: 0.103 s]
  Range (min … max):   18.219 s … 18.675 s    10 runs
 
Summary
  ./19829794e6454f78334c9a67b74d8abb9bc66b25 info --release ../chisel-releases/ubuntu-24.04 'python3.12_core' ran
    3.44 ± 0.08 times faster than ./e2ee603c7396b33038e47352c0722b5b1202fbfe info --release ../chisel-releases/ubuntu-24.04 'python3.12_core'
  • Loading branch information
letFunny authored and cjdcordeiro committed Dec 16, 2024
1 parent 82bb84f commit 3ab49af
Showing 1 changed file with 43 additions and 0 deletions.
43 changes: 43 additions & 0 deletions internal/strdist/strdist.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,15 @@ func Distance(a, b string, f CostFunc, cut int64) int64 {
// * - Any zero or more characters, except for /
// ** - Any zero or more characters, including /
func GlobPath(a, b string) bool {
if !wildcardPrefixMatch(a, b) {
// Fast path.
return false
}
if !wildcardSuffixMatch(a, b) {
// Fast path.
return false
}

a = strings.ReplaceAll(a, "**", "⁑")
b = strings.ReplaceAll(b, "**", "⁑")
return Distance(a, b, globCost, 1) == 0
Expand All @@ -125,3 +134,37 @@ func globCost(ar, br rune) Cost {
}
return Cost{SwapAB: 1, DeleteA: 1, InsertB: 1}
}

// wildcardPrefixMatch reports whether the literal prefixes of a and b agree
// over the length of the shorter one. The literal prefix of a string is
// everything before its first wildcard ('*' or '?'), or the entire string
// when it contains no wildcard.
func wildcardPrefixMatch(a, b string) bool {
	shorter, longer := a, b
	if i := strings.IndexAny(a, "*?"); i >= 0 {
		shorter = a[:i]
	}
	if i := strings.IndexAny(b, "*?"); i >= 0 {
		longer = b[:i]
	}
	// Order the two prefixes by length so that a single HasPrefix call
	// compares exactly the bytes they have in common.
	if len(shorter) > len(longer) {
		shorter, longer = longer, shorter
	}
	return strings.HasPrefix(longer, shorter)
}

// wildcardSuffixMatch reports whether the literal suffixes of a and b agree
// over the length of the shorter one. The literal suffix of a string is
// everything after its last wildcard ('*' or '?'), or the entire string when
// it contains no wildcard.
//
// A false result means the two strings end in different literal text; a true
// result only means the trailing literal text is compatible.
func wildcardSuffixMatch(a, b string) bool {
	// LastIndexAny returns -1 when there is no wildcard, so adding one yields
	// 0 and the whole string is taken as the suffix. Both wildcards are
	// single-byte, so index+1 is always the byte right after the wildcard.
	shorter := a[strings.LastIndexAny(a, "*?")+1:]
	longer := b[strings.LastIndexAny(b, "*?")+1:]
	// Order the two suffixes by length so that a single HasSuffix call
	// compares exactly the bytes they have in common.
	if len(shorter) > len(longer) {
		shorter, longer = longer, shorter
	}
	return strings.HasSuffix(longer, shorter)
}

0 comments on commit 3ab49af

Please sign in to comment.