-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 469fe71
Showing
9 changed files
with
857 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
*.so | ||
*.pyd | ||
__pycache__/ | ||
nimcache/ | ||
.cache/ | ||
.benchmarks/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
|
||
" Load guard: bail out if this plugin file has already been sourced.
if exists('g:loaded_fruzzy')
  finish
endif
let g:loaded_fruzzy = 1

" Default to the pure-Python matcher unless the user already chose a value.
if !exists('g:fruzzy#usenative')
  let g:fruzzy#usenative = 0
endif
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
from ..base import Base | ||
from denite.util import convert2fuzzy_pattern | ||
import os | ||
import sys | ||
import logging | ||
|
||
logger = logging.getLogger()

# Drop the last three components of this file's directory and put the
# resulting directory on sys.path so that `import fruzzy` below resolves.
_here = os.path.dirname(__file__)
pkgPath = os.path.sep.join(_here.split(os.path.sep)[:-3])
if pkgPath not in sys.path:
    logger.debug("added %s to sys.path" % pkgPath)
    sys.path.insert(0, pkgPath)
|
||
import fruzzy | ||
|
||
|
||
class Filter(Base):
    """Denite matcher that ranks candidates with the fruzzy scorer.

    When the Vim variable ``fruzzy#usenative`` is greater than zero the
    native ``fruzzy_mod`` extension is used; otherwise (or when it cannot
    be imported) the pure-Python ``fruzzy`` module does the scoring.
    """

    # Sources whose candidates are scored with the path-specific boosts.
    _PATH_SOURCES = ("file", "file_rec", "file_mru", "directory",
                     "directory_mru", "file_old", "directory_rec", "buffer")
    # Maximum number of candidates returned by filter().
    _RESULT_LIMIT = 10

    def __init__(self, vim):
        super().__init__(vim)

        self.name = 'matcher/fruzzy'
        self.description = 'fruzzy - freakishly fast fuzzy matcher'
        self.useNative = False
        if self.vim.api.get_var("fruzzy#usenative") > 0:
            try:
                import fruzzy_mod
                self.nativeMethod = fruzzy_mod.scoreMatchesStr
                self.useNative = True
            except ImportError:
                # ImportError (the parent of ModuleNotFoundError) also
                # covers a module that exists but fails to load, so that
                # case falls back to Python instead of crashing.
                self.debug("Native module requested but unable to load native module")
                self.debug("falling back to python implementation")
                self.debug("Check if you have fruzzy_mod.so or fruzzy_mod.pyd at %s" %
                           pkgPath)
                self.useNative = False
        self.debug("usenative: %s" % self.useNative)

    def filter(self, context):
        """Return the best-scoring candidates for ``context['input']``.

        :context: dict read for the keys 'candidates' and 'input'
        :returns: the candidates unchanged when either is empty, otherwise
                  a list of at most ``_RESULT_LIMIT`` matching candidates
        """
        if not context['candidates'] or not context['input']:
            return context['candidates']
        candidates = context['candidates']
        qry = context['input']
        # The first candidate's source decides the scoring mode for all.
        ispath = candidates[0]['source_name'] in self._PATH_SOURCES
        results = self.scoreMatchesProxy(qry, candidates, self._RESULT_LIMIT,
                                         key=lambda x: x['word'],
                                         ispath=ispath)
        # Each result starts with the candidate; drop the score details.
        return [w[0] for w in results]

    def scoreMatchesProxy(self, q, c, limit, key=None, ispath=True):
        """Score candidates with the native or the Python implementation.

        :q: query string
        :c: indexable sequence of candidates
        :limit: maximum number of results
        :key: function mapping a candidate to its search string; when
              omitted the candidates themselves are used
        :ispath: whether path-specific boosts apply
        :returns: result tuples whose first element is the candidate
        """
        if not self.useNative:
            return fruzzy.scoreMatches(q, c, limit, key, ispath)
        words = [key(d) for d in c] if key else list(c)
        # Each native result is indexed as (candidate index, second value);
        # presumably the second value is the score - confirm against
        # fruzzy_mod.
        return [(c[hit[0]], hit[1])
                for hit in self.nativeMethod(q, words, limit, ispath)]

    def convert_pattern(self, input_str):
        """Return denite's fuzzy pattern for ``input_str``."""
        return convert2fuzzy_pattern(input_str)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,176 @@ | ||
import sys | ||
import heapq | ||
import itertools | ||
|
||
# Characters treated as word separators when scoring matches. The backslash
# is escaped explicitly: a bare '\_' is an invalid escape sequence.
sep = '-/\\_. '
|
||
|
||
def idfn(x):
    """Identity function: return the argument unchanged."""
    return x
|
||
|
||
def scorer(x, key, ispath=True):
    """Compute an integer ranking score for one matched candidate.

    :x: - tuple of (item, positions, clusterScore, sepScore, camelCaseScore)
        - item - the item itself
        - positions - indices where each char matched
        - clusterScore - How closely are matched chars clustered - 0 if
          consecutive
        - sepScore - how many matches were after separators (count)
        - camelCaseScore - how many matched chars were camelcase
    :key: - key func that when applied to x[0] returns the search string
    :ispath: - when true, add boosts that favour matches near the end of
        the string and inside the last path component
    :returns: the sum of all boosts; 0 when there are no match positions
        or the candidate string is empty
    """
    positions, cluster_score, sep_score, camel_score = x[1], x[2], x[3], x[4]
    candidate = key(x[0])
    lqry = len(positions)
    lcan = len(candidate)
    if not lqry or not lcan:
        # Nothing matched (e.g. empty query): avoid indexing positions[0]
        # and dividing by zero below.
        return 0

    position_boost, end_boost, filematchBoost = 0, 0, 0
    if ispath:
        first = positions[0]
        # How close the first match is to the end of the string, as a
        # percentage. Scale before dividing: first // lcan is always 0.
        position_boost = 100 * first // lcan
        # Absolute distance of the first match from the end.
        end_boost = (100 - (lcan - first)) * 2

        # Share of matches at or after the last path separator; either
        # separator style counts. With no separator the index is -1 and
        # every match counts.
        lastPathSep = max(candidate.rfind("\\"), candidate.rfind("/"))
        fileMatchCount = sum(1 for p in positions if p >= lastPathSep)
        filematchBoost = 100 * fileMatchCount // lqry

    # How closely the matches are clustered: the larger the total gap
    # relative to the candidate length, the smaller the boost.
    cluster_boost = (100 - 100 * cluster_score // lcan) * 4

    # Boost for matches next to separators, weighted by length of query.
    sep_boost = 100 * sep_score // lqry * 75 // 100

    # Boost for camelCase matches, weighted by length of query.
    camel_boost = 100 * camel_score // lqry

    return (position_boost + end_boost + filematchBoost +
            cluster_boost + sep_boost + camel_boost)
|
||
|
||
def scoreMatches(query, candidates, limit, key=None, ispath=True):
    """Return the ``limit`` highest-scoring fuzzy matches for ``query``.

    At most ``limit * 5`` matches are requested from fuzzyMatches; they are
    then ranked by the score stored at index 5 of each match tuple.
    """
    if not key:
        key = idfn
    pool = fuzzyMatches(query, candidates, limit * 5, key, ispath)
    return heapq.nlargest(limit, pool, key=lambda match: match[5])
|
||
|
||
def isMatch(query, candidate):
    """Test whether the chars of ``query`` occur, in order, in ``candidate``.

    Matching is case-insensitive. The first query char is searched from the
    right and the remaining chars left-to-right after it. If a later char
    cannot be found, the search is retried with the first char restricted
    to the region left of the closest earlier occurrence of that char.

    :returns: ``(False, [])`` when there is no match, otherwise
        ``(True, positions, clusterScore, sepScore, camelCaseScore)`` where
        - positions - index of each matched char
        - clusterScore - sum of the gaps between consecutive matches
        - sepScore - +1 per match followed by a separator, +1 per match
          preceded by a separator, +1 for a match at index 0
        - camelCaseScore - +1 per uppercase match preceded by a lowercase
          char, +1 for a match at index 0
    """
    # Lowercase once; the retry loop below may walk the string many times.
    # NOTE(review): indices found in the lowered string are used on the
    # original; this assumes lower() preserves length - confirm for
    # non-ASCII input.
    lowered = candidate.lower()
    needle = query.lower()
    lastIdx = len(candidate) - 1

    def walkString(left, right):
        matchPos = []
        sepScore = 0
        clusterScore = 0
        camelCaseScore = 0
        for c in needle:
            if not matchPos:
                # First char: take the rightmost occurrence in the window.
                pos = lowered.rfind(c, left, right)
            else:
                pos = lowered.find(c, left)
            if pos == -1:
                if not matchPos:
                    # if the first char was not found anywhere we're done
                    return (False, [])
                # Otherwise look for the failing char to the left of the
                # first match; the next attempt must start left of it.
                posLeft = lowered.rfind(c, 0, matchPos[0])
                return (False, [] if posLeft == -1 else [posLeft])

            if pos < lastIdx and candidate[pos + 1] in sep:
                sepScore += 1
            if pos > 0:
                prevChar = candidate[pos - 1]
                if prevChar in sep:
                    sepScore += 1
                # A real camelCase hump: uppercase after lowercase. Digits
                # and punctuation are not uppercase and must not count.
                if candidate[pos].isupper() and prevChar.islower():
                    camelCaseScore += 1
            else:
                # A match at the very start counts as both.
                sepScore += 1
                camelCaseScore += 1
            if matchPos:
                clusterScore += pos - matchPos[-1] - 1
            matchPos.append(pos)
            left = pos + 1
        return (True, matchPos, clusterScore, sepScore, camelCaseScore)

    right = len(candidate)
    while True:
        didMatch, positions, *rest = walkString(0, right)
        if didMatch or not positions:
            # Either a full match, or no way left to retry.
            return (didMatch, positions, *rest)
        # resume search - start looking left from this position onwards
        right = positions[0]
|
||
|
||
def fuzzyMatches(query, candidates, limit, key=None, ispath=True):
    """Lazily yield scored fuzzy matches among the given candidates.

    :query: string whose chars must appear in order in a candidate
    :candidates: iterable of items to test
    :limit: stop after this many matches have been yielded
    :key: function mapping an item to its search string (identity if unset)
    :ispath: forwarded to scorer
    :returns: generator of tuples made of the item, the values isMatch
              returned after its success flag, and the scorer result last
    """
    extract = key if key else idfn
    found = 0
    for item in candidates:
        matched, positions, *details = isMatch(query, extract(item))
        if not matched:
            continue
        found += 1
        score = scorer((item, positions, *details), extract, ispath)
        yield (item, positions, *details, score)
        if found == limit:
            return
|
||
|
||
def usage():
    """Print the command-line synopsis to stdout.

    :returns: None
    """
    print("usage: %s [FILE] QUERY" % sys.argv[0])
    print("  FILE defaults to 'neomru_file' when omitted")
|
||
|
||
if __name__ == "__main__":
    # Without a query there is nothing to search for.
    if len(sys.argv) == 1:
        usage()
        # sys.exit rather than the site-provided exit(), which is meant
        # for interactive use and is not always available.
        sys.exit(0)

    # One argument: QUERY against the default file.
    # Two arguments: FILE then QUERY.
    file = "neomru_file"
    query = sys.argv[1]
    if len(sys.argv) == 3:
        file = sys.argv[1]
        query = sys.argv[2]

    with open(file) as fh:
        # Stream the lines; scoreMatches consumes them inside this block.
        lines = (line.strip() for line in fh)
        for x in scoreMatches(query, lines, 10):
            print(x)
|
Oops, something went wrong.