Skip to content

Commit

Permalink
convert remaining direct wrappers
Browse files: browse the repository at this point in the history
  • Loading branch information
maxbachmann committed Nov 1, 2023
1 parent a95daa7 commit 285da68
Showing 1 changed file with 61 additions and 66 deletions.
127 changes: 61 additions & 66 deletions src/Levenshtein/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

__author__: str = "Max Bachmann"
__license__: str = "GPL"
__version__: str = "0.23.0"

import rapidfuzz.distance.Levenshtein as _Levenshtein
import rapidfuzz.distance.Indel as _Indel
Expand All @@ -37,6 +36,12 @@
seqratio,
)

import importlib.metadata

# Look up the version of the installed distribution at import time. The
# annotation is declared once, up front, so both branches are plain
# assignments (annotating the name in each branch is reported as a
# redefinition by static type checkers).
__version__: str
try:
    __version__ = importlib.metadata.version(__package__ or __name__)
except importlib.metadata.PackageNotFoundError:
    # No installed distribution metadata was found under that name; fall back
    # to a placeholder so the attribute always exists.
    __version__ = "0.0.0"

def _copy_func(f, name, doc):
"""Based on https://stackoverflow.com/a/13503277/11335032"""
Expand Down Expand Up @@ -139,7 +144,7 @@ def _copy_func(f, name, doc):
distance._RF_OriginalScorer = distance


ratio_doc = """
_ratio_doc = """
Calculates a normalized indel similarity in the range [0, 1].
This is calculated as ``1 - normalized_distance``
Expand Down Expand Up @@ -181,10 +186,10 @@ def _copy_func(f, name, doc):
>>> ratio(["lewenstein"], ["levenshtein"], processor=lambda s: s[0])
0.8571428571428572
"""
ratio = _copy_func(_Indel.normalized_similarity, "ratio", ratio_doc)
ratio = _copy_func(_Indel.normalized_similarity, "ratio", _ratio_doc)
ratio._RF_OriginalScorer = ratio

hamming_doc = """
_hamming_doc = """
Calculates the Hamming distance between two strings.
The hamming distance is defined as the number of positions
where the two strings differ. It describes the minimum
Expand Down Expand Up @@ -219,76 +224,66 @@ def _copy_func(f, name, doc):
ValueError
If s1 and s2 have a different length
"""
hamming = _copy_func(_Hamming.distance, "hamming", hamming_doc)
hamming = _copy_func(_Hamming.distance, "hamming", _hamming_doc)
hamming._RF_OriginalScorer = hamming

_jaro_doc = """
Calculates the jaro similarity
def jaro(s1, s2, *, processor=None, score_cutoff=None) -> float:
"""
Calculates the jaro similarity
Parameters
----------
s1 : Sequence[Hashable]
First string to compare.
s2 : Sequence[Hashable]
Second string to compare.
processor: callable, optional
Optional callable that is used to preprocess the strings before
comparing them. Default is None, which deactivates this behaviour.
score_cutoff : float, optional
Optional argument for a score threshold as a float between 0 and 1.0.
For ratio < score_cutoff 0 is returned instead. Default is None,
which deactivates this behaviour.
Returns
-------
similarity : float
similarity between s1 and s2 as a float between 0 and 1.0
"""
return _Jaro.similarity(s1, s2, processor=processor, score_cutoff=score_cutoff)
Parameters
----------
s1 : Sequence[Hashable]
First string to compare.
s2 : Sequence[Hashable]
Second string to compare.
processor: callable, optional
Optional callable that is used to preprocess the strings before
comparing them. Default is None, which deactivates this behaviour.
score_cutoff : float, optional
Optional argument for a score threshold as a float between 0 and 1.0.
For ratio < score_cutoff 0 is returned instead. Default is None,
which deactivates this behaviour.
Returns
-------
similarity : float
similarity between s1 and s2 as a float between 0 and 1.0
"""
jaro = _copy_func(_Jaro.similarity, "jaro", _jaro_doc)
jaro._RF_OriginalScorer = jaro

def jaro_winkler(
s1, s2, *, prefix_weight=0.1, processor=None, score_cutoff=None
) -> float:
"""
Calculates the jaro winkler similarity
_jaro_winkler_doc = """
Calculates the jaro winkler similarity
Parameters
----------
s1 : Sequence[Hashable]
First string to compare.
s2 : Sequence[Hashable]
Second string to compare.
prefix_weight : float, optional
Weight used for the common prefix of the two strings.
Has to be between 0 and 0.25. Default is 0.1.
processor: callable, optional
Optional callable that is used to preprocess the strings before
comparing them. Default is None, which deactivates this behaviour.
score_cutoff : float, optional
Optional argument for a score threshold as a float between 0 and 1.0.
For ratio < score_cutoff 0 is returned instead. Default is None,
which deactivates this behaviour.
Parameters
----------
s1 : Sequence[Hashable]
First string to compare.
s2 : Sequence[Hashable]
Second string to compare.
prefix_weight : float, optional
Weight used for the common prefix of the two strings.
Has to be between 0 and 0.25. Default is 0.1.
processor: callable, optional
Optional callable that is used to preprocess the strings before
comparing them. Default is None, which deactivates this behaviour.
score_cutoff : float, optional
Optional argument for a score threshold as a float between 0 and 1.0.
For ratio < score_cutoff 0 is returned instead. Default is None,
which deactivates this behaviour.
Returns
-------
similarity : float
similarity between s1 and s2 as a float between 0 and 1.0
Returns
-------
similarity : float
similarity between s1 and s2 as a float between 0 and 1.0
Raises
------
ValueError
If prefix_weight is invalid
"""
return _JaroWinkler.similarity(
s1,
s2,
prefix_weight=prefix_weight,
processor=processor,
score_cutoff=score_cutoff,
)
Raises
------
ValueError
If prefix_weight is invalid
"""
jaro_winkler = _copy_func(_JaroWinkler.similarity, "jaro_winkler", _jaro_winkler_doc)
jaro_winkler._RF_OriginalScorer = jaro_winkler


def editops(*args):
Expand Down

0 comments on commit 285da68

Please sign in to comment.