diff --git a/Dockerfile b/Dockerfile index 64786c098..b8ea7cf88 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,7 +13,7 @@ RUN apt-get -y install libtool flex bison pkg-config g++ libssl-dev automake RUN apt-get -y install libjemalloc-dev libboost-dev libboost-filesystem-dev libboost-system-dev libboost-regex-dev python3-dev autoconf flex bison cmake RUN apt-get -y install libxml2-dev libxslt-dev libfreetype6-dev libsuitesparse-dev RUN pip install -U wheel six pytest -RUN pip install colorama==0.4.6 contourpy==1.2.0 cycler==0.12.1 deprecation==2.1.0 fonttools==4.49.0 graphviz==0.20.1 intervaltree==3.1.0 kiwisolver==1.4.5 lxml==5.1.0 matplotlib==3.8.3 networkx==3.2.1 numpy==1.26.4 packaging==23.2 pandas==2.2.1 pillow==10.2.0 pydotplus==2.0.2 pyparsing==3.1.1 python-dateutil==2.9.0.post0 pytz==2024.1 scipy==1.12.0 six==1.16.0 sortedcontainers==2.4.0 StringDist==1.0.9 tqdm==4.66.2 tzdata==2024.1 +RUN pip install colorama==0.4.6 contourpy==1.2.0 cycler==0.12.1 deprecation==2.1.0 fonttools==4.49.0 graphviz==0.20.1 intervaltree==3.1.0 kiwisolver==1.4.5 lxml==5.1.0 matplotlib==3.8.3 networkx==3.2.1 numpy==1.26.4 packaging==23.2 pandas==2.2.1 pillow==10.2.0 pydotplus==2.0.2 pyparsing==3.1.1 python-dateutil==2.9.0.post0 pytz==2024.1 scipy==1.12.0 six==1.16.0 sortedcontainers==2.4.0 tqdm==4.66.2 tzdata==2024.1 COPY . /app RUN cd /app && python setup.py install diff --git a/pm4py/algo/label_splitting/variants/contextual.py b/pm4py/algo/label_splitting/variants/contextual.py index 52b2733fa..9db5f8bb5 100644 --- a/pm4py/algo/label_splitting/variants/contextual.py +++ b/pm4py/algo/label_splitting/variants/contextual.py @@ -4,8 +4,7 @@ import pandas as pd from enum import Enum from pm4py.util import constants, xes_constants, exec_utils, pandas_utils, nx_utils -from pm4py.util import regex -import stringdist +from pm4py.util import regex, string_distance class Parameters(Enum): @@ -40,7 +39,7 @@ def __normalized_edit_distance(s1: str, s2: str) -> float: """ ned = 0 if len(s1) > 0 or len(s2) > 0: - ed = stringdist.levenshtein(s1, s2) + ed = string_distance.levenshtein(s1, s2) ned = ed / max(len(s1), len(s2)) return ned diff --git a/pm4py/util/string_distance.py b/pm4py/util/string_distance.py index cfe19b1ca..712786a51 100644 --- a/pm4py/util/string_distance.py +++ b/pm4py/util/string_distance.py @@ -1,9 +1,34 @@ import sys +import importlib.util from typing import List, Union -import stringdist -levenshtein = lambda stru1, stru2: stringdist.levenshtein(stru1, stru2) +def levenshtein_distance(s1, s2): + if len(s1) < len(s2): + return levenshtein_distance(s2, s1) + + if len(s2) == 0: + return len(s1) + + previous_row = range(len(s2) + 1) + for i, c1 in enumerate(s1): + current_row = [i + 1] + for j, c2 in enumerate(s2): + insertions = previous_row[j + 1] + 1 + deletions = current_row[j] + 1 + substitutions = previous_row[j] + (c1 != c2) + current_row.append(min(insertions, deletions, substitutions)) + previous_row = current_row + + return previous_row[-1] + + +def levenshtein(stru1, stru2): + if importlib.util.find_spec("stringdist"): + import stringdist + return stringdist.levenshtein(stru1, stru2) + + return levenshtein_distance(stru1, stru2) def argmin_levenshtein(stru: str, list_stri: List[str]) -> Union[str, None]: diff --git a/requirements.txt b/requirements.txt index 2feb81698..bdb49158d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,5 +9,4 @@ pandas pydotplus pytz scipy -stringdist tqdm diff --git a/requirements_complete.txt b/requirements_complete.txt index 8ca0ecf39..d8ae6e7e1 100644 --- a/requirements_complete.txt +++ b/requirements_complete.txt @@ -20,6 +20,5 @@ pytz scipy six sortedcontainers -StringDist tqdm tzdata diff --git a/requirements_stable.txt b/requirements_stable.txt index 4f673d362..e9bce91f8 100644 --- a/requirements_stable.txt +++ b/requirements_stable.txt @@ -20,6 +20,5 @@ pytz==2024.1 scipy==1.12.0 six==1.16.0 sortedcontainers==2.4.0 -StringDist==1.0.9 tqdm==4.66.2 tzdata==2024.1 diff --git a/third_party/LICENSES_TRANSITIVE.md b/third_party/LICENSES_TRANSITIVE.md index 9d9ff5e07..ba54ee239 100644 --- a/third_party/LICENSES_TRANSITIVE.md +++ b/third_party/LICENSES_TRANSITIVE.md @@ -28,6 +28,5 @@ libraries are added/removed. | scipy | https://pypi.org/project/scipy | BSD License | 1.12.0 | | six | https://pypi.org/project/six | MIT License (MIT) | 1.16.0 | | sortedcontainers | https://pypi.org/project/sortedcontainers | Apache Software License (Apache 2.0) | 2.4.0 | -| StringDist | https://pypi.org/project/StringDist | MIT License (MIT) | 1.0.9 | | tqdm | https://pypi.org/project/tqdm | MIT License, Mozilla Public License 2.0 (MPL 2.0) (MPL-2.0 AND MIT) | 4.66.2 | | tzdata | https://pypi.org/project/tzdata | Apache Software License (Apache-2.0) | 2024.1 | diff --git a/third_party/stringdist.LICENSE b/third_party/stringdist.LICENSE deleted file mode 100644 index 79867078c..000000000 --- a/third_party/stringdist.LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2017 Oleg Bulkin - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. \ No newline at end of file