From 1c83c5d9ee464d015299d0f36e250b4b6dccec5a Mon Sep 17 00:00:00 2001 From: matiusz Date: Thu, 1 Feb 2024 16:14:09 +0100 Subject: [PATCH] code: Remove unnecessary files --- .github/workflows/render.yml | 4 +- requirements_deploy.txt | 3 + requirements.txt => requirements_full.txt | 22 +-- src/obj/Songbook.py | 20 --- src/tools/fuzz.py | 162 ---------------------- src/tools/fuzzUtils.py | 21 --- 6 files changed, 16 insertions(+), 216 deletions(-) create mode 100644 requirements_deploy.txt rename requirements.txt => requirements_full.txt (93%) delete mode 100644 src/tools/fuzz.py delete mode 100644 src/tools/fuzzUtils.py diff --git a/.github/workflows/render.yml b/.github/workflows/render.yml index 14ebccc2..a6acd6fe 100644 --- a/.github/workflows/render.yml +++ b/.github/workflows/render.yml @@ -14,7 +14,7 @@ jobs: service-id: ${{ secrets.HK_RENDER_SERVICE }} render-token: ${{ secrets.RENDER_TOKEN }} github-token: ${{ secrets.GITHUB_TOKEN }} - retries: 6 + retries: 10 wait: 20000 - name: spiewnik111adh uses: bounceapp/render-action@0.6.0 @@ -22,5 +22,5 @@ jobs: service-id: ${{ secrets.ADH_RENDER_SERVICE }} render-token: ${{ secrets.RENDER_TOKEN }} github-token: ${{ secrets.GITHUB_TOKEN }} - retries: 6 + retries: 10 wait: 20000 \ No newline at end of file diff --git a/requirements_deploy.txt b/requirements_deploy.txt new file mode 100644 index 00000000..3417deee --- /dev/null +++ b/requirements_deploy.txt @@ -0,0 +1,3 @@ +Flask +Frozen-Flask +unidecode \ No newline at end of file diff --git a/requirements.txt b/requirements_full.txt similarity index 93% rename from requirements.txt rename to requirements_full.txt index 8332d684..e8371304 100644 --- a/requirements.txt +++ b/requirements_full.txt @@ -1,12 +1,12 @@ -aiofiles>=0.7.0 -pynput>=1.7.6 -PySide6>=6.2.0 -Flask>=2.0.2 -WTForms>=3.0.0 -Frozen-Flask -Flask-WTF>=1.0.0 -requests>=2.27.1 -pre-commit -markdown2 -rapidfuzz +aiofiles>=0.7.0 +pynput>=1.7.6 +PySide6>=6.2.0 +Flask>=2.0.2 +WTForms>=3.0.0 +Frozen-Flask +Flask-WTF>=1.0.0 +requests>=2.27.1 +pre-commit +markdown2 +rapidfuzz unidecode \ No newline at end of file diff --git a/src/obj/Songbook.py b/src/obj/Songbook.py index ddc120ff..6cc96e62 100644 --- a/src/obj/Songbook.py +++ b/src/obj/Songbook.py @@ -1,9 +1,5 @@ from .Song import Song from src.tools.loggerSetup import logging -from collections import defaultdict -from unidecode import unidecode -from src.tools import fuzz -from string import punctuation from src.tools.dirTools import getCategoriesFromDirs, getSongFilenamesFromCatDir logger = logging.getLogger(__name__) @@ -16,19 +12,3 @@ def __init__(self): for song in self.sb[cat]: if song.category != cat: logger.warning(f"Category mismatch for song {song.title} - category: {song.category}, folder: {cat}") - - def filteredSongs(self, filter): - filtered = defaultdict(list) - for cat, songs in self.sb.items(): - for song in songs: - if not filter or \ - prepareSearchString(filter) in prepareSearchString(song.title) or \ - fuzz.partial_ratio(prepareSearchString(filter), prepareSearchString(song.filterString)) > 95: - filtered[cat].append(song) - return filtered - -def prepareSearchString(string): - string = string.lower() - string = unidecode(string) - string = string.translate(str.maketrans('', '', punctuation)) - return string \ No newline at end of file diff --git a/src/tools/fuzz.py b/src/tools/fuzz.py deleted file mode 100644 index 7287251a..00000000 --- a/src/tools/fuzz.py +++ /dev/null @@ -1,162 +0,0 @@ -#!/usr/bin/env python -# TODO: Revert to standard thefuzz if https://github.com/seatgeek/thefuzz/pull/10 gets merged - - -from rapidfuzz.fuzz import ( - ratio as _ratio, - partial_ratio as _partial_ratio, - token_set_ratio as _token_set_ratio, - token_sort_ratio as _token_sort_ratio, - partial_token_set_ratio as _partial_token_set_ratio, - partial_token_sort_ratio as _partial_token_sort_ratio, - WRatio as _WRatio, -) - -from . import fuzzUtils as utils - -########################### -# Basic Scoring Functions # -########################### - - -def _rapidfuzz_scorer(scorer, s1, s2, force_ascii, full_process): - """ - wrapper around rapidfuzz function to be compatible with the API of thefuzz - """ - if full_process: - if s1 is None or s2 is None: - return 0 - - s1 = utils.full_process(s1, force_ascii=force_ascii) - s2 = utils.full_process(s2, force_ascii=force_ascii) - - return int(round(scorer(s1, s2, processor=None))) - - -def ratio(s1, s2): - return _rapidfuzz_scorer(_ratio, s1, s2, False, False) - - -def partial_ratio(s1, s2): - """ - Return the ratio of the most similar substring - as a number between 0 and 100. - """ - return _rapidfuzz_scorer(_partial_ratio, s1, s2, False, False) - - -############################## -# Advanced Scoring Functions # -############################## - -def _process_and_sort(s, force_ascii, full_process=True): - """Return a cleaned string with token sorted.""" - # pull tokens - ts = utils.full_process(s, force_ascii=force_ascii) if full_process else s - tokens = ts.split() - - # sort tokens and join - sorted_string = " ".join(sorted(tokens)) - return sorted_string.strip() - - -# Sorted Token -# find all alphanumeric tokens in the string -# sort those tokens and take ratio of resulting joined strings -# controls for unordered string elements -def token_sort_ratio(s1, s2, force_ascii=True, full_process=True): - """ - Return a measure of the sequences' similarity between 0 and 100 - but sorting the token before comparing. - """ - return _rapidfuzz_scorer(_token_sort_ratio, s1, s2, force_ascii, full_process) - - -def partial_token_sort_ratio(s1, s2, force_ascii=True, full_process=True): - """ - Return the ratio of the most similar substring as a number between - 0 and 100 but sorting the token before comparing. - """ - return _rapidfuzz_scorer( - _partial_token_sort_ratio, s1, s2, force_ascii, full_process - ) - - -def token_set_ratio(s1, s2, force_ascii=True, full_process=True): - return _rapidfuzz_scorer(_token_set_ratio, s1, s2, force_ascii, full_process) - - -def partial_token_set_ratio(s1, s2, force_ascii=True, full_process=True): - return _rapidfuzz_scorer( - _partial_token_set_ratio, s1, s2, force_ascii, full_process - ) - - -################### -# Combination API # -################### - -# q is for quick -def QRatio(s1, s2, force_ascii=True, full_process=True): - """ - Quick ratio comparison between two strings. - Runs full_process from utils on both strings - Short circuits if either of the strings is empty after processing. - :param s1: - :param s2: - :param force_ascii: Allow only ASCII characters (Default: True) - :full_process: Process inputs, used here to avoid double processing in extract functions (Default: True) - :return: similarity ratio - """ - return _rapidfuzz_scorer(_ratio, s1, s2, force_ascii, full_process) - - -def UQRatio(s1, s2, full_process=True): - """ - Unicode quick ratio - Calls QRatio with force_ascii set to False - :param s1: - :param s2: - :return: similarity ratio - """ - return QRatio(s1, s2, force_ascii=False, full_process=full_process) - - -# w is for weighted -def WRatio(s1, s2, force_ascii=True, full_process=True): - """ - Return a measure of the sequences' similarity between 0 and 100, using different algorithms. - **Steps in the order they occur** - #. Run full_process from utils on both strings - #. Short circuit if this makes either string empty - #. Take the ratio of the two processed strings (fuzz.ratio) - #. Run checks to compare the length of the strings - * If one of the strings is more than 1.5 times as long as the other - use partial_ratio comparisons - scale partial results by 0.9 - (this makes sure only full results can return 100) - * If one of the strings is over 8 times as long as the other - instead scale by 0.6 - #. Run the other ratio functions - * if using partial ratio functions call partial_ratio, - partial_token_sort_ratio and partial_token_set_ratio - scale all of these by the ratio based on length - * otherwise call token_sort_ratio and token_set_ratio - * all token based comparisons are scaled by 0.95 - (on top of any partial scalars) - #. Take the highest value from these results - round it and return it as an integer. - :param s1: - :param s2: - :param force_ascii: Allow only ascii characters - :type force_ascii: bool - :full_process: Process inputs, used here to avoid double processing in extract functions (Default: True) - :return: - """ - return _rapidfuzz_scorer(_WRatio, s1, s2, force_ascii, full_process) - - -def UWRatio(s1, s2, full_process=True): - """Return a measure of the sequences' similarity between 0 and 100, - using different algorithms. Same as WRatio but preserving unicode. - """ - return WRatio(s1, s2, force_ascii=False, full_process=full_process) \ No newline at end of file diff --git a/src/tools/fuzzUtils.py b/src/tools/fuzzUtils.py deleted file mode 100644 index 499a8690..00000000 --- a/src/tools/fuzzUtils.py +++ /dev/null @@ -1,21 +0,0 @@ -from rapidfuzz.utils import default_process as _default_process - -bad_chars = "".join([chr(i) for i in range(128, 256)]) # ascii dammit! -translation_table = {ord(c): None for c in bad_chars} - - -def ascii_only(s): - return s.translate(translation_table) - - -def full_process(s, force_ascii=False): - """Process string by - -- removing all but letters and numbers - -- trim whitespace - -- force to lower case - if force_ascii == True, force convert to ascii""" - - if force_ascii: - s = ascii_only(str(s)) - - return _default_process(s) \ No newline at end of file