From 928a95aa56f60da33a4e724ea2ca97797c612968 Mon Sep 17 00:00:00 2001 From: Florian Jetter Date: Fri, 6 Oct 2023 12:48:10 +0200 Subject: [PATCH] Improve cache hits for tuple keys in `key_split` and intern results (#10547) --- dask/utils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dask/utils.py b/dask/utils.py index 6713fb3b86f..e2ebb6a588e 100644 --- a/dask/utils.py +++ b/dask/utils.py @@ -1851,10 +1851,11 @@ def key_split(s): >>> key_split('_(x)') # strips unpleasant characters 'x' """ + # If we convert the key, recurse to utilize LRU cache better if type(s) is bytes: - s = s.decode() + return key_split(s.decode()) if type(s) is tuple: - s = s[0] + return key_split(s[0]) try: words = s.split("-") if not words[0][0].isalpha(): @@ -1873,7 +1874,7 @@ def key_split(s): else: if result[0] == "<": result = result.strip("<>").split()[0].split(".")[-1] - return result + return sys.intern(result) except Exception: return "Other"