From a0f1c24b3f054880a028cbecaa6969b52d649234 Mon Sep 17 00:00:00 2001 From: Dave Bunten Date: Wed, 30 Aug 2023 08:36:36 -0600 Subject: [PATCH] Reuse existing Parsl configurations (#93) * detect parsl load and reuse existing config seeking to address #90 * attempt gc * release unused for arrow test * modify comment about warning * add note to docstring * isort linting * raise other runtimeerrors * Apply suggestions from code review Co-authored-by: Gregory Way --------- Co-authored-by: Gregory Way --- cytotable/convert.py | 28 ++++++++-------------------- cytotable/utils.py | 24 +++++++++++++++++++++++- 2 files changed, 31 insertions(+), 21 deletions(-) diff --git a/cytotable/convert.py b/cytotable/convert.py index 1716ea2d..1826253c 100644 --- a/cytotable/convert.py +++ b/cytotable/convert.py @@ -13,7 +13,7 @@ from parsl.app.app import join_app, python_app from cytotable.presets import config -from cytotable.utils import _column_sort, _default_parsl_config +from cytotable.utils import _column_sort, _default_parsl_config, _parsl_loaded logger = logging.getLogger(__name__) @@ -1314,6 +1314,8 @@ def convert( # pylint: disable=too-many-arguments,too-many-locals an optional group of presets to use based on common configurations parsl_config: Optional[parsl.Config] (Default value = None) Optional Parsl configuration to use for running CytoTable operations. + Note: when using CytoTable multiple times in the same process, + CytoTable will use the first provided configuration for all runs. Returns: Union[Dict[str, List[Dict[str, Any]]], str] @@ -1356,31 +1358,17 @@ def convert( # pylint: disable=too-many-arguments,too-many-locals ) """ - # attempt to load parsl configuration - try: + # attempt to load parsl configuration if we didn't already load one + if not _parsl_loaded(): # if we don't have a parsl configuration provided, load the default if parsl_config is None: parsl.load(_default_parsl_config()) else: # else we attempt to load the given parsl configuration parsl.load(parsl_config) - except RuntimeError as runtime_exc: - # catch cases where parsl has already been loaded and defer to - # previously loaded configuration with a warning - if str(runtime_exc) == "Config has already been loaded": - logger.warning(str(runtime_exc)) - - # if we're supplying a new config, attempt to clear current config - # and use the new configuration instead. Otherwise, use existing. - if parsl_config is not None: - # clears the existing parsl configuration - parsl.clear() - # then load the supplied configuration - parsl.load(parsl_config) - - # for other potential runtime errors besides config already being loaded - else: - raise + else: + # otherwise warn the user about previous config. + logger.warning("Reusing previously loaded Parsl configuration.") # optionally load preset configuration for arguments # note: defer to overrides from parameters whose values diff --git a/cytotable/utils.py b/cytotable/utils.py index 35a62f56..ff898980 100644 --- a/cytotable/utils.py +++ b/cytotable/utils.py @@ -9,7 +9,7 @@ from typing import Any, Dict, Union, cast import duckdb -import pyarrow as pa +import parsl from cloudpathlib import AnyPath, CloudPath from cloudpathlib.exceptions import InvalidPrefixError from parsl.app.app import AppBase @@ -99,6 +99,28 @@ def Parsl_AppBase_init_for_docs(self, func, *args, **kwargs): AppBase.__init__ = Parsl_AppBase_init_for_docs +def _parsl_loaded() -> bool: + """ + Checks whether Parsl configuration has already been loaded. + """ + + try: + # try to reference Parsl dataflowkernel + parsl.dfk() + except RuntimeError as rte: + # if we detect a runtime error that states we need to load config + # return false to indicate parsl config has not yet been loaded. + if str(rte) == "Must first load config": + return False + + # otherwise we raise other RuntimeError's + else: + raise + + # otherwise we indicate parsl config has already been loaded + return True + + def _default_parsl_config(): """ Return a default Parsl configuration for use with CytoTable.