Skip to content

Commit

Permalink
FEAT-#6296: Add additional pyhdk launch parameters (#6303)
Browse files Browse the repository at this point in the history
* Add log_dir parameter for setting a specific log directory for all output log files
* Add enable_lazy_dict_materialization parameter for lazily materializing string dictionary columns only when they are referenced in a query.
* Add support for parsing parameters based on pyhdk version

Signed-off-by: Alex Baden <[email protected]>
  • Loading branch information
alexbaden authored Jul 5, 2023
1 parent 27e2e41 commit 441c0be
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 11 deletions.
4 changes: 4 additions & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import ray


# stub ray.remote to be a no-op so it doesn't shadow docstrings
def noop_decorator(*args, **kwargs):
if len(args) == 1 and len(kwargs) == 0 and callable(args[0]):
Expand Down Expand Up @@ -43,6 +44,9 @@ def noop_decorator(*args, **kwargs):
sys.modules["pyhdk.hdk"].ExecutionResult = type("ExecutionResult", (object,), {})
if not hasattr(sys.modules["pyhdk.hdk"], "RelAlgExecutor"):
sys.modules["pyhdk.hdk"].RelAlgExecutor = type("RelAlgExecutor", (object,), {})
if not hasattr(sys.modules["pyhdk"], "__version__"):
# Show all known pyhdk config options in documentation
sys.modules["pyhdk"].__version__ = "999"

sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
import modin
Expand Down
43 changes: 33 additions & 10 deletions modin/config/envvars.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,15 +517,6 @@ class HdkLaunchParameters(EnvironmentVariable, type=dict):
"""

varname = "MODIN_HDK_LAUNCH_PARAMETERS"
default = {
"enable_union": 1,
"enable_columnar_output": 1,
"enable_lazy_fetch": 0,
"null_div_by_zero": 1,
"enable_watchdog": 0,
"enable_thrift_logs": 0,
"cpu_only": 1,
}

@classmethod
def get(cls) -> dict:
Expand Down Expand Up @@ -558,12 +549,44 @@ def _get(cls) -> dict:
Decoded and verified config value.
"""
custom_parameters = super().get()
result = cls.default.copy()
result = cls._get_default().copy()
result.update(
{key.replace("-", "_"): value for key, value in custom_parameters.items()}
)
return result

@classmethod
def _get_default(cls) -> dict:
    """
    Get default value of the config.

    Checks the pyhdk version and omits variables unsupported in prior versions.

    Returns
    -------
    dict
        Config keys and corresponding values.
    """
    # Build the base defaults lazily and cache them on the class so that
    # subsequent lookups reuse the same dictionary.
    if (default := getattr(cls, "default", None)) is None:
        cls.default = default = {
            "enable_union": 1,
            "enable_columnar_output": 1,
            "enable_lazy_fetch": 0,
            "null_div_by_zero": 1,
            "enable_watchdog": 0,
            "enable_thrift_logs": 0,
            "cpu_only": 1,
        }

    try:
        import pyhdk
    except ImportError:
        # if pyhdk is not available, do not show any additional options
        return default

    # A pyhdk module (or a stub standing in for it) may not expose
    # ``__version__``; treat that as "version unknown" and keep only the
    # options supported by every version instead of raising AttributeError.
    pyhdk_version = getattr(pyhdk, "__version__", None)
    if pyhdk_version is not None and version.parse(pyhdk_version) >= version.parse(
        "0.6.1"
    ):
        # These options are only recognised by pyhdk 0.6.1 and newer.
        default["enable_lazy_dict_materialization"] = 0
        default["log_dir"] = "pyhdk_log"
    return default


class OmnisciLaunchParameters(HdkLaunchParameters, type=dict):
"""
Expand Down
33 changes: 32 additions & 1 deletion modin/config/test/test_envvars.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
import modin.config as cfg
from modin.config.envvars import EnvironmentVariable, _check_vars, ExactStr

from packaging import version


@pytest.fixture
def make_unknown_env():
Expand Down Expand Up @@ -63,9 +65,22 @@ def test_custom_help(make_custom_envvar):


def test_hdk_envvar():
try:
import pyhdk

defaults = cfg.HdkLaunchParameters.get()
assert defaults["enable_union"] == 1
if version.parse(pyhdk.__version__) >= version.parse("0.6.1"):
assert defaults["log_dir"] == "pyhdk_log"
del cfg.HdkLaunchParameters._value
except ImportError:
# This test is intended to check pyhdk internals. If pyhdk is not available, skip the version check test.
pass

os.environ[
cfg.OmnisciLaunchParameters.varname
] = "enable_union=2,enable_thrift_logs=3"
del cfg.OmnisciLaunchParameters._value
params = cfg.OmnisciLaunchParameters.get()
assert params["enable_union"] == 2
assert params["enable_thrift_logs"] == 3
Expand All @@ -74,11 +89,27 @@ def test_hdk_envvar():
assert params["enable_union"] == 2
assert params["enable_thrift_logs"] == 3

os.environ[cfg.HdkLaunchParameters.varname] = "enable_union=4,enable_thrift_logs=5"
os.environ[cfg.HdkLaunchParameters.varname] = "unsupported=X"
params = cfg.HdkLaunchParameters.get()
assert params["unsupported"] == "X"
try:
import pyhdk

pyhdk.buildConfig(**cfg.HdkLaunchParameters.get())
except RuntimeError as e:
assert str(e) == "unrecognised option '--unsupported'"
except ImportError:
# This test is intended to check pyhdk internals. If pyhdk is not available, skip the unsupported option check.
pass

os.environ[
cfg.HdkLaunchParameters.varname
] = "enable_union=4,enable_thrift_logs=5,enable_lazy_dict_materialization=6"
del cfg.HdkLaunchParameters._value
params = cfg.HdkLaunchParameters.get()
assert params["enable_union"] == 4
assert params["enable_thrift_logs"] == 5
assert params["enable_lazy_dict_materialization"] == 6

params = cfg.OmnisciLaunchParameters.get()
assert params["enable_union"] == 2
Expand Down

0 comments on commit 441c0be

Please sign in to comment.