add hpobench wrapper #993
base: develop
@@ -16,3 +16,4 @@ Task modules
    task/rosenbrock
    task/forrester
    task/profet
    task/hpobench
@@ -0,0 +1,5 @@
HPOBench
=============================

.. automodule:: orion.benchmark.task.hpobench
    :members:
@@ -63,6 +63,10 @@
        "pymoo==0.5.0",
        "hebo @ git+https://github.com/huawei-noah/[email protected]#egg=hebo&subdirectory=HEBO",
    ],
    "hpobench": [
        "openml",
        "hpobench @ git+https://github.com/automl/HPOBench.git@master#egg=hpobench",
    ],
}
extras_require["all"] = sorted(set(sum(extras_require.values(), [])))
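With this extra defined, the wrapper's dependencies can presumably be pulled in via pip, e.g. `pip install orion[hpobench]` (assuming the project's published package name `orion`), or via `orion[all]`, since the new extra is folded into `extras_require["all"]`.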
@@ -0,0 +1,95 @@
"""
Task for HPOBench
=================
"""
import importlib
import subprocess
from typing import Dict, List

from orion.algo.space import Space
from orion.algo.space.configspace import to_orionspace
from orion.benchmark.task.base import BenchmarkTask
from orion.core.utils.module_import import ImportOptional

with ImportOptional("HPOBench", "hpobench") as import_optional:
    from hpobench import __version__ as hpobench_version

    print(f"HPOBench version: {hpobench_version}")


class HPOBench(BenchmarkTask):
    """Benchmark Task wrapper over HPOBench (https://github.com/automl/HPOBench)

    For more information on HPOBench, see the original paper at
    https://arxiv.org/abs/2109.06716.

    Katharina Eggensperger, Philipp Müller, Neeratyoy Mallik, Matthias Feurer, René Sass,
    Aaron Klein, Noor Awad, Marius Lindauer, Frank Hutter. "HPOBench: A Collection of
    Reproducible Multi-Fidelity Benchmark Problems for HPO." Thirty-fifth Conference on
    Neural Information Processing Systems Datasets and Benchmarks Track (Round 2).

    Parameters
    ----------
    max_trials : int
        Maximum number of trials for this task.
    hpo_benchmark_class : str
        Full path to a particular benchmark class in HPOBench.
    benchmark_kwargs : dict
        Optional parameters used to create a benchmark instance of `hpo_benchmark_class`.
    objective_function_kwargs : dict
        Optional parameters used when calling `objective_function` on the benchmark instance.
    """

    def __init__(
        self,
        max_trials: int,
        hpo_benchmark_class: str = None,
        benchmark_kwargs: dict = None,
        objective_function_kwargs: dict = None,
    ):
        import_optional.ensure()
        super().__init__(
            max_trials=max_trials,
            hpo_benchmark_class=hpo_benchmark_class,
            benchmark_kwargs=benchmark_kwargs,
            objective_function_kwargs=objective_function_kwargs,
        )
        self._verify_benchmark(hpo_benchmark_class)
        self.hpo_benchmark_cls = self._load_benchmark(hpo_benchmark_class)
        self.benchmark_kwargs = dict() if benchmark_kwargs is None else benchmark_kwargs
        self.objective_function_kwargs = (
            dict() if objective_function_kwargs is None else objective_function_kwargs
        )

    def call(self, **kwargs) -> List[Dict]:
        hpo_benchmark = self.hpo_benchmark_cls(**self.benchmark_kwargs)
        result_dict = hpo_benchmark.objective_function(
            configuration=kwargs, **self.objective_function_kwargs
        )
        objective = result_dict["function_value"]
        return [
            dict(
                name=self.hpo_benchmark_cls.__name__, type="objective", value=objective
            )
        ]

Review thread on `hpo_benchmark = self.hpo_benchmark_cls(**self.benchmark_kwargs)`:

> @donglinjy Did you figure out why the singularity container is destroyed at the end of the subprocess call? If we could avoid this, we would only pay the price of building it once, during task instantiation.

> Reply: I think this is caused by how we now run the trials. If we create the containerized benchmark during task init, we serialize the task instance and then deserialize it as a new task instance to run; after the run completes, this new instance is destroyed, which causes the singularity container to shut down too. I did mention another possible solution at #993 (comment), but it would require some additional changes to work in the remote-workers scenario.
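The reply describes a serialize/deserialize lifecycle; the following is a minimal hypothetical sketch of it (not code from this PR) showing why a container built in `__init__` would be torn down with each worker-side copy:

```python
import pickle

from orion.benchmark.task import HPOBench

# Hypothetical illustration of the lifecycle described above. The task is
# pickled by the coordinating process and rebuilt in a worker, so anything
# created in __init__ (e.g. a Singularity container) would belong to a
# short-lived deserialized copy and be destroyed when that copy is dropped.
task = HPOBench(
    max_trials=2,
    hpo_benchmark_class="hpobench.container.benchmarks.ml.tabular_benchmark.TabularBenchmark",
    benchmark_kwargs=dict(model="xgb", task_id=168912),
)
worker_task = pickle.loads(pickle.dumps(task))  # a *new* task instance
# worker_task(**params) would build the container, run, and tear it down on
# exit -- which is why `call` instantiates the benchmark per invocation.
del worker_task
```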

    def _load_benchmark(self, hpo_benchmark_class: str):
        package, cls = hpo_benchmark_class.rsplit(".", 1)
        module = importlib.import_module(package)
        return getattr(module, cls)

    def _verify_benchmark(self, hpo_benchmark_class: str):
        if not hpo_benchmark_class:
            raise AttributeError("Please provide the full path to an HPOBench benchmark")
        if "container" in hpo_benchmark_class:
            code, message = subprocess.getstatusoutput("singularity -h")
            if code != 0:
                raise AttributeError(
                    "Cannot run a containerized benchmark without Singularity: {}".format(
                        message
                    )
                )

    def get_search_space(self) -> Space:
        configuration_space = self.hpo_benchmark_cls(
            **self.benchmark_kwargs
        ).get_configuration_space()
        return to_orionspace(configuration_space)
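A usage sketch for the wrapper, assembled from the tests below (the class path, kwargs, and hyperparameter values are taken from them; treat this as a sketch rather than documented API):

```python
from orion.benchmark.task import HPOBench

# Wrap an HPOBench tabular benchmark as an Orion benchmark task
# (class path and kwargs mirror the tests below).
task = HPOBench(
    max_trials=10,
    hpo_benchmark_class="hpobench.benchmarks.ml.tabular_benchmark.TabularBenchmark",
    benchmark_kwargs=dict(model="xgb", task_id=168912),
)

space = task.get_search_space()  # Orion Space converted from ConfigSpace
results = task(
    colsample_bytree=1.0,
    eta=0.045929204672575,
    max_depth=1,
    reg_lambda=10.079368591308594,
)
# results looks like:
# [{"name": "TabularBenchmark", "type": "objective", "value": ...}]
```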
@@ -0,0 +1,170 @@
import inspect

import pytest

from orion.algo.space import Space
from orion.benchmark.task import HPOBench
from orion.benchmark.task.hpobench import import_optional

hpobench_benchmarks = list()
hpobench_benchmarks.append(
    {
        "type": "tabular",
        "class": "hpobench.container.benchmarks.ml.tabular_benchmark.TabularBenchmark",
        "init_args": dict(model="xgb", task_id=168912),
        "objective_args": dict(),
        "hyperparams": {
            "colsample_bytree": 1.0,
            "eta": 0.045929204672575,
            "max_depth": 1,
            "reg_lambda": 10.079368591308594,
        },
    }
)

hpobench_benchmarks.append(
    {
        "type": "raw",
        "class": "hpobench.container.benchmarks.ml.xgboost_benchmark.XGBoostBenchmark",
        "init_args": dict(task_id=168912),
        "objective_args": dict(),
        "hyperparams": {
            "colsample_bytree": 1.0,
            "eta": 0.045929204672575,
            "max_depth": 1,
            "reg_lambda": 10.079368591308594,
        },
    }
)

"""
# needs the fix for https://github.com/Epistimio/orion/issues/1018
hpobench_benchmarks.append({
    "type": "surrogate",
    "class": "hpobench.container.benchmarks.surrogates.paramnet_benchmark.ParamNetAdultOnStepsBenchmark",
    "init_args": dict(),
    "objective_args": dict(),
    "hyperparams": {
        "average_units_per_layer_log2": 6.0,
        "batch_size_log2": 5.5,
        "dropout_0": 0.25,
        "dropout_1": 0.25,
        "final_lr_fraction_log2": 1.0
    }
})
"""

@pytest.mark.skipif(
    import_optional.failed,
    reason="Running without HPOBench",
)
class TestHPOBench:
    """Tests for the HPOBench benchmark task wrapper"""

    def test_create_with_non_container_benchmark(self):
        """Test creating a local (non-containerized) HPOBench benchmark"""
        task = HPOBench(
            max_trials=2,
            hpo_benchmark_class="hpobench.benchmarks.ml.tabular_benchmark.TabularBenchmark",
            benchmark_kwargs=dict(model="xgb", task_id=168912),
        )
        assert task.max_trials == 2
        assert inspect.isclass(task.hpo_benchmark_cls)
        assert task.configuration == {
            "HPOBench": {
                "hpo_benchmark_class": "hpobench.benchmarks.ml.tabular_benchmark.TabularBenchmark",
                "benchmark_kwargs": {"model": "xgb", "task_id": 168912},
                "objective_function_kwargs": None,
                "max_trials": 2,
            }
        }

    def test_create_with_container_benchmark(self):
        """Test creating a containerized HPOBench benchmark"""
        task = HPOBench(
            max_trials=2,
            hpo_benchmark_class="hpobench.container.benchmarks.ml.tabular_benchmark.TabularBenchmark",
            benchmark_kwargs=dict(model="xgb", task_id=168912),
        )
        assert task.max_trials == 2
        assert inspect.isclass(task.hpo_benchmark_cls)
        assert task.configuration == {
            "HPOBench": {
                "hpo_benchmark_class": "hpobench.container.benchmarks.ml.tabular_benchmark.TabularBenchmark",
                "benchmark_kwargs": {"model": "xgb", "task_id": 168912},
                "objective_function_kwargs": None,
                "max_trials": 2,
            }
        }

    def test_run_locally(self):
        """Test running a local HPOBench benchmark"""
        task = HPOBench(
            max_trials=2,
            hpo_benchmark_class="hpobench.benchmarks.ml.tabular_benchmark.TabularBenchmark",
            benchmark_kwargs=dict(model="xgb", task_id=168912),
        )
        params = {
            "colsample_bytree": 1.0,
            "eta": 0.045929204672575,
            "max_depth": 1.0,
            "reg_lambda": 10.079368591308594,
        }

        objectives = task(**params)
        assert objectives == [
            {
                "name": "TabularBenchmark",
                "type": "objective",
                "value": 0.056373193166885674,
            }
        ]

    @pytest.mark.parametrize("benchmark", hpobench_benchmarks)
    def test_run_singularity_benchmarks(self, benchmark):
        """Test running each containerized benchmark fixture with Singularity"""
        task = HPOBench(
            max_trials=2,
            hpo_benchmark_class=benchmark.get("class"),
            benchmark_kwargs=benchmark.get("init_args"),
            objective_function_kwargs=benchmark.get("objective_args"),
        )

        params = benchmark.get("hyperparams")
        objectives = task(**params)

        assert len(objectives) > 0

    def test_run_singularity(self):
        """Test running a containerized HPOBench benchmark with Singularity"""
        task = HPOBench(
            max_trials=2,
            hpo_benchmark_class="hpobench.container.benchmarks.ml.tabular_benchmark.TabularBenchmark",
            benchmark_kwargs=dict(model="xgb", task_id=168912),
        )
        params = {
            "colsample_bytree": 1.0,
            "eta": 0.045929204672575,
            "max_depth": 1.0,
            "reg_lambda": 10.079368591308594,
        }

        objectives = task(**params)
        assert objectives == [
            {
                "name": "TabularBenchmark",
                "type": "objective",
                "value": 0.056373193166885674,
            }
        ]

    def test_search_space(self):
        """Test getting the task search space"""
        task = HPOBench(
            max_trials=2,
            hpo_benchmark_class="hpobench.benchmarks.ml.tabular_benchmark.TabularBenchmark",
            benchmark_kwargs=dict(model="xgb", task_id=168912),
        )
        space = task.get_search_space()

        assert isinstance(space, Space)
        assert len(space) == 4
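The search-space test exercises the ConfigSpace-to-Orion conversion that `get_search_space` relies on. A minimal sketch of that conversion in isolation (the hyperparameter here is illustrative, not one of the benchmark's):

```python
from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformFloatHyperparameter

from orion.algo.space.configspace import to_orionspace

# Build a one-dimensional ConfigSpace and convert it to an Orion Space.
cs = ConfigurationSpace()
cs.add_hyperparameter(
    UniformFloatHyperparameter("eta", lower=2**-10, upper=1.0, log=True)
)
space = to_orionspace(cs)  # an orion.algo.space.Space instance
print(space)
```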
Review thread:

> Is ordinal best mapped to categorical or integer? Categorical loses the importance of the ordering.

> Reply: Ordinal values do not have to be `int`; see
> https://automl.github.io/ConfigSpace/main/api/hyperparameters.html#ConfigSpace.hyperparameters.OrdinalHyperparameter
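To illustrate the reply (a hypothetical ConfigSpace snippet, not code from this PR): an `OrdinalHyperparameter` can be built over non-integer values while still carrying an ordering.

```python
from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import OrdinalHyperparameter

# An ordinal hyperparameter over string values: the sequence is ordered
# ("low" < "medium" < "high") even though the values are not integers.
cs = ConfigurationSpace()
cs.add_hyperparameter(
    OrdinalHyperparameter("quality", sequence=["low", "medium", "high"])
)
```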