diff --git a/scripts/make_tutorials.py b/scripts/make_tutorials.py
index 3d8a63fe945..f515dfb5961 100644
--- a/scripts/make_tutorials.py
+++ b/scripts/make_tutorials.py
@@ -15,8 +15,14 @@
import nbformat
from bs4 import BeautifulSoup
+from memory_profiler import memory_usage
from nbconvert import HTMLExporter, ScriptExporter
+TUTORIALS_TO_SKIP = [
+ "raytune_pytorch_cnn", # TODO: Times out in CI but passes locally. Investigate.
+ "early_stopping", # TODO: The trials fail. Investigate.
+]
+
TEMPLATE = """const CWD = process.cwd();
@@ -147,8 +153,7 @@ def gen_tutorials(
# prepare paths for converted tutorials & files
os.makedirs(os.path.join(repo_dir, "website", "_tutorials"), exist_ok=True)
os.makedirs(os.path.join(repo_dir, "website", "static", "files"), exist_ok=True)
- if smoke_test:
- os.environ["SMOKE_TEST"] = str(smoke_test)
+ env = {"SMOKE_TEST": "True"} if smoke_test else None
for config in tutorial_configs:
tid = config["id"]
@@ -162,32 +167,45 @@ def gen_tutorials(
nb_str = infile.read()
nb = nbformat.reads(nb_str, nbformat.NO_CONVERT)
+ total_time = None
if exec_tutorials and exec_on_build:
+ if tid in TUTORIALS_TO_SKIP:
+ print(f"Skipping {tid}")
+ continue
tutorial_path = Path(paths["tutorial_path"])
print("Executing tutorial {}".format(tid))
- start_time = time.time()
+ start_time = time.monotonic()
- # try / catch failures for now
- # will re-raise at the end
+ # Try / catch failures for now. We will re-raise at the end.
+ timeout_minutes = 15 if smoke_test else 150
try:
# Execute notebook.
- # TODO: [T163244135] Speed up tutorials and reduce timeout limits.
- timeout_minutes = 15 if smoke_test else 150
- run_script(tutorial=tutorial_path, timeout_minutes=timeout_minutes)
- total_time = time.time() - start_time
+ mem_usage, run_out = memory_usage(
+ (run_script, (tutorial_path, timeout_minutes), {"env": env}),
+ retval=True,
+ include_children=True,
+ )
+ total_time = time.monotonic() - start_time
print(
- "Done executing tutorial {}. Took {:.2f} seconds.".format(
- tid, total_time
- )
+ f"Finished executing tutorial {tid} in {total_time:.2f} seconds. "
+ f"Starting memory usage was {mem_usage[0]} MB & "
+ f"the peak memory usage was {max(mem_usage)} MB."
)
- except Exception as exc:
+ except subprocess.TimeoutExpired:
has_errors = True
- print("Couldn't execute tutorial {}!".format(tid))
- print(exc)
- total_time = None
- else:
- total_time = None
-
+ print(
+ f"Tutorial {tid} exceeded the maximum runtime of "
+ f"{timeout_minutes} minutes."
+ )
+ try:
+ run_out.check_returncode()
+ except subprocess.CalledProcessError:
+ has_errors = True
+ print(
+ f"Encountered error running tutorial {tid}: \n"
+ f"stdout: \n {run_out.stdout} \n"
+ f"stderr: \n {run_out.stderr} \n"
+ )
# convert notebook to HTML
exporter = HTMLExporter(template_name="classic")
html, _ = exporter.from_notebook_node(nb)
diff --git a/setup.py b/setup.py
index 56433d96532..3fb6cfd99b7 100644
--- a/setup.py
+++ b/setup.py
@@ -73,6 +73,7 @@
"pyro-ppl", # Required for to call run_inference.
"pytorch-lightning", # For the early stopping tutorial.
"papermill", # For executing the tutorials.
+ "memory_profiler", # For measuring memory usage of the tutorials.
]
diff --git a/tutorials/human_in_the_loop/human_in_the_loop.ipynb b/tutorials/human_in_the_loop/human_in_the_loop.ipynb
index 9e571679322..c6957ecac6d 100644
--- a/tutorials/human_in_the_loop/human_in_the_loop.ipynb
+++ b/tutorials/human_in_the_loop/human_in_the_loop.ipynb
@@ -3,7 +3,10 @@
{
"cell_type": "markdown",
"metadata": {
- "collapsed": true
+ "collapsed": true,
+ "jupyter": {
+ "outputs_hidden": true
+ }
},
"source": [
"# Using Ax for Human-in-the-loop Experimentation¶"
@@ -38,96 +41,13 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[INFO 04-25 19:56:20] ipy_plotting: Injecting Plotly library into cell. Do not overwrite or delete cell.\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
+ "import inspect\n",
+ "import os\n",
+ "\n",
"from ax import (\n",
" Data,\n",
" Metric,\n",
@@ -148,13 +68,21 @@
"init_notebook_plotting()"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
"NOTE: The path below assumes the tutorial is being run from the root directory of the Ax package. This is needed since the Jupyter notebooks may change the active directory during runtime, making it tricky to find the file in a consistent way."
+ ]
+ },
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "experiment = json_load.load_experiment(\"hitl_exp.json\")"
+ "curr_dir = os.path.join(os.getcwd(), \"tutorials\", \"human_in_the_loop\")\n",
+ "experiment = json_load.load_experiment(os.path.join(curr_dir, \"hitl_exp.json\"))"
]
},
{
@@ -177,66 +105,27 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "BatchTrial(experiment_name='human_in_the_loop_tutorial', index=0, status=TrialStatus.COMPLETED)"
- ]
- },
- "execution_count": 3,
- "metadata": {
- "bento_obj_id": "140009627865944"
- },
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"experiment.trials[0]"
]
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "datetime.datetime(2019, 3, 29, 18, 10, 6)"
- ]
- },
- "execution_count": 4,
- "metadata": {
- "bento_obj_id": "140009822034240"
- },
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"experiment.trials[0].time_created"
]
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "65"
- ]
- },
- "execution_count": 5,
- "metadata": {
- "bento_obj_id": "140012816306816"
- },
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"# Number of arms in first experiment, including status_quo\n",
"len(experiment.trials[0].arms)"
@@ -244,24 +133,11 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"metadata": {
"scrolled": true
},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Arm(name='0_0', parameters={'x_excellent': 0.9715802669525146, 'x_good': 0.8615524768829346, 'x_moderate': 0.7668091654777527, 'x_poor': 0.34871453046798706, 'x_unknown': 0.7675797343254089, 'y_excellent': 2.900710028409958, 'y_good': 1.5137152910232545, 'y_moderate': 0.6775947093963622, 'y_poor': 0.4974367544054985, 'y_unknown': 1.0852564811706542, 'z_excellent': 517803.49761247635, 'z_good': 607874.5171427727, 'z_moderate': 1151881.2023103237, 'z_poor': 2927449.2621421814, 'z_unknown': 2068407.6935052872})"
- ]
- },
- "execution_count": 6,
- "metadata": {
- "bento_obj_id": "140009627778744"
- },
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"# Sample arm configuration\n",
"experiment.trials[0].arms[0]"
@@ -286,32 +162,17 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Arm(name='status_quo', parameters={'x_excellent': 0.0, 'x_good': 0.0, 'x_moderate': 0.0, 'x_poor': 0.0, 'x_unknown': 0.0, 'y_excellent': 1.0, 'y_good': 1.0, 'y_moderate': 1.0, 'y_poor': 1.0, 'y_unknown': 1.0, 'z_excellent': 1000000.0, 'z_good': 1000000.0, 'z_moderate': 1000000.0, 'z_poor': 1000000.0, 'z_unknown': 1000000.0})"
- ]
- },
- "execution_count": 7,
- "metadata": {
- "bento_obj_id": "140009821742024"
- },
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"experiment.status_quo"
]
},
{
"cell_type": "code",
- "execution_count": 8,
- "metadata": {
- "collapsed": true
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
"source": [
"objective_metric = Metric(name=\"metric_1\")\n",
@@ -340,279 +201,28 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "application/vnd.dataresource+json": {
- "data": [
- {
- "arm_name": "0_1",
- "end_time": "2019-04-03T00:00:00.000Z",
- "index": 0,
- "mean": 495.7630483864,
- "metric_name": "metric_1",
- "n": 1599994,
- "sem": 2.6216409435,
- "start_time": "2019-03-30T00:00:00.000Z",
- "trial_index": 0
- },
- {
- "arm_name": "0_23",
- "end_time": "2019-04-03T00:00:00.000Z",
- "index": 1,
- "mean": 524.3677121973,
- "metric_name": "metric_1",
- "n": 1596356,
- "sem": 2.7316473644,
- "start_time": "2019-03-30T00:00:00.000Z",
- "trial_index": 0
- },
- {
- "arm_name": "0_56",
- "end_time": "2019-04-03T00:00:00.000Z",
- "index": 2,
- "mean": 21.8761495501,
- "metric_name": "metric_2",
- "n": 1600291,
- "sem": 0.0718543885,
- "start_time": "2019-03-30T00:00:00.000Z",
- "trial_index": 0
- },
- {
- "arm_name": "0_42",
- "end_time": "2019-04-03T00:00:00.000Z",
- "index": 3,
- "mean": 533.2995099946,
- "metric_name": "metric_1",
- "n": 1601500,
- "sem": 2.8198433102,
- "start_time": "2019-03-30T00:00:00.000Z",
- "trial_index": 0
- },
- {
- "arm_name": "0_43",
- "end_time": "2019-04-03T00:00:00.000Z",
- "index": 4,
- "mean": 21.338490998,
- "metric_name": "metric_2",
- "n": 1599307,
- "sem": 0.0694331648,
- "start_time": "2019-03-30T00:00:00.000Z",
- "trial_index": 0
- }
- ],
- "schema": {
- "fields": [
- {
- "name": "index",
- "type": "integer"
- },
- {
- "name": "arm_name",
- "type": "string"
- },
- {
- "name": "trial_index",
- "type": "integer"
- },
- {
- "name": "end_time",
- "type": "datetime"
- },
- {
- "name": "mean",
- "type": "number"
- },
- {
- "name": "metric_name",
- "type": "string"
- },
- {
- "name": "n",
- "type": "integer"
- },
- {
- "name": "sem",
- "type": "number"
- },
- {
- "name": "start_time",
- "type": "datetime"
- }
- ],
- "pandas_version": "0.20.0",
- "primaryKey": [
- "index"
- ]
- }
- },
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " arm_name | \n",
- " trial_index | \n",
- " end_time | \n",
- " mean | \n",
- " metric_name | \n",
- " n | \n",
- " sem | \n",
- " start_time | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 0_1 | \n",
- " 0 | \n",
- " 2019-04-03 | \n",
- " 495.763048 | \n",
- " metric_1 | \n",
- " 1599994 | \n",
- " 2.621641 | \n",
- " 2019-03-30 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 0_23 | \n",
- " 0 | \n",
- " 2019-04-03 | \n",
- " 524.367712 | \n",
- " metric_1 | \n",
- " 1596356 | \n",
- " 2.731647 | \n",
- " 2019-03-30 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 0_56 | \n",
- " 0 | \n",
- " 2019-04-03 | \n",
- " 21.876150 | \n",
- " metric_2 | \n",
- " 1600291 | \n",
- " 0.071854 | \n",
- " 2019-03-30 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 0_42 | \n",
- " 0 | \n",
- " 2019-04-03 | \n",
- " 533.299510 | \n",
- " metric_1 | \n",
- " 1601500 | \n",
- " 2.819843 | \n",
- " 2019-03-30 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 0_43 | \n",
- " 0 | \n",
- " 2019-04-03 | \n",
- " 21.338491 | \n",
- " metric_2 | \n",
- " 1599307 | \n",
- " 0.069433 | \n",
- " 2019-03-30 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " arm_name trial_index end_time mean metric_name n sem \\\n",
- "0 0_1 0 2019-04-03 495.763048 metric_1 1599994 2.621641 \n",
- "1 0_23 0 2019-04-03 524.367712 metric_1 1596356 2.731647 \n",
- "2 0_56 0 2019-04-03 21.876150 metric_2 1600291 0.071854 \n",
- "3 0_42 0 2019-04-03 533.299510 metric_1 1601500 2.819843 \n",
- "4 0_43 0 2019-04-03 21.338491 metric_2 1599307 0.069433 \n",
- "\n",
- " start_time \n",
- "0 2019-03-30 \n",
- "1 2019-03-30 \n",
- "2 2019-03-30 \n",
- "3 2019-03-30 \n",
- "4 2019-03-30 "
- ]
- },
- "execution_count": 9,
- "metadata": {
- "bento_obj_id": "140009626802104"
- },
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
- "data = Data(pd.read_json(\"hitl_data.json\"))\n",
+ "data = Data(pd.read_json(os.path.join(curr_dir, \"hitl_data.json\")))\n",
"data.df.head()"
]
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "array(['0_1', '0_23', '0_56', '0_42', '0_43', '0_25', '0_44', '0_45',\n",
- " 'status_quo', '0_46', '0_27', '0_47', '0_48', '0_26', '0_49',\n",
- " '0_12', '0_5', '0_50', '0_51', '0_52', '0_0', '0_57', '0_58',\n",
- " '0_13', '0_59', '0_14', '0_6', '0_60', '0_61', '0_53', '0_62',\n",
- " '0_63', '0_7', '0_28', '0_15', '0_16', '0_17', '0_18', '0_19',\n",
- " '0_29', '0_2', '0_20', '0_21', '0_22', '0_54', '0_3', '0_30',\n",
- " '0_8', '0_10', '0_31', '0_24', '0_32', '0_33', '0_34', '0_35',\n",
- " '0_55', '0_36', '0_37', '0_38', '0_9', '0_39', '0_4', '0_11',\n",
- " '0_40', '0_41'], dtype=object)"
- ]
- },
- "execution_count": 10,
- "metadata": {
- "bento_obj_id": "140009627159648"
- },
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"data.df[\"arm_name\"].unique()"
]
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "array(['metric_1', 'metric_2'], dtype=object)"
- ]
- },
- "execution_count": 11,
- "metadata": {
- "bento_obj_id": "140009626807312"
- },
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"data.df[\"metric_name\"].unique()"
]
@@ -632,63 +242,18 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "{'x_excellent': RangeParameter(name='x_excellent', parameter_type=FLOAT, range=[0.0, 1.0]),\n",
- " 'x_good': RangeParameter(name='x_good', parameter_type=FLOAT, range=[0.0, 1.0]),\n",
- " 'x_moderate': RangeParameter(name='x_moderate', parameter_type=FLOAT, range=[0.0, 1.0]),\n",
- " 'x_poor': RangeParameter(name='x_poor', parameter_type=FLOAT, range=[0.0, 1.0]),\n",
- " 'x_unknown': RangeParameter(name='x_unknown', parameter_type=FLOAT, range=[0.0, 1.0]),\n",
- " 'y_excellent': RangeParameter(name='y_excellent', parameter_type=FLOAT, range=[0.1, 3.0]),\n",
- " 'y_good': RangeParameter(name='y_good', parameter_type=FLOAT, range=[0.1, 3.0]),\n",
- " 'y_moderate': RangeParameter(name='y_moderate', parameter_type=FLOAT, range=[0.1, 3.0]),\n",
- " 'y_poor': RangeParameter(name='y_poor', parameter_type=FLOAT, range=[0.1, 3.0]),\n",
- " 'y_unknown': RangeParameter(name='y_unknown', parameter_type=FLOAT, range=[0.1, 3.0]),\n",
- " 'z_excellent': RangeParameter(name='z_excellent', parameter_type=FLOAT, range=[50000.0, 5000000.0]),\n",
- " 'z_good': RangeParameter(name='z_good', parameter_type=FLOAT, range=[50000.0, 5000000.0]),\n",
- " 'z_moderate': RangeParameter(name='z_moderate', parameter_type=FLOAT, range=[50000.0, 5000000.0]),\n",
- " 'z_poor': RangeParameter(name='z_poor', parameter_type=FLOAT, range=[50000.0, 5000000.0]),\n",
- " 'z_unknown': RangeParameter(name='z_unknown', parameter_type=FLOAT, range=[50000.0, 5000000.0])}"
- ]
- },
- "execution_count": 12,
- "metadata": {
- "bento_obj_id": "140009821640096"
- },
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"experiment.search_space.parameters"
]
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[OrderConstraint(x_poor <= x_moderate),\n",
- " OrderConstraint(x_moderate <= x_good),\n",
- " OrderConstraint(x_good <= x_excellent),\n",
- " OrderConstraint(y_poor <= y_moderate),\n",
- " OrderConstraint(y_moderate <= y_good),\n",
- " OrderConstraint(y_good <= y_excellent)]"
- ]
- },
- "execution_count": 13,
- "metadata": {
- "bento_obj_id": "140009797967816"
- },
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"experiment.search_space.parameter_constraints"
]
@@ -705,7 +270,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -730,36 +295,9 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"cv_result = cross_validate(gp)\n",
"render(tile_cross_validation(cv_result))"
@@ -774,72 +312,18 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"render(tile_fitted(gp, rel=True))"
]
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"METRIC_X_AXIS = \"metric_1\"\n",
"METRIC_Y_AXIS = \"metric_2\"\n",
@@ -865,7 +349,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -886,36 +370,9 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"render(\n",
" plot_multiple_metrics(\n",
@@ -945,7 +402,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -967,36 +424,9 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"from ax.plot.scatter import plot_multiple_metrics\n",
"\n",
@@ -1021,10 +451,8 @@
},
{
"cell_type": "code",
- "execution_count": 22,
- "metadata": {
- "collapsed": true
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
"source": [
"constraint_1 = OutcomeConstraint(metric=constraint_metric, op=ComparisonOp.LEQ, bound=1)\n",
@@ -1039,38 +467,11 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": null,
"metadata": {
"scrolled": true
},
- "outputs": [
- {
- "data": {
- "text/html": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"render(\n",
" plot_multiple_metrics(\n",
@@ -1093,36 +494,9 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"render(\n",
" plot_multiple_metrics(\n",
@@ -1149,22 +523,9 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "BatchTrial(experiment_name='human_in_the_loop_tutorial', index=1, status=TrialStatus.CANDIDATE)"
- ]
- },
- "execution_count": 25,
- "metadata": {
- "bento_obj_id": "140009539295832"
- },
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"# We can add entire generator runs, when constructing a new trial.\n",
"trial = (\n",
@@ -1186,29 +547,9 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[Arm(name='1_0', parameters={'x_excellent': 0.7508829334076487, 'x_good': 0.40367960200772224, 'x_moderate': 0.3140989976643642, 'x_poor': 0.14559932559274122, 'x_unknown': 0.6670211538978944, 'y_excellent': 2.5425636846330546, 'y_good': 1.9418098243025033, 'y_moderate': 0.9858391295658283, 'y_poor': 0.38273584643959624, 'y_unknown': 1.5806965342880184, 'z_excellent': 4489287.686108519, 'z_good': 3540253.5809771204, 'z_moderate': 2964805.1608829396, 'z_poor': 2033780.6048510857, 'z_unknown': 2032062.1986594186}),\n",
- " Arm(name='1_1', parameters={'x_excellent': 0.6476003872239288, 'x_good': 0.31744410468794715, 'x_moderate': 0.17169895733661983, 'x_poor': 0.07453169788730113, 'x_unknown': 0.8642007362896725, 'y_excellent': 2.447230141007133, 'y_good': 1.5376602958384886, 'y_moderate': 0.6811637025094822, 'y_poor': 0.3318520722136259, 'y_unknown': 2.2510516551441038, 'z_excellent': 4072426.2914976524, 'z_good': 3806352.1749653243, 'z_moderate': 1645911.1218927982, 'z_poor': 988167.2494331661, 'z_unknown': 2661963.3926857742}),\n",
- " Arm(name='1_2', parameters={'x_excellent': 0.8054293536015693, 'x_good': 0.4404336669655842, 'x_moderate': 0.40141237536705926, 'x_poor': 0.22362006144561955, 'x_unknown': 0.5903430271180998, 'y_excellent': 2.617804090324439, 'y_good': 2.298442483961, 'y_moderate': 1.1690922032735336, 'y_poor': 0.5681654145954245, 'y_unknown': 1.3031360054446643, 'z_excellent': 4462167.1702239, 'z_good': 3731098.73420372, 'z_moderate': 3994655.203366427, 'z_poor': 2673298.8942999635, 'z_unknown': 1872273.8740227316}),\n",
- " Arm(name='1_3', parameters={'x_excellent': 0.7781327371696715, 'x_good': 0.57174929946374, 'x_moderate': 0.38386054557497773, 'x_poor': 0.1483239531374575, 'x_unknown': 0.6290782831583654, 'y_excellent': 2.5413971960197395, 'y_good': 1.8911813925901382, 'y_moderate': 1.0329065458855364, 'y_poor': 0.41007035875080056, 'y_unknown': 1.6406159955920543, 'z_excellent': 4255174.283604716, 'z_good': 3499788.950775458, 'z_moderate': 3071450.711177156, 'z_poor': 2269641.4509550007, 'z_unknown': 2090271.054327287}),\n",
- " Arm(name='1_4', parameters={'x_excellent': 0.6900739925384755, 'x_good': 0.5544791798816763, 'x_moderate': 0.22055916168207798, 'x_poor': 0.10245330233132562, 'x_unknown': 0.8355320141299903, 'y_excellent': 2.4681759096597897, 'y_good': 1.3517329904980873, 'y_moderate': 0.7109854013391809, 'y_poor': 0.2659656900117545, 'y_unknown': 2.069519817354787, 'z_excellent': 4019003.1305046123, 'z_good': 3708773.5492286514, 'z_moderate': 1891304.5997673508, 'z_poor': 1257805.979820268, 'z_unknown': 3209971.194920286}),\n",
- " Arm(name='1_5', parameters={'x_excellent': 0.84017169665951, 'x_good': 0.5080744603806646, 'x_moderate': 0.4093403112065996, 'x_poor': 0.26313460758317314, 'x_unknown': 0.5983032148893116, 'y_excellent': 2.589525158599443, 'y_good': 2.2354290056846433, 'y_moderate': 1.1617987885088201, 'y_poor': 0.7150067923774204, 'y_unknown': 1.5015776169699209, 'z_excellent': 3959983.5534502217, 'z_good': 3990619.622250669, 'z_moderate': 4302002.350836964, 'z_poor': 2736761.6846693275, 'z_unknown': 2962895.922472194}),\n",
- " Arm(name='1_6', parameters={'x_excellent': 0.7934346148309306, 'x_good': 0.7255504688128516, 'x_moderate': 0.46906013571592303, 'x_poor': 0.12673747942806995, 'x_unknown': 0.6730366227643254, 'y_excellent': 2.5406749421774055, 'y_good': 1.8477325872737815, 'y_moderate': 0.9485910267823123, 'y_poor': 0.2917996437995578, 'y_unknown': 1.4650474269621556, 'z_excellent': 3823503.8905472592, 'z_good': 3244042.3595880833, 'z_moderate': 2447219.757960169, 'z_poor': 2597221.69228601, 'z_unknown': 1804522.1057251126}),\n",
- " Arm(name='status_quo', parameters={'x_excellent': 0.0, 'x_good': 0.0, 'x_moderate': 0.0, 'x_poor': 0.0, 'x_unknown': 0.0, 'y_excellent': 1.0, 'y_good': 1.0, 'y_moderate': 1.0, 'y_poor': 1.0, 'y_unknown': 1.0, 'z_excellent': 1000000.0, 'z_good': 1000000.0, 'z_moderate': 1000000.0, 'z_poor': 1000000.0, 'z_unknown': 1000000.0})]"
- ]
- },
- "execution_count": 26,
- "metadata": {
- "bento_obj_id": "140009573436168"
- },
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"experiment.trials[1].arms"
]
@@ -1222,24 +563,9 @@
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[GeneratorRunStruct(generator_run=GeneratorRun(3 arms, total weight 3.0), weight=1.0),\n",
- " GeneratorRunStruct(generator_run=GeneratorRun(3 arms, total weight 3.0), weight=1.0),\n",
- " GeneratorRunStruct(generator_run=GeneratorRun(1 arms, total weight 1.0), weight=1.0)]"
- ]
- },
- "execution_count": 27,
- "metadata": {
- "bento_obj_id": "140009539240520"
- },
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"experiment.trials[1]._generator_run_structs"
]
@@ -1253,22 +579,9 @@
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "OptimizationConfig(objective=Objective(metric_name=\"metric_1\", minimize=False), outcome_constraints=[])"
- ]
- },
- "execution_count": 28,
- "metadata": {
- "bento_obj_id": "140009539294936"
- },
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"experiment.trials[1]._generator_run_structs[0].generator_run.optimization_config"
]
@@ -1276,11 +589,23 @@
],
"metadata": {
"kernelspec": {
- "display_name": "python3",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.17"
}
},
"nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
}
diff --git a/tutorials/raytune_pytorch_cnn.ipynb b/tutorials/raytune_pytorch_cnn.ipynb
index 9bb47c04a19..805e7b6fdd8 100644
--- a/tutorials/raytune_pytorch_cnn.ipynb
+++ b/tutorials/raytune_pytorch_cnn.ipynb
@@ -1,320 +1,330 @@
{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {
- "originalKey": "6dba2bea-d97e-4545-9803-4242850e1807"
- },
- "source": [
- "# Ax Service API with RayTune on PyTorch CNN\n",
- "\n",
- "Ax integrates easily with different scheduling frameworks and distributed training frameworks. In this example, Ax-driven optimization is executed in a distributed fashion using [RayTune](https://ray.readthedocs.io/en/latest/tune.html). \n",
- "\n",
- "RayTune is a scalable framework for hyperparameter tuning that provides many state-of-the-art hyperparameter tuning algorithms and seamlessly scales from laptop to distributed cluster with fault tolerance. RayTune leverages [Ray](https://ray.readthedocs.io/)'s Actor API to provide asynchronous parallel and distributed execution.\n",
- "\n",
- "Ray 'Actors' are a simple and clean abstraction for replicating your Python classes across multiple workers and nodes. Each hyperparameter evaluation is asynchronously executed on a separate Ray actor and reports intermediate training progress back to RayTune. Upon reporting, RayTune then uses this information to performs actions such as early termination, re-prioritization, or checkpointing."
- ]
+ "cells": [
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {
+ "originalKey": "6dba2bea-d97e-4545-9803-4242850e1807"
+ },
+ "source": [
+ "# Ax Service API with RayTune on PyTorch CNN\n",
+ "\n",
+ "Ax integrates easily with different scheduling frameworks and distributed training frameworks. In this example, Ax-driven optimization is executed in a distributed fashion using [RayTune](https://ray.readthedocs.io/en/latest/tune.html). \n",
+ "\n",
+ "RayTune is a scalable framework for hyperparameter tuning that provides many state-of-the-art hyperparameter tuning algorithms and seamlessly scales from laptop to distributed cluster with fault tolerance. RayTune leverages [Ray](https://ray.readthedocs.io/)'s Actor API to provide asynchronous parallel and distributed execution.\n",
+ "\n",
+ "Ray 'Actors' are a simple and clean abstraction for replicating your Python classes across multiple workers and nodes. Each hyperparameter evaluation is asynchronously executed on a separate Ray actor and reports intermediate training progress back to RayTune. Upon reporting, RayTune then uses this information to perform actions such as early termination, re-prioritization, or checkpointing."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "originalKey": "fe7a9417-4bde-46d2-9de3-af1bc73bde45"
+ },
+ "outputs": [],
+ "source": [
+ "import logging\n",
+ "\n",
+ "from ray import tune\n",
+ "from ray.train import report\n",
+ "from ray.tune.search.ax import AxSearch\n",
+ "\n",
+ "logger = logging.getLogger(tune.__name__)\n",
+ "logger.setLevel(\n",
+ " level=logging.CRITICAL\n",
+ ") # Reduce the number of Ray warnings that are not relevant here."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "originalKey": "19956234-25ae-4e72-9d72-dbcd1b90e530"
+ },
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import torch\n",
+ "from ax.plot.contour import plot_contour\n",
+ "from ax.plot.trace import optimization_trace_single_method\n",
+ "from ax.service.ax_client import AxClient\n",
+ "from ax.utils.notebook.plotting import init_notebook_plotting, render\n",
+ "from ax.utils.tutorials.cnn_utils import CNN, evaluate, load_mnist, train\n",
+ "\n",
+ "init_notebook_plotting()"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {
+ "originalKey": "a26e18f8-caa7-411d-809a-61a9229cd6c6"
+ },
+ "source": [
+ "## 1. Initialize client\n",
+ "We specify `enforce_sequential_optimization` as False, because Ray runs many trials in parallel. With the sequential optimization enforcement, `AxClient` would expect the first few trials to be completed with data before generating more trials.\n",
+ "\n",
+ "When high parallelism is not required, it is best to enforce sequential optimization, as it allows for achieving optimal results in fewer (but sequential) trials. In cases where parallelism is important, such as with distributed training using Ray, we choose to forego minimizing resource utilization and run more trials in parallel."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "originalKey": "a91e1cb2-999a-4b88-a2d2-85d0acaa8854"
+ },
+ "outputs": [],
+ "source": [
+ "ax = AxClient(enforce_sequential_optimization=False)"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {
+ "originalKey": "1766919c-fb6f-4271-a8e1-6f972eee78f3"
+ },
+ "source": [
+ "## 2. Set up experiment\n",
+ "Here we set up the search space and specify the objective; refer to the Ax API tutorials for more detail."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "originalKey": "37e367d4-d09d-425b-98f7-c8849d9be4b7"
+ },
+ "outputs": [],
+ "source": [
+ "MINIMIZE = False # Whether we should be minimizing or maximizing the objective"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "originalKey": "777c8d33-2cd1-4425-b45f-2a44922dce7d"
+ },
+ "outputs": [],
+ "source": [
+ "ax.create_experiment(\n",
+ " name=\"mnist_experiment\",\n",
+ " parameters=[\n",
+ " {\"name\": \"lr\", \"type\": \"range\", \"bounds\": [1e-6, 0.4], \"log_scale\": True},\n",
+ " {\"name\": \"momentum\", \"type\": \"range\", \"bounds\": [0.0, 1.0]},\n",
+ " ],\n",
+ " objective_name=\"mean_accuracy\",\n",
+ " minimize=MINIMIZE,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "originalKey": "589e4d80-02ae-461d-babc-0f96718f623e"
+ },
+ "outputs": [],
+ "source": [
+ "ax.experiment.optimization_config.objective.minimize"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "originalKey": "773a2c32-4ff3-4e92-8996-325504ce953e"
+ },
+ "outputs": [],
+ "source": [
+ "load_mnist(\n",
+ " data_path=\"~/.data\"\n",
+ ") # Pre-load the dataset before the initial evaluations are executed."
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {
+ "originalKey": "5fec848a-3538-489c-bcdd-a74051f48140"
+ },
+ "source": [
+ "## 3. Define how to evaluate trials\n",
+ "Since we use the Ax Service API here, we evaluate the parameterizations that Ax suggests, using RayTune. The evaluation function follows its usual pattern, taking in a parameterization and outputting an objective value. For detail on evaluation functions, see [Trial Evaluation](https://ax.dev/docs/runner.html). "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "originalKey": "75fce84d-35bd-45b5-b55e-f52baf26db03"
+ },
+ "outputs": [],
+ "source": [
+ "def train_evaluate(parameterization):\n",
+ " device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+ " train_loader, valid_loader, test_loader = load_mnist(data_path=\"~/.data\")\n",
+ " net = train(\n",
+ " net=CNN(),\n",
+ " train_loader=train_loader,\n",
+ " parameters=parameterization,\n",
+ " dtype=torch.float,\n",
+ " device=device,\n",
+ " )\n",
+ " report(\n",
+ " {\n",
+ " \"mean_accuracy\": evaluate(\n",
+ " net=net,\n",
+ " data_loader=valid_loader,\n",
+ " dtype=torch.float,\n",
+ " device=device,\n",
+ " )\n",
+ " }\n",
+ " )"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {
+ "originalKey": "dda3574c-5967-43ea-8d23-7a151dc59ec9"
+ },
+ "source": [
+ "## 4. Run optimization\n",
+ "Execute the Ax optimization and trial evaluation in RayTune using [AxSearch algorithm](https://ray.readthedocs.io/en/latest/tune-searchalg.html#ax-search). \n",
+ "We only run 10 trials for demonstration. It is generally recommended to run more trials for best results."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "code_folding": [],
+ "hidden_ranges": [],
+ "originalKey": "1d768bb2-d46b-4c4c-879e-3242af7555f4"
+ },
+ "outputs": [],
+ "source": [
+ "# Set up AxSearcher in RayTune\n",
+ "algo = AxSearch(ax_client=ax)\n",
+ "# Wrap AxSearcher in a concurrency limiter, to ensure that Bayesian optimization receives the\n",
+ "# data for completed trials before creating more trials\n",
+ "algo = tune.search.ConcurrencyLimiter(algo, max_concurrent=3)\n",
+ "tune.run(\n",
+ " train_evaluate,\n",
+ " num_samples=10,\n",
+ " search_alg=algo,\n",
+ " verbose=0, # Set this level to 1 to see status updates and to 2 to also see trial results.\n",
+ " # To use GPU, specify: resources_per_trial={\"gpu\": 1}.\n",
+ ")"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {
+ "originalKey": "cb00f812-e9e5-4208-a680-adf6619d74c4"
+ },
+ "source": [
+ "## 5. Retrieve the optimization results"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "originalKey": "2ec54675-d0ad-4eac-aaf3-66b593037cce"
+ },
+ "outputs": [],
+ "source": [
+ "best_parameters, values = ax.get_best_parameters()\n",
+ "best_parameters"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "originalKey": "50c764a6-a630-4935-9c07-ea84045e0ecc"
+ },
+ "outputs": [],
+ "source": [
+ "means, covariances = values\n",
+ "means"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {
+ "originalKey": "12a87817-4409-4f07-a912-8d60eff71d68"
+ },
+ "source": [
+ "## 6. Plot the response surface and optimization trace"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "originalKey": "3742f35b-6b28-49ae-a606-a138459f4964",
+ "scrolled": false
+ },
+ "outputs": [],
+ "source": [
+ "render(\n",
+ " plot_contour(\n",
+ " model=ax.generation_strategy.model,\n",
+ " param_x=\"lr\",\n",
+ " param_y=\"momentum\",\n",
+ " metric_name=\"mean_accuracy\",\n",
+ " )\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "originalKey": "6dfd23ca-1c93-4846-8e85-4560f9e40304"
+ },
+ "outputs": [],
+ "source": [
+ "# `plot_single_method` expects a 2-d array of means, because it expects to average means from multiple\n",
+ "# optimization runs, so we wrap our best objectives array in another array.\n",
+ "best_objectives = np.array(\n",
+ " [[trial.objective_mean * 100 for trial in ax.experiment.trials.values()]]\n",
+ ")\n",
+ "best_objective_plot = optimization_trace_single_method(\n",
+ " y=np.maximum.accumulate(best_objectives, axis=1),\n",
+ " title=\"Model performance vs. # of iterations\",\n",
+ " ylabel=\"Accuracy\",\n",
+ ")\n",
+ "render(best_objective_plot)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "python3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.15"
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "originalKey": "fe7a9417-4bde-46d2-9de3-af1bc73bde45"
- },
- "outputs": [],
- "source": [
- "import logging\n",
- "\n",
- "from ray import tune\n",
- "from ray.tune import report\n",
- "from ray.tune.search.ax import AxSearch\n",
- "\n",
- "logger = logging.getLogger(tune.__name__)\n",
- "logger.setLevel(\n",
- " level=logging.CRITICAL\n",
- ") # Reduce the number of Ray warnings that are not relevant here."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "originalKey": "19956234-25ae-4e72-9d72-dbcd1b90e530"
- },
- "outputs": [],
- "source": [
- "import numpy as np\n",
- "import torch\n",
- "from ax.plot.contour import plot_contour\n",
- "from ax.plot.trace import optimization_trace_single_method\n",
- "from ax.service.ax_client import AxClient\n",
- "from ax.utils.notebook.plotting import init_notebook_plotting, render\n",
- "from ax.utils.tutorials.cnn_utils import CNN, evaluate, load_mnist, train\n",
- "\n",
- "init_notebook_plotting()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "originalKey": "a26e18f8-caa7-411d-809a-61a9229cd6c6"
- },
- "source": [
- "## 1. Initialize client\n",
- "We specify `enforce_sequential_optimization` as False, because Ray runs many trials in parallel. With the sequential optimization enforcement, `AxClient` would expect the first few trials to be completed with data before generating more trials.\n",
- "\n",
- "When high parallelism is not required, it is best to enforce sequential optimization, as it allows for achieving optimal results in fewer (but sequential) trials. In cases where parallelism is important, such as with distributed training using Ray, we choose to forego minimizing resource utilization and run more trials in parallel."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "originalKey": "a91e1cb2-999a-4b88-a2d2-85d0acaa8854"
- },
- "outputs": [],
- "source": [
- "ax = AxClient(enforce_sequential_optimization=False)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "originalKey": "1766919c-fb6f-4271-a8e1-6f972eee78f3"
- },
- "source": [
- "## 2. Set up experiment\n",
- "Here we set up the search space and specify the objective; refer to the Ax API tutorials for more detail."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "originalKey": "37e367d4-d09d-425b-98f7-c8849d9be4b7"
- },
- "outputs": [],
- "source": [
- "MINIMIZE = False # Whether we should be minimizing or maximizing the objective"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "originalKey": "777c8d33-2cd1-4425-b45f-2a44922dce7d"
- },
- "outputs": [],
- "source": [
- "ax.create_experiment(\n",
- " name=\"mnist_experiment\",\n",
- " parameters=[\n",
- " {\"name\": \"lr\", \"type\": \"range\", \"bounds\": [1e-6, 0.4], \"log_scale\": True},\n",
- " {\"name\": \"momentum\", \"type\": \"range\", \"bounds\": [0.0, 1.0]},\n",
- " ],\n",
- " objective_name=\"mean_accuracy\",\n",
- " minimize=MINIMIZE,\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "originalKey": "589e4d80-02ae-461d-babc-0f96718f623e"
- },
- "outputs": [],
- "source": [
- "ax.experiment.optimization_config.objective.minimize"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "originalKey": "773a2c32-4ff3-4e92-8996-325504ce953e"
- },
- "outputs": [],
- "source": [
- "load_mnist(\n",
- " data_path=\"~/.data\"\n",
- ") # Pre-load the dataset before the initial evaluations are executed."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "originalKey": "5fec848a-3538-489c-bcdd-a74051f48140"
- },
- "source": [
- "## 3. Define how to evaluate trials\n",
- "Since we use the Ax Service API here, we evaluate the parameterizations that Ax suggests, using RayTune. The evaluation function follows its usual pattern, taking in a parameterization and outputting an objective value. For detail on evaluation functions, see [Trial Evaluation](https://ax.dev/docs/runner.html). "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "originalKey": "75fce84d-35bd-45b5-b55e-f52baf26db03"
- },
- "outputs": [],
- "source": [
- "def train_evaluate(parameterization):\n",
- " device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
- " train_loader, valid_loader, test_loader = load_mnist(data_path=\"~/.data\")\n",
- " net = train(\n",
- " net=CNN(),\n",
- " train_loader=train_loader,\n",
- " parameters=parameterization,\n",
- " dtype=torch.float,\n",
- " device=device,\n",
- " )\n",
- " report(\n",
- " mean_accuracy=evaluate(\n",
- " net=net,\n",
- " data_loader=valid_loader,\n",
- " dtype=torch.float,\n",
- " device=device,\n",
- " )\n",
- " )"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "originalKey": "dda3574c-5967-43ea-8d23-7a151dc59ec9"
- },
- "source": [
- "## 4. Run optimization\n",
- "Execute the Ax optimization and trial evaluation in RayTune using [AxSearch algorithm](https://ray.readthedocs.io/en/latest/tune-searchalg.html#ax-search):"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "code_folding": [],
- "hidden_ranges": [],
- "originalKey": "1d768bb2-d46b-4c4c-879e-3242af7555f4"
- },
- "outputs": [],
- "source": [
- "# Set up AxSearcher in RayTune\n",
- "algo = AxSearch(ax_client=ax)\n",
- "# Wrap AxSearcher in a concurrently limiter, to ensure that Bayesian optimization receives the\n",
- "# data for completed trials before creating more trials\n",
- "algo = tune.search.ConcurrencyLimiter(algo, max_concurrent=3)\n",
- "tune.run(\n",
- " train_evaluate,\n",
- " num_samples=30,\n",
- " search_alg=algo,\n",
- " verbose=0, # Set this level to 1 to see status updates and to 2 to also see trial results.\n",
- " # To use GPU, specify: resources_per_trial={\"gpu\": 1}.\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "originalKey": "cb00f812-e9e5-4208-a680-adf6619d74c4"
- },
- "source": [
- "## 5. Retrieve the optimization results"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "originalKey": "2ec54675-d0ad-4eac-aaf3-66b593037cce"
- },
- "outputs": [],
- "source": [
- "best_parameters, values = ax.get_best_parameters()\n",
- "best_parameters"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "originalKey": "50c764a6-a630-4935-9c07-ea84045e0ecc"
- },
- "outputs": [],
- "source": [
- "means, covariances = values\n",
- "means"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "originalKey": "12a87817-4409-4f07-a912-8d60eff71d68"
- },
- "source": [
- "## 6. Plot the response surface and optimization trace"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "originalKey": "3742f35b-6b28-49ae-a606-a138459f4964",
- "scrolled": false
- },
- "outputs": [],
- "source": [
- "render(\n",
- " plot_contour(\n",
- " model=ax.generation_strategy.model,\n",
- " param_x=\"lr\",\n",
- " param_y=\"momentum\",\n",
- " metric_name=\"mean_accuracy\",\n",
- " )\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "originalKey": "6dfd23ca-1c93-4846-8e85-4560f9e40304"
- },
- "outputs": [],
- "source": [
- "# `plot_single_method` expects a 2-d array of means, because it expects to average means from multiple\n",
- "# optimization runs, so we wrap out best objectives array in another array.\n",
- "best_objectives = np.array(\n",
- " [[trial.objective_mean * 100 for trial in ax.experiment.trials.values()]]\n",
- ")\n",
- "best_objective_plot = optimization_trace_single_method(\n",
- " y=np.maximum.accumulate(best_objectives, axis=1),\n",
- " title=\"Model performance vs. # of iterations\",\n",
- " ylabel=\"Accuracy\",\n",
- ")\n",
- "render(best_objective_plot)"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.11.5"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat": 4,
+ "nbformat_minor": 2
}