From 210910e47d3b7f3919f2496b019050d890ad4b5b Mon Sep 17 00:00:00 2001
From: Paul Yang
Date: Tue, 15 Oct 2024 22:19:13 -0400
Subject: [PATCH] small edits

---
 examples/torch-training/TorchBasicExample.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/examples/torch-training/TorchBasicExample.py b/examples/torch-training/TorchBasicExample.py
index c5a260f93..ffd502450 100644
--- a/examples/torch-training/TorchBasicExample.py
+++ b/examples/torch-training/TorchBasicExample.py
@@ -219,7 +219,11 @@ def return_status(self):
     # Define a cluster type - here we launch an on-demand AWS cluster with 1 NVIDIA A10G GPU.
     # You can use any cloud you want, or existing compute
     cluster = rh.ondemand_cluster(
-        name="a10g-cluster", instance_type="A10G:1", provider="aws"
+        name="a10g-cluster",
+        instance_type="A10G:1",
+        provider="aws",
+        autostop_mins=120,
+        # name="a10g-cluster", instance_type="T4:1", provider="gcp", autostop_mins=120  # If we wanted to use GCP, for example
     ).up_if_not()
 
     # Next, we define the environment for our module. This includes the required dependencies that need
@@ -244,9 +248,6 @@ def return_status(self):
         name="torch_model"
     ) # Instantiating it based on the remote RH module, and naming it "torch_model".
 
-    # Though we could just as easily run identical code on local
-    # model = SimpleTrainer() # If instantiating a local example
-
     # We set some settings for the model training
     batch_size = 64
     epochs = 5
@@ -286,3 +287,6 @@ def return_status(self):
     example_data, example_target = local_dataset[0][0].unsqueeze(0), local_dataset[0][1]
     prediction = model.predict(example_data)
     print(f"Predicted: {prediction}, Actual: {example_target}")
+
+    # Tear down the cluster when done. If the cluster is saved to Runhouse Den, it can also be reused by name for another task with `cluster = rh.cluster("a10g-cluster")`.
+    cluster.teardown()