Update Optim to DecoupledSGD in Notebooks (mosaicml#3554)
mvpatel2000 authored Aug 19, 2024
1 parent 9cb3364 commit 98da5f8
Showing 4 changed files with 14 additions and 9 deletions.
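
Every notebook change below follows the same pattern: replace torch.optim.SGD with composer.optim.DecoupledSGDW, the SGD variant with decoupled weight decay that the updated error message in trainer.py now recommends on PyTorch >= 2.4. A minimal sketch of the swap; the stand-in module and hyperparameters are assumptions for illustration only, not taken from the diff:

```python
# Sketch of the optimizer swap made in the notebooks (not part of the diff itself).
import torch

from composer.optim import DecoupledSGDW

model = torch.nn.Linear(8, 2)  # hypothetical stand-in module, for illustration only

# Previously: optimizer = torch.optim.SGD(model.parameters(), lr=0.02, momentum=0.9)
optimizer = DecoupledSGDW(
    model.parameters(),
    lr=0.02,
    momentum=0.9,
)
```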
6 changes: 3 additions & 3 deletions composer/trainer/trainer.py
@@ -2308,9 +2308,9 @@ def fit(
             ) == torch.optim.SGD and version.parse(torch.__version__) >= version.parse('2.4.0'):
                 raise ValueError(
                     'PyTorch 2.4 breaks (distributed) checkpointing with SGD. '
-                    'Please use a different optimizer, e.g. composer.optim.DecoupledSGDW '
-                    'instead. See https://github.com/pytorch/pytorch/issues/133415 '
-                    'for further information.',
+                    'Please use a different optimizer, e.g. composer.optim.DecoupledSGDW, '
+                    'instead or downgrade to PyTorch <2.4. See ',
+                    'https://github.com/pytorch/pytorch/issues/133415 for further information.',
                 )
 
         if self.state.max_duration is None:
8 changes: 5 additions & 3 deletions examples/TPU_Training_in_composer.ipynb
@@ -58,7 +58,8 @@
     "# %pip install 'mosaicml @ git+https://github.com/mosaicml/composer.git'\"\n",
     "\n",
     "from composer import Trainer\n",
-    "from composer.models import ComposerClassifier"
+    "from composer.models import ComposerClassifier\n",
+    "from composer.optim import DecoupledSGDW"
    ]
   },
   {
@@ -166,10 +167,11 @@
     "\n",
     "model = model.to(xm.xla_device())\n",
     "\n",
-    "optimizer = torch.optim.SGD(\n",
+    "optimizer = DecoupledSGDW(\n",
     "    model.parameters(),\n",
     "    lr=0.02,\n",
-    "    momentum=0.9)\n"
+    "    momentum=0.9,\n",
+    ")"
    ]
   },
   {
6 changes: 4 additions & 2 deletions examples/exporting_for_inference.ipynb
@@ -304,8 +304,9 @@
     "import torch\n",
     "from composer import Trainer\n",
     "from composer.algorithms import SqueezeExcite\n",
+    "from composer.optim import DecoupledSGDW\n",
     "\n",
-    "optimizer = torch.optim.SGD(model.parameters(), lr=0.01)\n",
+    "optimizer = DecoupledSGDW(model.parameters(), lr=0.01)\n",
     "scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=5)\n",
     "\n",
     "trainer = Trainer(\n",
@@ -318,7 +319,8 @@
     "    callbacks=[export_callback],\n",
     "    max_duration='2ep',\n",
     "    save_interval='1ep',\n",
-    "    save_overwrite=True)\n",
+    "    save_overwrite=True,\n",
+    ")\n",
     "trainer.fit()"
    ]
   },
3 changes: 2 additions & 1 deletion examples/migrate_from_ptl.ipynb
@@ -301,9 +301,10 @@
    "source": [
     "from composer import Trainer\n",
     "from composer.algorithms import BlurPool\n",
+    "from composer.optim import DecoupledSGDW\n",
     "\n",
     "model = MosaicResnet()\n",
-    "optimizer = torch.optim.SGD(\n",
+    "optimizer = DecoupledSGDW(\n",
     "    model.parameters(),\n",
     "    lr=0.05,\n",
     "    momentum=0.9,\n",
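
Taken together, the notebook diffs all wire the new optimizer into Composer's Trainer the same way. A self-contained sketch of that pattern, under assumptions not found in the commit (a toy classifier, synthetic data, and a short max_duration chosen purely for illustration):

```python
import torch
from torch.utils.data import DataLoader, TensorDataset

from composer import Trainer
from composer.models import ComposerClassifier
from composer.optim import DecoupledSGDW

# Toy model and synthetic data -- assumptions for illustration, not from the notebooks.
module = torch.nn.Linear(32, 10)
model = ComposerClassifier(module, num_classes=10)

dataset = TensorDataset(torch.randn(256, 32), torch.randint(0, 10, (256,)))
train_dataloader = DataLoader(dataset, batch_size=32)

# The decoupled-weight-decay SGD variant the notebooks now construct.
optimizer = DecoupledSGDW(
    model.parameters(),
    lr=0.02,
    momentum=0.9,
)

trainer = Trainer(
    model=model,
    train_dataloader=train_dataloader,
    optimizers=optimizer,
    max_duration='1ep',
)
trainer.fit()
```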
