Fix DDP unused param error when TE is enabled in NeMo Lite (#11364)
* Fix DDP unused param error when TE is enabled

Signed-off-by: Onur Yilmaz <[email protected]>

* Added a partial function for TE

Signed-off-by: Onur Yilmaz <[email protected]>

* Apply isort and black reformatting

Signed-off-by: oyilmaz-nvidia <[email protected]>

---------

Signed-off-by: Onur Yilmaz <[email protected]>
Signed-off-by: oyilmaz-nvidia <[email protected]>
Co-authored-by: oyilmaz-nvidia <[email protected]>
oyilmaz-nvidia authored Nov 24, 2024
1 parent e83d3ea commit 3afcde0
Showing 2 changed files with 18 additions and 11 deletions.
examples/llm/sft/hf.py (23 changes: 12 additions & 11 deletions)
@@ -19,7 +19,7 @@

 from nemo import lightning as nl
 from nemo.collections import llm
-from nemo.lightning.pytorch.accelerate.transformer_engine import is_te_accelerated, te_accelerate
+from nemo.lightning.pytorch.accelerate.transformer_engine import is_te_accelerated
 from nemo.lightning.pytorch.callbacks import ModelCallback

@@ -75,16 +75,17 @@ def squad(tokenizer) -> pl.LightningDataModule:
     grad_clip = None
     use_dist_samp = False

-    model = llm.HfAutoModelForCausalLM(args.model)
-    tokenizer = model.tokenizer
-
-    callbacks = []
-    if args.model_accelerator:
-        if args.model_accelerator == "te":
-            model_transform = ModelCallback(
-                on_train_start=lambda model: te_accelerate(model, fp8_autocast=args.fp8_autocast)
-            )
-            callbacks.append(model_transform)
+    model_accelerator = None
+    if args.model_accelerator == "te":
+        from functools import partial
+
+        from nemo.lightning.pytorch.accelerate.transformer_engine import te_accelerate
+
+        model_accelerator = partial(te_accelerate, fp8_autocast=args.fp8_autocast)
+
+    model = llm.HfAutoModelForCausalLM(model_name=args.model, model_accelerator=model_accelerator)
+    tokenizer = model.tokenizer

     llm.api.finetune(
         model=model,
@@ -100,7 +101,7 @@ def squad(tokenizer) -> pl.LightningDataModule:
             accumulate_grad_batches=10,
             gradient_clip_val=grad_clip,
             use_distributed_sampler=use_dist_samp,
-            callbacks=callbacks,
+            callbacks=[],
            logger=wandb,
         ),
         optim=fdl.build(llm.adam.pytorch_adam_with_flat_lr(lr=1e-5)),
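The net effect in hf.py: TE conversion moves from a training-time callback into model construction. Reading the diff, the likely failure mode is that te_accelerate running at on_train_start rewrites modules after DDP has already wrapped the model and registered the original parameters with its reducer, so those parameters never receive gradients and DDP raises its unused-parameter error. Passing a functools.partial as model_accelerator lets the model apply TE inside configure_model, before DDP wraps it. A minimal sketch of the new call pattern, assuming transformer_engine and its NeMo integration are installed; the model name is a placeholder, not from the diff:

from functools import partial

from nemo.collections import llm
from nemo.lightning.pytorch.accelerate.transformer_engine import te_accelerate

# Bind the TE options up front; HfAutoModelForCausalLM invokes this callable
# on the freshly built HF model inside configure_model(), i.e. before the
# DDP strategy wraps the module.
accelerator = partial(te_accelerate, fp8_autocast=False)

# "gpt2" is a placeholder model name.
model = llm.HfAutoModelForCausalLM(model_name="gpt2", model_accelerator=accelerator)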
nemo/collections/llm/gpt/model/hf_auto_model_for_causal_lm.py (6 changes: 6 additions & 0 deletions)
@@ -39,6 +39,7 @@ def __init__(
         tokenizer=None,
         loss_fn=masked_cross_entropy,
         model_transform=None,
+        model_accelerator=None,
         trust_remote_code=False,
     ):
         super().__init__()
@@ -50,6 +51,7 @@ def __init__(
         self.load_pretrained_weights = load_pretrained_weights
         self.is_hf_model = True
         self.model_transform = model_transform
+        self.model_accelerator = model_accelerator
         self.trust_remote_code = trust_remote_code

     @property
@@ -78,6 +80,10 @@ def configure_model(self):

         config = AutoConfig.from_pretrained(self.model_name, trust_remote_code=self.trust_remote_code)
         self.model = AutoModelForCausalLM.from_config(config, trust_remote_code=self.trust_remote_code)
+
+        if self.model_accelerator is not None:
+            self.model_accelerator(self.model)
+
         self.model.train()

     def forward(self, input_ids, attention_mask=None, labels=None, loss_mask=None):
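Note that the hook is generic: configure_model() invokes whatever callable was supplied as model_accelerator, passing it the freshly instantiated HF model, before train() runs and before the strategy wraps the module. A minimal sketch of a custom hook under that contract; the bf16 cast is an illustrative stand-in for te_accelerate, not part of this commit:

import torch

from nemo.collections import llm


def to_bf16(model):
    # Any in-place transform of the HF model works here, since the hook
    # runs inside configure_model(), before DDP wraps the module.
    model.to(torch.bfloat16)


# "gpt2" is a placeholder model name.
model = llm.HfAutoModelForCausalLM(model_name="gpt2", model_accelerator=to_bf16)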
