From 58fc9d1070437368959f18c86cc794e2345e9df7 Mon Sep 17 00:00:00 2001
From: Mihir Patel
Date: Mon, 2 Oct 2023 16:31:57 -0400
Subject: [PATCH 1/3] free mem

---
 llmfoundry/models/hf/hf_causal_lm.py  | 6 ++----
 llmfoundry/models/mpt/modeling_mpt.py | 3 ++-
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/llmfoundry/models/hf/hf_causal_lm.py b/llmfoundry/models/hf/hf_causal_lm.py
index d5ef2435f9..a822bb02ab 100644
--- a/llmfoundry/models/hf/hf_causal_lm.py
+++ b/llmfoundry/models/hf/hf_causal_lm.py
@@ -65,10 +65,8 @@ def __init__(self, om_model_config: Union[DictConfig,
                                               nn.Module],
                  tokenizer: PreTrainedTokenizerBase):
         # set up training and eval metrics
-        train_metrics = [
-            LanguageCrossEntropy(),
-            LanguagePerplexity(),
-        ]
+        use_train_metrics = om_model_config.get('use_train_metrics', True)
+        train_metrics = [LanguageCrossEntropy(), LanguagePerplexity()] if use_train_metrics else []
         eval_metrics = [
             LanguageCrossEntropy(),
             LanguagePerplexity(),
diff --git a/llmfoundry/models/mpt/modeling_mpt.py b/llmfoundry/models/mpt/modeling_mpt.py
index b1dff15398..389bf6883d 100644
--- a/llmfoundry/models/mpt/modeling_mpt.py
+++ b/llmfoundry/models/mpt/modeling_mpt.py
@@ -694,7 +694,8 @@ def __init__(
         hf_config = MPTConfig.from_dict(resolved_om_model_config)
         model = MPTForCausalLM(hf_config)
 
-        train_metrics = [LanguageCrossEntropy(), LanguagePerplexity()]
+        use_train_metrics = resolved_om_model_config.get('use_train_metrics', True)
+        train_metrics = [LanguageCrossEntropy(), LanguagePerplexity()] if use_train_metrics else []
         eval_metrics = [
             LanguageCrossEntropy(),
             LanguagePerplexity(),

From 256bb01ce8d56486829090e69be1d23465b77550 Mon Sep 17 00:00:00 2001
From: Mihir Patel
Date: Mon, 2 Oct 2023 16:44:10 -0400
Subject: [PATCH 2/3] lint

---
 llmfoundry/models/hf/hf_causal_lm.py  | 3 ++-
 llmfoundry/models/mpt/modeling_mpt.py | 6 ++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/llmfoundry/models/hf/hf_causal_lm.py b/llmfoundry/models/hf/hf_causal_lm.py
index a822bb02ab..a9506e67cb 100644
--- a/llmfoundry/models/hf/hf_causal_lm.py
+++ b/llmfoundry/models/hf/hf_causal_lm.py
@@ -66,7 +66,8 @@ def __init__(self, om_model_config: Union[DictConfig,
                  tokenizer: PreTrainedTokenizerBase):
         # set up training and eval metrics
         use_train_metrics = om_model_config.get('use_train_metrics', True)
-        train_metrics = [LanguageCrossEntropy(), LanguagePerplexity()] if use_train_metrics else []
+        train_metrics = [LanguageCrossEntropy(),
+                         LanguagePerplexity()] if use_train_metrics else []
         eval_metrics = [
             LanguageCrossEntropy(),
             LanguagePerplexity(),
diff --git a/llmfoundry/models/mpt/modeling_mpt.py b/llmfoundry/models/mpt/modeling_mpt.py
index 389bf6883d..2545f5b5d1 100644
--- a/llmfoundry/models/mpt/modeling_mpt.py
+++ b/llmfoundry/models/mpt/modeling_mpt.py
@@ -694,8 +694,10 @@ def __init__(
         hf_config = MPTConfig.from_dict(resolved_om_model_config)
         model = MPTForCausalLM(hf_config)
 
-        use_train_metrics = resolved_om_model_config.get('use_train_metrics', True)
-        train_metrics = [LanguageCrossEntropy(), LanguagePerplexity()] if use_train_metrics else []
+        use_train_metrics = resolved_om_model_config.get(
+            'use_train_metrics', True)
+        train_metrics = [LanguageCrossEntropy(),
+                         LanguagePerplexity()] if use_train_metrics else []
         eval_metrics = [
             LanguageCrossEntropy(),
             LanguagePerplexity(),

From 9ecfce3d52c8521256be91c8a2a1e561b62f06b1 Mon Sep 17 00:00:00 2001
From: Mihir Patel
Date: Mon, 2 Oct 2023 18:11:28 -0400
Subject: [PATCH 3/3] lint

---
 llmfoundry/models/hf/hf_causal_lm.py  | 7 ++++---
 llmfoundry/models/mpt/modeling_mpt.py | 3 +--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/llmfoundry/models/hf/hf_causal_lm.py b/llmfoundry/models/hf/hf_causal_lm.py
index a9506e67cb..ce398a8b2d 100644
--- a/llmfoundry/models/hf/hf_causal_lm.py
+++ b/llmfoundry/models/hf/hf_causal_lm.py
@@ -65,9 +65,7 @@ def __init__(self, om_model_config: Union[DictConfig,
                                               nn.Module],
                  tokenizer: PreTrainedTokenizerBase):
         # set up training and eval metrics
-        use_train_metrics = om_model_config.get('use_train_metrics', True)
-        train_metrics = [LanguageCrossEntropy(),
-                         LanguagePerplexity()] if use_train_metrics else []
+        train_metrics = [LanguageCrossEntropy(), LanguagePerplexity()]
         eval_metrics = [
             LanguageCrossEntropy(),
             LanguagePerplexity(),
@@ -91,6 +89,9 @@ def __init__(self, om_model_config: Union[DictConfig,
                 'which is not significantly slower and not compatible with the LLM foundry training code, rather than the code release by MosaicML.'
             )
 
+        if not om_model_config.get('use_train_metrics', True):
+            train_metrics = []
+
         # load the model config
         trust_remote_code = om_model_config.get('trust_remote_code', True)
         use_auth_token = om_model_config.get('use_auth_token', False)
diff --git a/llmfoundry/models/mpt/modeling_mpt.py b/llmfoundry/models/mpt/modeling_mpt.py
index 2545f5b5d1..cd162195b6 100644
--- a/llmfoundry/models/mpt/modeling_mpt.py
+++ b/llmfoundry/models/mpt/modeling_mpt.py
@@ -694,8 +694,7 @@ def __init__(
         hf_config = MPTConfig.from_dict(resolved_om_model_config)
         model = MPTForCausalLM(hf_config)
 
-        use_train_metrics = resolved_om_model_config.get(
-            'use_train_metrics', True)
+        use_train_metrics = om_model_config.get('use_train_metrics', True)
         train_metrics = [LanguageCrossEntropy(),
                          LanguagePerplexity()] if use_train_metrics else []
         eval_metrics = [
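
Note: the end state of this series gates training metrics behind a single
`use_train_metrics` flag read from the model config, defaulting to True so
existing configs are unaffected. A minimal standalone sketch of that pattern
follows; the stand-in metric classes and the example config values are
illustrative, not the actual llm-foundry imports.

    from omegaconf import DictConfig


    class LanguageCrossEntropy:
        """Stand-in for the real metric class imported in hf_causal_lm.py."""


    class LanguagePerplexity:
        """Stand-in for the real metric class imported in hf_causal_lm.py."""


    # Illustrative model config; in llm-foundry this comes from the YAML.
    om_model_config = DictConfig({'use_train_metrics': False})

    # Flag defaults to True; when False, no training metric objects are
    # built, so no metric state is allocated during training ("free mem").
    use_train_metrics = om_model_config.get('use_train_metrics', True)
    train_metrics = [LanguageCrossEntropy(),
                     LanguagePerplexity()] if use_train_metrics else []

    # Eval metrics are unaffected by the flag.
    eval_metrics = [LanguageCrossEntropy(), LanguagePerplexity()]

    print(train_metrics)  # [] when use_train_metrics is False

In a training YAML this would presumably be toggled under the model section
(e.g. `use_train_metrics: false`); that key name is taken from the patches
above, while the YAML placement is an assumption.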