diff --git a/llmfoundry/models/hf/hf_causal_lm.py b/llmfoundry/models/hf/hf_causal_lm.py
index a9506e67cb..ce398a8b2d 100644
--- a/llmfoundry/models/hf/hf_causal_lm.py
+++ b/llmfoundry/models/hf/hf_causal_lm.py
@@ -65,9 +65,7 @@ def __init__(self, om_model_config: Union[DictConfig,
                                               nn.Module],
                  tokenizer: PreTrainedTokenizerBase):
         # set up training and eval metrics
-        use_train_metrics = om_model_config.get('use_train_metrics', True)
-        train_metrics = [LanguageCrossEntropy(),
-                         LanguagePerplexity()] if use_train_metrics else []
+        train_metrics = [LanguageCrossEntropy(), LanguagePerplexity()]
         eval_metrics = [
             LanguageCrossEntropy(),
             LanguagePerplexity(),
@@ -91,6 +89,9 @@ def __init__(self, om_model_config: Union[DictConfig,
                     'which is not significantly slower and not compatible with the LLM foundry training code, rather than the code release by MosaicML.'
                 )
 
+            if not om_model_config.get('use_train_metrics', True):
+                train_metrics = []
+
             # load the model config
             trust_remote_code = om_model_config.get('trust_remote_code', True)
             use_auth_token = om_model_config.get('use_auth_token', False)
diff --git a/llmfoundry/models/mpt/modeling_mpt.py b/llmfoundry/models/mpt/modeling_mpt.py
index 2545f5b5d1..cd162195b6 100644
--- a/llmfoundry/models/mpt/modeling_mpt.py
+++ b/llmfoundry/models/mpt/modeling_mpt.py
@@ -694,8 +694,7 @@ def __init__(
         hf_config = MPTConfig.from_dict(resolved_om_model_config)
         model = MPTForCausalLM(hf_config)
 
-        use_train_metrics = resolved_om_model_config.get(
-            'use_train_metrics', True)
+        use_train_metrics = om_model_config.get('use_train_metrics', True)
         train_metrics = [LanguageCrossEntropy(),
                          LanguagePerplexity()] if use_train_metrics else []
         eval_metrics = [