From b32e48c8af411e7830b132bf3c6a5fe02cd7ac69 Mon Sep 17 00:00:00 2001
From: Jose Javier <26491792+josejg@users.noreply.github.com>
Date: Tue, 20 Aug 2024 12:13:01 -0700
Subject: [PATCH] Register NaN Monitor Callback (#1471)

---
 llmfoundry/callbacks/__init__.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llmfoundry/callbacks/__init__.py b/llmfoundry/callbacks/__init__.py
index ef3d164890..660f282267 100644
--- a/llmfoundry/callbacks/__init__.py
+++ b/llmfoundry/callbacks/__init__.py
@@ -7,6 +7,7 @@
     LRMonitor,
     MemoryMonitor,
     MemorySnapshot,
+    NaNMonitor,
     OOMObserver,
     OptimizerMonitor,
     RuntimeEstimator,
@@ -58,6 +59,7 @@
 callbacks.register('run_timeout', func=RunTimeoutCallback)
 callbacks.register('loss_perp_v_len', func=LossPerpVsContextLengthLogger)
 callbacks.register('env_logging', func=EnvironmentLoggingCallback)
+callbacks.register('nan_monitor', func=NaNMonitor)
 
 callbacks_with_config.register('async_eval', func=AsyncEval)
 callbacks_with_config.register('curriculum_learning', func=CurriculumLearning)