Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new GC option #907

Merged
merged 3 commits into from
Jan 24, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion llmfoundry/callbacks/scheduled_gc_callback.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# SPDX-License-Identifier: Apache-2.0

import gc
from typing import Optional

import torch
from composer.core import Callback, State
Expand All @@ -19,16 +20,19 @@ class ScheduledGarbageCollector(Callback):
"""Disable automatic garbage collection and collect garbage at interval.

Args:
batch_interval (int): Number of batches between checkpoints call to gc.collect()
batch_interval (int): Number of batches between calls to gc.collect()
gen_1_batch_interval (int, optional): Number of batches between calls to gc.collect(1); when None, no generation-1 collection is scheduled (default: None)
eval_keep_disabled (bool): keep gc disabled during eval (default: False)
"""

def __init__(
self,
batch_interval: int,
gen_1_batch_interval: Optional[int] = None,
eval_keep_disabled: bool = False,
):
self.batch_interval = batch_interval
self.gen_1_batch_interval = gen_1_batch_interval
self.eval_keep_disabled = eval_keep_disabled
self.gc_init_state = None

Expand Down Expand Up @@ -56,6 +60,9 @@ def fit_end(self, state: State, logger: Logger) -> None:
def before_dataloader(self, state: State, logger: Logger) -> None:
del logger # unused

if self.gen_1_batch_interval is not None and state.timestamp.batch.value % self.gen_1_batch_interval == 0:
gc.collect(1)

if state.timestamp.batch.value % self.batch_interval == 0:
gc_cuda()

Expand Down
Loading