diff --git a/llmfoundry/callbacks/curriculum_learning_callback.py b/llmfoundry/callbacks/curriculum_learning_callback.py
index 961bf1cae1..7ab24f53ba 100644
--- a/llmfoundry/callbacks/curriculum_learning_callback.py
+++ b/llmfoundry/callbacks/curriculum_learning_callback.py
@@ -7,47 +7,228 @@
 the future.
 """
 
+import copy
 import logging
-from typing import Any, Dict
+from typing import Any
 
-from composer.core import State
-from composer.loggers import Logger
+from composer import DataSpec
+from composer.core import State, Time, TimeUnit, ensure_time
+from composer.loggers import Logger, MosaicMLLogger
 from streaming import StreamingDataset
+from streaming.base.util import clean_stale_shared_memory
 from torch.utils.data import DataLoader
 
 from llmfoundry.interfaces import CallbackWithConfig
-from llmfoundry.utils.warnings import experimental_class
+from llmfoundry.utils.exceptions import (
+    BaseContextualError,
+    TrainDataLoaderLocation,
+)
 
 log = logging.getLogger(__name__)
 
 __all__ = ['CurriculumLearning']
 
 
-@experimental_class('CurriculumLearning callback')
 class CurriculumLearning(CallbackWithConfig):
     """Starts an epoch with a different dataset when resuming from a checkpoint.
 
     Args:
-        train_config (Dict): The configuration of the dataset currently
+        train_config (dict): The configuration of the dataset currently
             being used. Note that this is the full train config and must
-            contain the 'train_loader' key.
-        dataset_index (int): The index of the dataset currently being used.
+            contain the 'train_loader', 'device_train_batch_size', and
+            'tokenizer' keys.
+        schedule (list[dict[str, Any]]): The list of datamixes to use and their
+            durations. Duration units must match max_duration and be in terms of
+            a TimeUnit that is supported by Iteration. The duration values must
+            be positive. There must be at least one datamix in the schedule. The
+            first datamix in the schedule must match the train_loader in the
+            train_config. On resumption, previously trained on datamixes and
+            durations cannot be changed. The duration of the current datamix
+            must be greater than the saved timestamp. The dataset must be a
+            StreamingDataset.
     """
 
-    def __init__(self, train_config: Dict, dataset_index: int):
-        self.dataset_index = dataset_index
-        self.saved_dataset_index = 0
-        self.all_dataset_configs = []
-        self.current_dataset_state = {}
-        # The current dataset config is resolved and passed in train.py
-        self.current_dataset_config = train_config['train_loader']
+    def __init__(
+        self,
+        train_config: dict[str, Any],
+        schedule: list[dict[str, Any]],
+    ):
+        # Inline import to avoid circular imports
+        from llmfoundry.utils.builders import build_tokenizer
+
+        # Ensure all duration units are in epochs or tokens and values are positive
+        self._schedule = schedule
+        if len(self._schedule) == 0:
+            raise ValueError('The schedule must have at least one datamix.')
+        for index, datamix in enumerate(self._schedule):
+            self._validate_datamix(datamix)
+
+            if (
+                index == 0 and
+                train_config['train_loader'] != datamix['train_loader']
+            ):
+                raise ValueError((
+                    'The first datamix in the schedule must match the '
+                    'train_loader in the train_config.'
+                ))
+
+        self._schedule_index = 0
+
+        # Copied from llmfoundry/utils/config_utils.py
+        self.device_train_batch_size = train_config['device_train_batch_size']
+
+        # Copied from scripts/train/train.py
+        tokenizer_name = train_config['tokenizer']['name']
+        tokenizer_kwargs = train_config['tokenizer'].get('kwargs', {})
+        self.tokenizer = build_tokenizer(tokenizer_name, tokenizer_kwargs)
 
     def before_load(self, state: State, logger: Logger):
         del logger
 
-        # Save the current dataset state so we can restore it correctly
-        # if we are resuming with a new dataset.
-        train_loader = state.train_dataloader
+        # Ensure all duration units are the same as max_duration
+        datamix_units = [datamix['duration'].unit for datamix in self._schedule]
+        assert state.max_duration is not None, 'max_duration should have beeen set.'
+        if any(state.max_duration.unit != unit for unit in datamix_units):
+            raise ValueError((
+                f'All durations in the schedule must have the same units as '
+                f'the max_duration. Expected {state.max_duration.unit}, but '
+                f'got {datamix_units}.'
+            ))
+
+        # Ensure schedule duration is equal to max_duration
+        schedule_duration = Time(0, state.max_duration.unit)
+        for datamix in self._schedule:
+            assert isinstance(datamix['duration'], Time)
+            schedule_duration += datamix['duration']
+        if schedule_duration != state.max_duration:
+            raise ValueError((
+                'The sum of all durations in the schedule must be equal to the '
+                'max_duration.'
+            ))
+
+        self._validate_dataloader(state.train_dataloader)
+
+    def after_load(self, state: State, logger: Logger):
+        del logger  # unused
+
+        self._validate_dataloader(state.train_dataloader)
+
+        # If checkpoint was saved before iteration was incremented, we need to increment it now
+        if ((
+            self._schedule[self._schedule_index]['duration'].unit
+            == TimeUnit.TOKEN and state.timestamp.token_in_iteration >=
+            self._schedule[self._schedule_index]['duration'].value
+        ) or (
+            self._schedule[self._schedule_index]['duration'].unit
+            == TimeUnit.EPOCH and state.timestamp.epoch_in_iteration >=
+            self._schedule[self._schedule_index]['duration'].value
+        )):
+            log.error((
+                'The CurriculumLearning callback has detected that the previous run did not correctly '
+                'increment the iteration.'
+            ))
+            self._schedule_index += 1
+            state.timestamp = state.timestamp.to_next_iteration()
+
+    def iteration_start(self, state: State, logger: Logger):
+        # Swap the dataset if starting a new iteration that's not the original datamix
+        if self._schedule_index > 0:
+            # TODO: trainer._train_data_spec should be updated whenever the dataloader is updated
+            # Dataloaders with the same prefix access the same shared memory
+            # which is stale
+            clean_stale_shared_memory()
+            datamix = copy.deepcopy(self._schedule[self._schedule_index])
+            data_spec = self._build_train_loader(
+                train_loader_config=datamix['train_loader'],
+                logger=logger,
+            )
+            state.set_dataloader(
+                dataloader=data_spec.dataloader,
+                dataloader_label='train',
+            )
+            state.train_dataloader = state.dataloader
+            self._validate_dataloader(state.train_dataloader)
+
+        # Set the length of the new iteration
+        state._iteration_length = self._schedule[self._schedule_index
+                                                ]['duration']
+
+    def iteration_end(self, state: State, logger: Logger):
+        del state, logger  # unused
+
+        self._schedule_index += 1
+
+    def state_dict(self):
+        return {
+            'schedule': self._schedule,
+            'schedule_index': self._schedule_index,
+        }
+
+    def load_state_dict(self, state: dict[str, Any]):
+        self._schedule_index = state['schedule_index']
+
+        # Ensure that the schedule has not changed on previously trained datamixes
+        for idx in range(state['schedule_index']):
+            if self._schedule[idx] != state['schedule'][idx]:
+                raise ValueError((
+                    f'Previous datamixes must stay the same across ',
+                    f'resumptions. Expected {state["schedule"][idx]} but got ',
+                    f'{self._schedule[idx]}',
+                ))
+
+        # Ensure that the datamix has not changed on the current datamix
+        current_loader = self._schedule[self._schedule_index]['train_loader']
+        saved_loader = state['schedule'][self._schedule_index]['train_loader']
+        if current_loader != saved_loader:
+            raise ValueError((
+                f'The current datamix must stay the same across resumptions. ',
+                f'Expected {saved_loader} but got {current_loader}',
+            ))
+
+        # Ensure that the current datamix duration is greater than timestamp
+        duration = self._schedule[self._schedule_index]['duration']
+        if not isinstance(duration.unit, (TimeUnit.TOKEN, TimeUnit.EPOCH)):
+            raise ValueError((
+                f'Duration must be in terms of tokens or epochs, but got ',
+                f'{duration.unit}.',
+            ))
+        if ((
+            duration.unit == TimeUnit.TOKEN and
+            duration > state['timestamp'].token_in_iteration
+        ) or (
+            duration.unit == TimeUnit.EPOCH and
+            duration > state['timestamp'].epoch_in_iteration
+        )):
+            raise ValueError((
+                'The duration of the current datamix must be less or equal to '
+                'than the saved timestamp.'
+            ))
+
+    def _build_train_loader(
+        self,
+        train_loader_config: dict[str, Any],
+        logger: Logger,
+    ) -> DataSpec:
+        from llmfoundry.data.dataloader import build_dataloader
+
+        # Copied from scripts/train/train.py
+        log.info(
+            f'Building train loader in CurriculumLearning callback for dataset {self._schedule_index}',
+        )
+        try:
+            return build_dataloader(
+                train_loader_config,
+                self.tokenizer,
+                self.device_train_batch_size,
+            )
+        except BaseContextualError as e:
+            for destination in logger.destinations:
+                if isinstance(destination, MosaicMLLogger):
+                    e.location = TrainDataLoaderLocation
+                    destination.log_exception(e)
+            raise e
+
+    def _validate_dataloader(self, train_loader: Any):
         # Check if we are using a DataLoader and StreamingDataset
         if not isinstance(train_loader, DataLoader):
             raise ValueError(
@@ -61,54 +242,23 @@ def before_load(self, state: State, logger: Logger):
                 f'because it requires loading and saving dataset state. ',
                 f'Instead, got a dataset of type {type(dataset)}',
             )
-        assert isinstance(dataset, StreamingDataset)
-        # Save the current dataset state so we can restore it if needed.
-        self.current_dataset_state = dataset.state_dict(  # type: ignore
-            num_samples=0, from_beginning=False)
 
-    def after_load(self, state: State, logger: Logger):
-        del logger
-
-        # As saved_dataset_index is loaded from state_dict, this only runs when
-        # a user explicitly increments the dataset_index and not on any other
-        # resumption, including autoresume.
-        train_loader = state._train_dataloader
-        assert isinstance(
-            train_loader,
-            DataLoader,
-        ), 'CurriculumLearning callback requires a DataLoader.'
-        dataset = train_loader.dataset
-        assert isinstance(
-            dataset,
-            StreamingDataset,
-        ), 'CurriculumLearning callback requires a StreamingDataset.'
-        if self.saved_dataset_index < self.dataset_index:
-            # Ignore the dataset state that was read in from the checkpoint, and
-            # replace with the new dataset state. This preserves resumption info.
-            if self.current_dataset_state['epoch'] < 0:
-                # Make sure the epoch in the loaded state dict is not negative.
-                # Since `__iter__` has not yet been called on the dataset, the
-                # epoch index in the dataset will still be -1. We need to ensure
-                # that we set the epoch correctly to 0 in this case.
-                self.current_dataset_state['epoch'] = 0
-            dataset.load_state_dict(  # type: ignore
-                self.current_dataset_state)
-            # Start a new epoch since we are using a new dataset.
-            # This will also reset the sample_in_epoch written to checkpoint,
-            # making sure that subsequent resumptions proceed correctly.
-            state.timestamp = state.timestamp.to_next_epoch()
-            # Append the new dataset config to the list of all dataset configs.
-            self.all_dataset_configs.append(self.current_dataset_config)
-        elif self.dataset_index == 0 and len(self.all_dataset_configs) == 0:
-            # Make sure to track our current dataset config if we are just starting training.
-            self.all_dataset_configs.append(self.current_dataset_config)
-
-    def state_dict(self):
-        return {
-            'dataset_index': self.dataset_index,
-            'all_dataset_configs': self.all_dataset_configs,
-        }
+    def _validate_datamix(self, datamix: dict[str, Any]):
+        if 'duration' not in datamix:
+            raise ValueError('Each datamix must have a duration.')
+        datamix['duration'] = ensure_time(
+            datamix['duration'],
+            TimeUnit.EPOCH,
+        )
+        if datamix['duration'].value <= 0:
+            raise ValueError('The duration must be positive.')
+        if (
+            datamix['duration'].unit != TimeUnit.EPOCH and
+            datamix['duration'].unit != TimeUnit.TOKEN
+        ):
+            raise ValueError(
+                'Schedules can only be defined in terms of epochs or tokens.',
+            )
 
-    def load_state_dict(self, state: Dict[str, Any]):
-        self.saved_dataset_index = state.get('dataset_index', 0)
-        self.all_dataset_configs = state.get('all_dataset_configs', [])
+        if 'train_loader' not in datamix:
+            raise ValueError('Each datamix must have a train_loader.')
diff --git a/llmfoundry/data/utils.py b/llmfoundry/data/utils.py
index 206e884f70..99356170a6 100644
--- a/llmfoundry/data/utils.py
+++ b/llmfoundry/data/utils.py
@@ -133,6 +133,10 @@ def get_text_collator(
 ) -> Tuple[Union[transformers.DataCollatorForLanguageModeling,
                  ConcatenatedSequenceCollatorWrapper], int]:
     dataset_cfg = dataloader_cfg.get('dataset')
+    print(type(dataloader_cfg))
+    print(dataloader_cfg)
+    print(type(dataset_cfg))
+    print(dataset_cfg)
     assert isinstance(dataset_cfg, dict)
     eos_token_id = dataset_cfg.get('eos_token_id', None)
     bos_token_id = dataset_cfg.get('bos_token_id', None)
diff --git a/llmfoundry/models/mpt/modeling_mpt.py b/llmfoundry/models/mpt/modeling_mpt.py
index a7cfac1724..c8884a03a1 100644
--- a/llmfoundry/models/mpt/modeling_mpt.py
+++ b/llmfoundry/models/mpt/modeling_mpt.py
@@ -585,8 +585,9 @@ def forward(
                     'sequence_id is a required argument when MPT is configured with attn_uses_sequence_id=True '
                     + 'and the model is in train mode.',
                 )
-            elif (self.attn_uses_sequence_id is
-                  False) and (sequence_id is not None):
+            elif (
+                self.attn_uses_sequence_id is False and sequence_id is not None
+            ):
                 warnings.warn(
                     'MPT received non-None input for `sequence_id` but is configured with attn_uses_sequence_id=False. '
                     +
@@ -1097,7 +1098,7 @@ def __init__(
 
         additional_train_metrics = additional_train_metrics or []
 
-        model = self.model_class(self.config_class(**kwargs),)
+        model = self.model_class(self.config_class(**kwargs))
 
         use_train_metrics = use_train_metrics
         train_metric_names = DEFAULT_CAUSAL_LM_TRAIN_METRICS + additional_train_metrics
diff --git a/llmfoundry/utils/__init__.py b/llmfoundry/utils/__init__.py
index c3b5b2a328..87a08a999d 100644
--- a/llmfoundry/utils/__init__.py
+++ b/llmfoundry/utils/__init__.py
@@ -3,9 +3,11 @@
 
 from llmfoundry.registry import config_transforms
 from llmfoundry.utils.builders import (
+    add_metrics_to_eval_loaders,
     build_algorithm,
     build_callback,
     build_composer_model,
+    build_eval_loaders,
     build_evaluators,
     build_icl_data_and_gauntlet,
     build_icl_evaluators,
@@ -66,8 +68,10 @@
 )
 
 __all__ = [
+    'add_metrics_to_eval_loaders',
     'build_algorithm',
     'build_callback',
+    'build_eval_loaders',
     'build_evaluators',
     'build_icl_data_and_gauntlet',
     'build_icl_evaluators',
diff --git a/tests/callbacks/test_curriculum_learning_callback.py b/tests/callbacks/test_curriculum_learning_callback.py
index bbdbf3d691..a406c0080f 100644
--- a/tests/callbacks/test_curriculum_learning_callback.py
+++ b/tests/callbacks/test_curriculum_learning_callback.py
@@ -1,14 +1,278 @@
 # Copyright 2024 MosaicML LLM Foundry authors
 # SPDX-License-Identifier: Apache-2.0
 
+from contextlib import nullcontext
+from typing import Any, Callable, Optional
+from unittest.mock import MagicMock
+
+import pytest
+from composer.core import State
+from composer.core.time import Time, TimeUnit
+from composer.devices import DeviceCPU
+from composer.loggers import Logger
+from omegaconf import OmegaConf as om
+from torch.utils.data import DataLoader
+
+from llmfoundry.data.text_data import StreamingTextDataset
 from llmfoundry.utils.builders import build_callback
 
 
-def test_curriculum_learning_callback_builds():
-    kwargs = {'dataset_index': 0}
+@pytest.mark.parametrize(
+    'datamix,duration',
+    [
+        (None, '1ep'),
+        ({
+            'dataset': 'some_dataset',
+        }, '1ep'),
+        (None, '10tok'),
+        (None, ''),
+        ({}, '1ep'),
+    ],
+)
+def test_curriculum_learning_callback_init(
+    datamix: Optional[dict[str, Any]],
+    duration: str,
+    tiny_ft_dataloader_cfg: dict[str, Any],
+):
+    test_cfg = _get_test_cfg()
+    test_cfg['train_loader'] = tiny_ft_dataloader_cfg
+    train_loader = test_cfg['train_loader'] if datamix is None else datamix
+    kwargs = {
+        'schedule': [{
+            'duration': duration,
+            'train_loader': train_loader,
+        }, {
+            'duration': '2ep',
+            'train_loader': {},
+        }],
+    }
+    if duration == '':
+        del kwargs['schedule'][0]['duration']
+    if datamix is not None and len(datamix) == 0:
+        del kwargs['schedule'][0]['train_loader']
+
+    context = nullcontext()
+    if datamix is not None or duration == '':
+        context = pytest.raises(ValueError)
+    with context:
+        callback = build_callback(
+            'curriculum_learning',
+            kwargs=kwargs,
+            train_config=test_cfg,
+        )
+        assert callback is not None
+
+
+@pytest.mark.parametrize('duration', ['1ep', '10tok', '2ep'])
+def test_curriculum_learning_callback_before_load(
+    duration: str,
+    build_tiny_mpt: Callable,
+):
+    model = build_tiny_mpt()
+    state = State(
+        model=model,
+        rank_zero_seed=0,
+        run_name='test_state',
+        device=DeviceCPU(),
+    )
+    state.max_duration = '3ep'
+    dl_mock = MagicMock(spec=DataLoader)
+    dl_mock.dataset = MagicMock(spec=StreamingTextDataset)
+    state.train_dataloader = dl_mock
+    logger = Logger(state)
+
+    test_cfg = _get_test_cfg()
+    kwargs = {
+        'schedule': [{
+            'duration': duration,
+            'train_loader': test_cfg['train_loader'],
+        }, {
+            'duration': '2ep',
+            'train_loader': test_cfg['train_loader'],
+        }],
+    }
+
+    callback = build_callback(
+        'curriculum_learning',
+        kwargs=kwargs,
+        train_config=test_cfg,
+    )
+    context = nullcontext()
+    if duration != '1ep':
+        context = pytest.raises(ValueError)
+    with context:
+        callback.before_load(state, logger)
+
+
+def test_curriculum_learning_callback_after_load(build_tiny_mpt: Callable,):
+    model = build_tiny_mpt()
+    state = State(
+        model=model,
+        rank_zero_seed=0,
+        run_name='test_state',
+        device=DeviceCPU(),
+    )
+    state.max_duration = '3ep'
+    dl_mock = MagicMock(spec=DataLoader)
+    dl_mock.dataset = MagicMock(spec=StreamingTextDataset)
+    state.train_dataloader = dl_mock
+    state.timestamp.epoch_in_iteration = 2
+    logger = Logger(state)
+
+    test_cfg = _get_test_cfg()
+    kwargs = {
+        'schedule': [{
+            'duration': '1ep',
+            'train_loader': test_cfg['train_loader'],
+        }, {
+            'duration': '2ep',
+            'train_loader': test_cfg['train_loader'],
+        }],
+    }
+
+    callback = build_callback(
+        'curriculum_learning',
+        kwargs=kwargs,
+        train_config=test_cfg,
+    )
+    assert state.timestamp.iteration == 0
+    callback.after_load(state, logger)
+    assert state.timestamp.iteration == 1
+
+
+def test_curriculum_learning_callback_iteration(
+    build_tiny_mpt: Callable,
+    tiny_ft_dataloader_cfg: dict[str, Any],
+    monkeypatch: pytest.MonkeyPatch,
+):
+    model = build_tiny_mpt()
+    state = State(
+        model=model,
+        rank_zero_seed=0,
+        run_name='test_state',
+        device=DeviceCPU(),
+    )
+    state.max_duration = '3ep'
+    dl_mock = MagicMock(spec=DataLoader)
+    ds_mock = MagicMock(spec=StreamingTextDataset)
+    monkeypatch.setattr(
+        'llmfoundry.data.text_data.StreamingTextDataset',
+        lambda *args,
+        **kwargs: ds_mock,
+    )
+    dl_mock.dataset = ds_mock
+    state.train_dataloader = dl_mock
+    state.timestamp.epoch_in_iteration = 2
+    logger = Logger(state)
+
+    test_cfg = _get_test_cfg()
+    kwargs = {
+        'schedule': [{
+            'duration': '1ep',
+            'train_loader': test_cfg['train_loader'],
+        }, {
+            'duration': '2ep',
+            'train_loader': test_cfg['train_loader'],
+        }],
+    }
+
+    callback = build_callback(
+        'curriculum_learning',
+        kwargs=kwargs,
+        train_config=test_cfg,
+    )
+
+    callback.iteration_start(state, logger)
+    assert state._iteration_length == Time(1, TimeUnit.EPOCH)
+    callback.iteration_end(state, logger)
+    callback.iteration_start(state, logger)
+    assert state._iteration_length == Time(2, TimeUnit.EPOCH)
+
+
+def test_curriculum_learning_callback_state_dict(build_tiny_mpt: Callable,):
+    model = build_tiny_mpt()
+    state = State(
+        model=model,
+        rank_zero_seed=0,
+        run_name='test_state',
+        device=DeviceCPU(),
+    )
+    state.max_duration = '3ep'
+    dl_mock = MagicMock(spec=DataLoader)
+    dl_mock.dataset = MagicMock(spec=StreamingTextDataset)
+    state.train_dataloader = dl_mock
+    state.timestamp.epoch_in_iteration = 2
+    logger = Logger(state)
+
+    test_cfg = _get_test_cfg()
+    kwargs = {
+        'schedule': [{
+            'duration': '1ep',
+            'train_loader': test_cfg['train_loader'],
+        }, {
+            'duration': '2ep',
+            'train_loader': test_cfg['train_loader'],
+        }],
+    }
+
+    callback = build_callback(
+        'curriculum_learning',
+        kwargs=kwargs,
+        train_config=test_cfg,
+    )
+    callback.iteration_start(state, logger)
+    callback.iteration_end(state, logger)
+    assert callback.state_dict() == {
+        'schedule': kwargs['schedule'],
+        'schedule_index': 1,
+    }
+
+
+def test_curriculum_learning_callback_load_state_dict(
+    build_tiny_mpt: Callable,
+):
+    model = build_tiny_mpt()
+    state = State(
+        model=model,
+        rank_zero_seed=0,
+        run_name='test_state',
+        device=DeviceCPU(),
+    )
+    state.max_duration = '3ep'
+    dl_mock = MagicMock(spec=DataLoader)
+    dl_mock.dataset = MagicMock(spec=StreamingTextDataset)
+    state.train_dataloader = dl_mock
+    state.timestamp.epoch_in_iteration = 2
+    logger = Logger(state)
+
+    test_cfg = _get_test_cfg()
+    kwargs = {
+        'schedule': [{
+            'duration': '1ep',
+            'train_loader': test_cfg['train_loader'],
+        }, {
+            'duration': '2ep',
+            'train_loader': test_cfg['train_loader'],
+        }],
+    }
+
     callback = build_callback(
         'curriculum_learning',
         kwargs=kwargs,
-        train_config={'train_loader': {}},
+        train_config=test_cfg,
     )
-    assert callback is not None
+    callback.iteration_start(state, logger)
+    callback.iteration_end(state, logger)
+    assert callback.state_dict() == {
+        'schedule': kwargs['schedule'],
+        'schedule_index': 1,
+    }
+
+
+def _get_test_cfg() -> dict[str, Any]:
+    conf_path = 'scripts/train/yamls/pretrain/testing.yaml'
+    with open(conf_path) as f:
+        test_cfg = om.load(f)
+    batch_size = test_cfg['device_train_microbatch_size']
+    test_cfg['device_train_batch_size'] = batch_size
+    return om.to_container(test_cfg, resolve=True)
diff --git a/tests/fixtures/data.py b/tests/fixtures/data.py
index ff437974bf..2c34dff817 100644
--- a/tests/fixtures/data.py
+++ b/tests/fixtures/data.py
@@ -2,6 +2,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 from pathlib import Path
+from typing import Any
 from unittest.mock import MagicMock, patch
 
 from composer.utils import dist
@@ -26,14 +27,11 @@ def tiny_ft_dataset_path(tmp_path: Path, dataset_size: int = 4) -> Path:
 
 
 @fixture
-@patch('os.cpu_count', MagicMock(return_value=1))
-def tiny_ft_dataloader(
+def tiny_ft_dataloader_cfg(
     tiny_ft_dataset_path: Path,
-    mpt_tokenizer: PreTrainedTokenizerBase,
     max_seq_len: int = 128,
-    device_batch_size: int = 1,
-) -> DataLoader:
-    dataloader_cfg = DictConfig({
+) -> dict[str, Any]:
+    return {
         'dataset': {
             'hf_name': str(tiny_ft_dataset_path),
             'split': 'train',
@@ -49,7 +47,17 @@ def tiny_ft_dataloader(
         'prefetch_factor': 2,
         'persistent_workers': False,
         'timeout': 0,
-    })
+    }
+
+
+@fixture
+@patch('os.cpu_count', MagicMock(return_value=1))
+def tiny_ft_dataloader(
+    mpt_tokenizer: PreTrainedTokenizerBase,
+    tiny_ft_dataloader_cfg: dict[str, Any],
+    device_batch_size: int = 1,
+) -> DataLoader:
+    dataloader_cfg = DictConfig(tiny_ft_dataloader_cfg)
 
     dataloader = build_finetuning_dataloader(
         **dataloader_cfg,