From db7a5c01f2656dc0ef383f63a3cb1ed3e1e7227e Mon Sep 17 00:00:00 2001
From: Milo Cress <iamroot@databricks.com>
Date: Fri, 15 Nov 2024 10:33:39 -0500
Subject: [PATCH 1/8] Catch bad data prep (#1644)

Co-authored-by: Daniel King <43149077+dakinggg@users.noreply.github.com>
---
 .../data_prep/convert_delta_to_json.py        |  32 ++-
 llmfoundry/utils/exceptions.py                |   4 +-
 .../data_prep/test_convert_delta_to_json.py   | 206 ++++++++++++------
 tests/utils/test_exceptions.py                |   2 +
 4 files changed, 173 insertions(+), 71 deletions(-)

diff --git a/llmfoundry/command_utils/data_prep/convert_delta_to_json.py b/llmfoundry/command_utils/data_prep/convert_delta_to_json.py
index e9879fa689..acf7086a12 100644
--- a/llmfoundry/command_utils/data_prep/convert_delta_to_json.py
+++ b/llmfoundry/command_utils/data_prep/convert_delta_to_json.py
@@ -1,6 +1,7 @@
 # Copyright 2022 MosaicML LLM Foundry authors
 # SPDX-License-Identifier: Apache-2.0
 
+import json
 import logging
 import os
 import re
@@ -27,6 +28,7 @@
     FaultyDataPrepCluster,
     InsufficientPermissionsError,
     MalformedUCTableError,
+    StoragePermissionError,
     UCNotEnabledError,
 )
 
@@ -681,7 +683,7 @@ def fetch_DT(
 
     log.info(f'Directory {json_output_folder} created.')
 
-    # validate_and_get_cluster_info allows cluster_id to be None if use_serverless is True
+    # Note: validate_and_get_cluster_info allows cluster_id to be None if use_serverless is True.
     method, dbsql, sparkSession = validate_and_get_cluster_info(
         cluster_id=cluster_id,
         databricks_host=DATABRICKS_HOST,
@@ -732,12 +734,38 @@ def fetch_DT(
     if dbsql is not None:
         dbsql.close()
 
-    # combine downloaded jsonl into one big jsonl for IFT
+    # Combine downloaded jsonl into one big jsonl for IFT.
     iterative_combine_jsons(
         json_output_folder,
         os.path.join(json_output_folder, json_output_filename),
     )
 
+    _validate_written_file(
+        json_output_folder,
+        json_output_filename,
+        delta_table_name,
+    )
+
+
+def _validate_written_file(
+    json_output_folder: str,
+    json_output_filename: str,
+    delta_table_name: str,
+):
+    """Raise if the combined jsonl file is empty or contains invalid JSON."""
+    with open(os.path.join(json_output_folder, json_output_filename)) as f:
+        is_empty = True
+        for line in f:  # Stream lines; the dataset may not fit in memory.
+            is_empty = False
+            try:
+                json.loads(line)
+            except json.JSONDecodeError as e:
+                raise ValueError(f'Line is not valid json: {line}') from e
+        if is_empty:
+            raise StoragePermissionError(
+                f'Unable to download {delta_table_name}, check network permissions.',
+            )
+
 
 def _check_imports():
     try:
diff --git a/llmfoundry/utils/exceptions.py b/llmfoundry/utils/exceptions.py
index 2c2b97fd49..53d3baebfc 100644
--- a/llmfoundry/utils/exceptions.py
+++ b/llmfoundry/utils/exceptions.py
@@ -409,8 +409,10 @@ def __init__(self, output_folder: str) -> None:
 class MisconfiguredHfDatasetError(UserError):
     """Error thrown when a HuggingFace dataset is misconfigured."""
 
-    def __init__(self, dataset_name: str, split: str) -> None:
-        message = f'Your dataset (name={dataset_name}, split={split}) is misconfigured. ' + \
-            'Please check your dataset format and make sure you can load your dataset locally.'
+    def __init__(self, dataset_name: str, split: Optional[str] = None) -> None:
+        # Mention the split in the message only when one was given.
+        split_info = f', split={split}' if split is not None else ''
+        message = f'Your dataset (name={dataset_name}{split_info}) is misconfigured. ' + \
+            'Please check your dataset format and make sure you can load your dataset locally.'
         super().__init__(message, dataset_name=dataset_name, split=split)
 
diff --git a/tests/a_scripts/data_prep/test_convert_delta_to_json.py b/tests/a_scripts/data_prep/test_convert_delta_to_json.py
index ef4a2d0909..bb5b3f93d1 100644
--- a/tests/a_scripts/data_prep/test_convert_delta_to_json.py
+++ b/tests/a_scripts/data_prep/test_convert_delta_to_json.py
@@ -1,9 +1,14 @@
 # Copyright 2022 MosaicML LLM Foundry authors
 # SPDX-License-Identifier: Apache-2.0
 
+import json
+import os
+import shutil
 import sys
 import unittest
 from argparse import Namespace
+from contextlib import contextmanager
+from tempfile import NamedTemporaryFile, mkdtemp
 from typing import Any
 from unittest.mock import MagicMock, mock_open, patch
 
@@ -15,6 +20,7 @@
 from llmfoundry.command_utils.data_prep.convert_delta_to_json import (
     FaultyDataPrepCluster,
     InsufficientPermissionsError,
+    _validate_written_file,
     download,
     fetch,
     fetch_DT,
@@ -25,9 +31,41 @@
 from llmfoundry.utils.exceptions import (
     DeltaTableNotFoundError,
     MalformedUCTableError,
+    StoragePermissionError,
 )
 
 
+def _mock_write_jsonl(filename: str):
+    """Returns a callable that writes a one-sample .jsonl file to filename."""
+    sample = {'prompt': 'prompt', 'response': 'response'}
+
+    def _inner(*_: Any, **__: Any):
+        # Ignores all arguments so it can be used as a mock side_effect.
+        os.makedirs(os.path.dirname(filename), exist_ok=True)
+        with open(filename, 'w') as f:
+            f.write(json.dumps(sample))
+        assert os.path.exists(filename)
+
+    return _inner
+
+
+@contextmanager
+def UncreatedNamedTemporaryFile(suffix: str):
+    """Yields a handle whose ``.name`` is an unused path in a new temp folder."""
+    tempdir = mkdtemp()
+    try:
+        # Only the name is needed: create the file, close it, then delete it
+        # so the code under test is the one responsible for creating it.
+        tmp_file = NamedTemporaryFile(dir=tempdir, suffix=suffix, delete=False)
+        tmp_file.close()
+        os.remove(tmp_file.name)
+        yield tmp_file
+
+    finally:
+        # Removes the folder, and the file too if the test re-created it.
+        shutil.rmtree(tempdir)
+
+
 class TestConvertDeltaToJsonl(unittest.TestCase):
 
     def test_run_query_dbconnect_insufficient_permissions(self):
@@ -91,14 +129,12 @@ def test_stream_delta_to_json(
         mock_sql_connect: Any,
     ):
         delta_table_name = 'test_table'
-        json_output_folder = '/path/to/jsonl'
         DATABRICKS_HOST = 'test_host'
         DATABRICKS_TOKEN = 'test_token'
         http_path = 'test_path'
         batch_size = 1000
         cluster_id = '1234'
         use_serverless = False
-        json_output_filename = 'combined.jsonl'
 
         mock_cluster_get = MagicMock()
         mock_cluster_get.return_value = MagicMock(
@@ -106,28 +142,28 @@ def test_stream_delta_to_json(
         )
         mock_workspace_client.return_value.clusters.get = mock_cluster_get
 
-        fetch_DT(
-            delta_table_name=delta_table_name,
-            json_output_folder=json_output_folder,
-            http_path=http_path,
-            cluster_id=cluster_id,
-            DATABRICKS_HOST=DATABRICKS_HOST,
-            DATABRICKS_TOKEN=DATABRICKS_TOKEN,
-            use_serverless=use_serverless,
-            batch_size=batch_size,
-            json_output_filename=json_output_filename,
-        )
+        with UncreatedNamedTemporaryFile(suffix='.jsonl',) as tf:
+            mock_combine_jsons.side_effect = _mock_write_jsonl(tf.name)
+            json_output_folder, json_output_filename = os.path.split(tf.name)
+            fetch_DT(
+                delta_table_name=delta_table_name,
+                json_output_folder=json_output_folder,
+                http_path=http_path,
+                cluster_id=cluster_id,
+                DATABRICKS_HOST=DATABRICKS_HOST,
+                DATABRICKS_TOKEN=DATABRICKS_TOKEN,
+                use_serverless=use_serverless,
+                batch_size=batch_size,
+                json_output_filename=json_output_filename,
+            )
         mock_sql_connect.assert_called_once_with(
             server_hostname='test_host',
             http_path='test_path',
             access_token='test_token',
         )
-        mock_makedirs.assert_called_once_with('/path/to/jsonl', exist_ok=True)
+        mock_makedirs.assert_called()
         mock_fetch.assert_called_once()
-        mock_combine_jsons.assert_called_once_with(
-            '/path/to/jsonl',
-            '/path/to/jsonl/combined.jsonl',
-        )
+        mock_combine_jsons.assert_called_once()
 
     @patch(
         'llmfoundry.command_utils.data_prep.convert_delta_to_json.os.listdir',
@@ -272,7 +308,6 @@ def test_dbconnect_called(
         mock_sql_connect: Any,
     ):
         delta_table_name = 'test_table'
-        json_output_folder = '/path/to/jsonl'
         # Execute function with http_path=None (should use dbconnect)
         http_path = None
         cluster_id = '1234'
@@ -291,20 +326,25 @@ def test_dbconnect_called(
         )  # Mock return value for getOrCreate
         mock_databricks_session.builder.remote.return_value = mock_remote
 
-        fetch_DT(
-            delta_table_name=delta_table_name,
-            json_output_folder=json_output_folder,
-            http_path=http_path,
-            cluster_id=cluster_id,
-            DATABRICKS_HOST=DATABRICKS_HOST,
-            DATABRICKS_TOKEN=DATABRICKS_TOKEN,
-            use_serverless=use_serverless,
-        )
+        with UncreatedNamedTemporaryFile(suffix='.jsonl',) as tf:
+            mock_combine_jsons.side_effect = _mock_write_jsonl(tf.name)
+            json_output_folder, json_output_filename = os.path.split(tf.name)
+            fetch_DT(
+                delta_table_name=delta_table_name,
+                json_output_folder=json_output_folder,
+                http_path=http_path,
+                cluster_id=cluster_id,
+                DATABRICKS_HOST=DATABRICKS_HOST,
+                DATABRICKS_TOKEN=DATABRICKS_TOKEN,
+                use_serverless=use_serverless,
+                json_output_filename=json_output_filename,
+            )
         mock_databricks_session.builder.remote.assert_called_once_with(
             host=DATABRICKS_HOST,
             token=DATABRICKS_TOKEN,
             cluster_id=cluster_id,
         )
+        mock_combine_jsons.assert_called_once()
 
     @patch(
         'databricks.sql.connect',
@@ -332,7 +372,6 @@ def test_sqlconnect_called_dbr13(
         mock_sql_connect: Any,
     ):
         delta_table_name = 'test_table'
-        json_output_folder = '/path/to/jsonl'
         # Execute function with http_path=None (should use dbconnect)
         http_path = 'test_path'
         cluster_id = '1234'
@@ -346,20 +385,26 @@ def test_sqlconnect_called_dbr13(
         )
         mock_workspace_client.return_value.clusters.get.return_value = mock_cluster_response
 
-        fetch_DT(
-            delta_table_name=delta_table_name,
-            json_output_folder=json_output_folder,
-            http_path=http_path,
-            cluster_id=cluster_id,
-            DATABRICKS_HOST=DATABRICKS_HOST,
-            DATABRICKS_TOKEN=DATABRICKS_TOKEN,
-            use_serverless=use_serverless,
-        )
+        with UncreatedNamedTemporaryFile(suffix='.jsonl',) as tf:
+            mock_combine_jsons.side_effect = _mock_write_jsonl(tf.name)
+            json_output_folder, json_output_filename = os.path.split(tf.name)
+            fetch_DT(
+                delta_table_name=delta_table_name,
+                json_output_folder=json_output_folder,
+                http_path=http_path,
+                cluster_id=cluster_id,
+                DATABRICKS_HOST=DATABRICKS_HOST,
+                DATABRICKS_TOKEN=DATABRICKS_TOKEN,
+                use_serverless=use_serverless,
+                json_output_filename=json_output_filename,
+            )
+
         mock_sql_connect.assert_called_once_with(
             server_hostname=DATABRICKS_HOST,
             http_path=http_path,
             access_token=DATABRICKS_TOKEN,
         )
+        mock_combine_jsons.assert_called_once()
 
     @patch(
         'databricks.sql.connect',
@@ -387,7 +432,6 @@ def test_sqlconnect_called_dbr14(
         mock_sql_connect: Any,
     ):
         delta_table_name = 'test_table'
-        json_output_folder = '/path/to/jsonl'
         # Execute function with http_path=None (should use dbconnect)
         http_path = 'test_path'
         cluster_id = '1234'
@@ -401,20 +445,26 @@ def test_sqlconnect_called_dbr14(
         )
         mock_workspace_client.return_value.clusters.get.return_value = mock_cluster_response
 
-        fetch_DT(
-            delta_table_name=delta_table_name,
-            json_output_folder=json_output_folder,
-            http_path=http_path,
-            cluster_id=cluster_id,
-            DATABRICKS_HOST=DATABRICKS_HOST,
-            DATABRICKS_TOKEN=DATABRICKS_TOKEN,
-            use_serverless=use_serverless,
-        )
+        with UncreatedNamedTemporaryFile(suffix='.jsonl',) as tf:
+            mock_combine_jsons.side_effect = _mock_write_jsonl(tf.name)
+            json_output_folder, json_output_filename = os.path.split(tf.name)
+            fetch_DT(
+                delta_table_name=delta_table_name,
+                json_output_folder=json_output_folder,
+                http_path=http_path,
+                cluster_id=cluster_id,
+                DATABRICKS_HOST=DATABRICKS_HOST,
+                DATABRICKS_TOKEN=DATABRICKS_TOKEN,
+                use_serverless=use_serverless,
+                json_output_filename=json_output_filename,
+            )
+
         mock_sql_connect.assert_called_once_with(
             server_hostname=DATABRICKS_HOST,
             http_path=http_path,
             access_token=DATABRICKS_TOKEN,
         )
+        mock_combine_jsons.assert_called_once()
 
     @patch(
         'databricks.sql.connect',
@@ -442,7 +492,6 @@ def test_sqlconnect_called_https(
         mock_sql_connect: Any,
     ):
         delta_table_name = 'test_table'
-        json_output_folder = '/path/to/jsonl'
         # Execute function with http_path=None (should use dbconnect)
         http_path = 'test_path'
         cluster_id = '1234'
@@ -456,20 +505,25 @@ def test_sqlconnect_called_https(
         )
         mock_workspace_client.return_value.clusters.get.return_value = mock_cluster_response
 
-        fetch_DT(
-            delta_table_name=delta_table_name,
-            json_output_folder=json_output_folder,
-            http_path=http_path,
-            cluster_id=cluster_id,
-            DATABRICKS_HOST=DATABRICKS_HOST,
-            DATABRICKS_TOKEN=DATABRICKS_TOKEN,
-            use_serverless=use_serverless,
-        )
+        with UncreatedNamedTemporaryFile(suffix='.jsonl',) as tf:
+            mock_combine_jsons.side_effect = _mock_write_jsonl(tf.name)
+            json_output_folder, json_output_filename = os.path.split(tf.name)
+            fetch_DT(
+                delta_table_name=delta_table_name,
+                json_output_folder=json_output_folder,
+                http_path=http_path,
+                cluster_id=cluster_id,
+                DATABRICKS_HOST=DATABRICKS_HOST,
+                DATABRICKS_TOKEN=DATABRICKS_TOKEN,
+                use_serverless=use_serverless,
+                json_output_filename=json_output_filename,
+            )
         mock_sql_connect.assert_called_once_with(
             server_hostname='test-host',
             http_path=http_path,
             access_token=DATABRICKS_TOKEN,
         )
+        mock_combine_jsons.assert_called_once()
 
     @patch(
         'databricks.sql.connect',
@@ -497,7 +551,6 @@ def test_serverless(
         mock_sql_connect: Any,
     ):
         delta_table_name = 'test_table'
-        json_output_folder = '/path/to/jsonl'
         # Execute function with http_path=None (should use dbconnect)
         http_path = 'test_path'
         cluster_id = '1234'
@@ -508,17 +561,23 @@ def test_serverless(
         mock_cluster_response = Namespace(spark_version='14.2.0-scala2.12')
         mock_workspace_client.return_value.clusters.get.return_value = mock_cluster_response
 
-        fetch_DT(
-            delta_table_name=delta_table_name,
-            json_output_folder=json_output_folder,
-            http_path=http_path,
-            cluster_id=cluster_id,
-            DATABRICKS_HOST=DATABRICKS_HOST,
-            DATABRICKS_TOKEN=DATABRICKS_TOKEN,
-            use_serverless=use_serverless,
-        )
+        with UncreatedNamedTemporaryFile(suffix='.jsonl',) as tf:
+            mock_combine_jsons.side_effect = _mock_write_jsonl(tf.name)
+            json_output_folder, json_output_filename = os.path.split(tf.name)
+            fetch_DT(
+                delta_table_name=delta_table_name,
+                json_output_folder=json_output_folder,
+                http_path=http_path,
+                cluster_id=cluster_id,
+                DATABRICKS_HOST=DATABRICKS_HOST,
+                DATABRICKS_TOKEN=DATABRICKS_TOKEN,
+                use_serverless=use_serverless,
+                json_output_filename=json_output_filename,
+            )
+
         assert not mock_sql_connect.called
         assert not mock_databricks_session.builder.remote.called
+        mock_combine_jsons.assert_called_once()
 
     def test_format_tablename(self):
         self.assertEqual(
@@ -650,6 +709,17 @@ def test_fetch_nonexistent_table_error(
         # Verify that get_total_rows was called
         mock_gtr.assert_called_once()
 
+    def test_fetch_DT_catches_bad_download(self):
+        """An empty output file is reported as a StoragePermissionError."""
+        with NamedTemporaryFile() as empty_file:
+            folder, filename = os.path.split(empty_file.name)
+            with self.assertRaises(StoragePermissionError):
+                _validate_written_file(
+                    folder,
+                    filename,
+                    'test_delta_table',
+                )
+
     @patch(
         'llmfoundry.command_utils.data_prep.convert_delta_to_json.get_total_rows',
     )
diff --git a/tests/utils/test_exceptions.py b/tests/utils/test_exceptions.py
index 564dfa2f14..fd5a470c15 100644
--- a/tests/utils/test_exceptions.py
+++ b/tests/utils/test_exceptions.py
@@ -63,6 +63,8 @@ def get_default_value(arg_type: Optional[type] = None):
             return bool
         elif arg_type == list[dict[str, Any]]:
             return [{'key': 'value'}]
+        elif arg_type == Optional[str]:
+            return 'string_but_optional'
         raise ValueError(f'Unsupported arg type: {arg_type}')
 
     kwargs = {

From e237af5e73276cf2f59154e930ab681cd0f5702f Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Sun, 17 Nov 2024 22:46:04 -0800
Subject: [PATCH 2/8] Update pytest-cov requirement from <6,>=4 to >=4,<7
 (#1663)

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 63be3c41e8..b14dfb583e 100644
--- a/setup.py
+++ b/setup.py
@@ -83,7 +83,7 @@
     'pre-commit>=3.4.0,<4',
     'pytest>=7.2.1,<9',
     'pytest_codeblocks>=0.16.1,<0.18',
-    'pytest-cov>=4,<6',
+    'pytest-cov>=4,<7',
     'pyright==1.1.256',
     'toml>=0.10.2,<0.11',
     'packaging>=21,<25',

From e2cc41bfad934c7a46e160f527c88ad666f2f3f7 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Sun, 17 Nov 2024 22:47:12 -0800
Subject: [PATCH 3/8] Bump coverage[toml] from 7.6.1 to 7.6.4 (#1650)

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Milo Cress <iamroot@databricks.com>
---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index b14dfb583e..0c8b461572 100644
--- a/setup.py
+++ b/setup.py
@@ -79,7 +79,7 @@
 extra_deps = {}
 
 extra_deps['dev'] = [
-    'coverage[toml]==7.6.1',
+    'coverage[toml]==7.6.4',
     'pre-commit>=3.4.0,<4',
     'pytest>=7.2.1,<9',
     'pytest_codeblocks>=0.16.1,<0.18',

From 8a1e55eb53f0645c1bc98b96c3e2be76a79753c3 Mon Sep 17 00:00:00 2001
From: Irene Dea <deaairene@gmail.com>
Date: Mon, 18 Nov 2024 06:56:41 -0800
Subject: [PATCH 4/8] Move transform_model_pre_registration in hf_checkpointer
 (#1664)

Co-authored-by: Daniel King <43149077+dakinggg@users.noreply.github.com>
---
 llmfoundry/callbacks/hf_checkpointer.py       | 11 ++-
 .../inference/test_convert_composer_to_hf.py  | 72 ++++++++++++++++++-
 2 files changed, 75 insertions(+), 8 deletions(-)

diff --git a/llmfoundry/callbacks/hf_checkpointer.py b/llmfoundry/callbacks/hf_checkpointer.py
index 7ce9818426..4cc5f46d1a 100644
--- a/llmfoundry/callbacks/hf_checkpointer.py
+++ b/llmfoundry/callbacks/hf_checkpointer.py
@@ -784,6 +784,10 @@ def tensor_hook(
 
         if dist.get_global_rank() == 0:
             if register_to_mlflow:
+                assert new_model_instance is not None
+                new_model_instance = self.transform_model_pre_registration(
+                    new_model_instance,
+                )
                 if self.using_peft:
 
                     # Save and register peft model to mlflow, this code path uses our older two step logic
@@ -798,10 +802,6 @@ def tensor_hook(
                         temp_save_dir,
                         'register_save',
                     )
-                    assert new_model_instance is not None
-                    new_model_instance = self.transform_model_pre_registration(
-                        new_model_instance,
-                    )
                     new_model_instance.save_pretrained(
                         register_save_dir,
                         max_shard_size='1GB',
@@ -860,9 +860,6 @@ def _save_and_register_peft_model(
         original_tokenizer: Optional[Any],
         save_dir: str,
     ):
-        new_model_instance = self.transform_model_pre_registration(
-            new_model_instance,
-        )
         components = {'model': new_model_instance}
         if original_tokenizer is not None:
             components['tokenizer'] = original_tokenizer
diff --git a/tests/a_scripts/inference/test_convert_composer_to_hf.py b/tests/a_scripts/inference/test_convert_composer_to_hf.py
index 5dafdcb466..67b4a69a3b 100644
--- a/tests/a_scripts/inference/test_convert_composer_to_hf.py
+++ b/tests/a_scripts/inference/test_convert_composer_to_hf.py
@@ -624,7 +624,7 @@ def test_huggingface_conversion_callback_interval(
 def _get_model_and_tokenizer(
     model: str,
     max_seq_len: int,
-    tie_word_embeddings: bool,
+    tie_word_embeddings: Optional[bool],
     precision: str,
 ):
     if model == 'mpt':
@@ -1110,6 +1110,76 @@ def test_huggingface_conversion_callback(
     delete_transformers_cache()
 
 
+@patch('os.cpu_count', MagicMock(return_value=1))
+@patch(
+    'llmfoundry.callbacks.hf_checkpointer.SpawnProcess',
+    new=MockSpawnProcess,
+)
+def test_transform_model_pre_registration():
+    """Test `transform_model_pre_registration` runs before the PEFT check."""
+
+    class ExtendedHuggingFaceCheckpointer(HuggingFaceCheckpointer):
+        """Checkpointer whose pre-registration hook turns `using_peft` off."""
+
+        def transform_model_pre_registration(self, model: PreTrainedModel):
+            self.using_peft = False
+            return super().transform_model_pre_registration(model)
+
+    model_cfg, tokenizer_name = _get_model_and_tokenizer(
+        model='neo',
+        max_seq_len=10,
+        tie_word_embeddings=None,
+        precision='bfloat16',
+    )
+    model_cfg['peft_config'] = {
+        'peft_type': 'LORA',
+        'task_type': 'CAUSAL_LM',
+        'lora_alpha': 32,
+        'lora_dropout': 0.05,
+        'r': 16,
+        'target_modules': 'all-linear',
+    }
+    tokenizer = build_tokenizer(
+        tokenizer_name=tokenizer_name,
+        tokenizer_kwargs={},
+    )
+
+    original_model = build_composer_model(
+        model_cfg.pop('name'),
+        tokenizer=tokenizer,
+        cfg=model_cfg,
+    )
+
+    logger = MagicMock()
+    state = MagicMock()
+    state.timestamp.batch = 1
+    state.is_model_ddp = False
+    state.model = original_model
+    state.model.tokenizer = tokenizer
+
+    checkpointer = ExtendedHuggingFaceCheckpointer(
+        save_folder='test',
+        save_interval='1ba',
+    )
+    mlflow_logger_mock = _create_mlflow_logger_mock()
+    checkpointer.mlflow_loggers = [mlflow_logger_mock]  # type: ignore
+
+    assert model_cfg is not None
+    assert tokenizer_name is not None
+    # Stub the PEFT save path and start with PEFT on; the hook turns it off.
+    checkpointer._save_and_register_peft_model = MagicMock()
+    checkpointer.using_peft = True
+    checkpointer._save_checkpoint(
+        state=state,
+        logger=logger,
+        upload_to_save_folder=True,
+        register_to_mlflow=True,
+    )
+    # The stubbed PEFT path must be skipped and the model logged exactly once.
+    checkpointer._save_and_register_peft_model.assert_not_called()
+    assert mlflow_logger_mock.log_model.call_count == 1
+
+
 # TODO(GRT-2431): Refactor as enums
 @pytest.mark.parametrize(
     'model,tie_word_embeddings',

From bb94a9a119c894bb5d890f9d75f2ba946d0b1d59 Mon Sep 17 00:00:00 2001
From: Vincent Chen <v.chen@databricks.com>
Date: Mon, 18 Nov 2024 09:22:54 -0800
Subject: [PATCH 5/8] Catch Cluster Permissions Error (#1660)

Co-authored-by: v-chen_data <v-chen_data@example.com>
---
 .../data_prep/convert_delta_to_json.py        |  8 ++++
 .../data_prep/test_convert_delta_to_json.py   | 46 +++++++++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/llmfoundry/command_utils/data_prep/convert_delta_to_json.py b/llmfoundry/command_utils/data_prep/convert_delta_to_json.py
index acf7086a12..1a0e575850 100644
--- a/llmfoundry/command_utils/data_prep/convert_delta_to_json.py
+++ b/llmfoundry/command_utils/data_prep/convert_delta_to_json.py
@@ -706,6 +706,14 @@ def fetch_DT(
             dbsql,
         )
     except (grpc.RpcError, spark_errors.SparkConnectGrpcException) as e:
+        if isinstance(
+            e,
+            spark_errors.SparkConnectGrpcException,
+        ) and 'is not Shared or Single User Cluster' in str(e):
+            raise FaultyDataPrepCluster(
+                message=
+                f'The cluster you have provided: {cluster_id} does not have data governance enabled. Please use a cluster with a data security mode other than NONE. {e}',
+            ) from e
         if isinstance(
             e,
             spark_errors.SparkConnectGrpcException,
diff --git a/tests/a_scripts/data_prep/test_convert_delta_to_json.py b/tests/a_scripts/data_prep/test_convert_delta_to_json.py
index bb5b3f93d1..95610f00b6 100644
--- a/tests/a_scripts/data_prep/test_convert_delta_to_json.py
+++ b/tests/a_scripts/data_prep/test_convert_delta_to_json.py
@@ -750,3 +750,49 @@ def test_fetch_malformed_table_error(
 
         # Verify that get_total_rows was called
         mock_gtr.assert_called_once()
+
+    @patch(
+        'llmfoundry.command_utils.data_prep.convert_delta_to_json.fetch',
+    )
+    @patch(
+        'llmfoundry.command_utils.data_prep.convert_delta_to_json.validate_and_get_cluster_info',
+    )
+    def test_non_shared_single_user_cluster_error(
+        self,
+        mock_validate_cluster_info: MagicMock,
+        mock_fetch: MagicMock,
+    ):
+        mock_validate_cluster_info.return_value = ('dbconnect', None, None)
+        # fetch_DT recognises this failure by the text of the Spark Connect error.
+        exception_message = 'Cluster is not Shared or Single User Cluster'
+        spark_exception = SparkConnectGrpcException(exception_message)
+
+        mock_fetch.side_effect = spark_exception
+        # Inputs. NOTE(review): os.makedirs is not patched here, so fetch_DT
+        # presumably creates json_output_folder on disk -- confirm and clean up.
+        delta_table_name = 'test_table'
+        json_output_folder = '/tmp/to/jsonl'
+        http_path = None
+        cluster_id = 'test-cluster-id'
+        use_serverless = False
+        DATABRICKS_HOST = 'https://test-host'
+        DATABRICKS_TOKEN = 'test-token'
+
+        # The Spark Connect error must be re-raised as FaultyDataPrepCluster.
+        with self.assertRaises(FaultyDataPrepCluster) as context:
+            fetch_DT(
+                delta_table_name=delta_table_name,
+                json_output_folder=json_output_folder,
+                http_path=http_path,
+                cluster_id=cluster_id,
+                use_serverless=use_serverless,
+                DATABRICKS_HOST=DATABRICKS_HOST,
+                DATABRICKS_TOKEN=DATABRICKS_TOKEN,
+            )
+
+        self.assertIn(
+            f'The cluster you have provided: {cluster_id} does not have data governance enabled. Please use a cluster with a data security mode other than NONE.',
+            str(context.exception),
+        )
+
+        mock_fetch.assert_called()

From 800400c2559239e6b0a61b35a69f3b697926c4a3 Mon Sep 17 00:00:00 2001
From: Charles Tang <j316chuck@users.noreply.github.com>
Date: Mon, 18 Nov 2024 16:25:53 -0800
Subject: [PATCH 6/8] Add mosaicml version bump (#1661)

---
 setup.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/setup.py b/setup.py
index 0c8b461572..566e6aae9c 100644
--- a/setup.py
+++ b/setup.py
@@ -52,7 +52,7 @@
 ]
 
 install_requires = [
-    'mosaicml[libcloud,wandb,oci,gcs,mlflow]>=0.26.0,<0.27',
+    'mosaicml[libcloud,wandb,oci,gcs,mlflow]>=0.27.0,<0.28',
     'mlflow>=2.14.1,<2.18',
     'accelerate>=0.25,<1.2',  # for HF inference `device_map`
     'transformers>=4.43.2,<4.47',
@@ -91,7 +91,7 @@
 ]
 
 extra_deps['databricks'] = [
-    'mosaicml[databricks]>=0.26.0,<0.27',
+    'mosaicml[databricks]>=0.27.0,<0.28',
     'numpy<2',
     'databricks-sql-connector>=3,<4',
     'databricks-connect==14.1.0',
@@ -99,7 +99,7 @@
 ]
 
 extra_deps['tensorboard'] = [
-    'mosaicml[tensorboard]>=0.26.0,<0.27',
+    'mosaicml[tensorboard]>=0.27.0,<0.28',
 ]
 
 # Flash 2 group kept for backwards compatibility
@@ -110,7 +110,7 @@
 extra_deps['gpu'] = copy.deepcopy(extra_deps['gpu-flash2'])
 
 extra_deps['peft'] = [
-    'mosaicml[peft]>=0.26.0,<0.27',
+    'mosaicml[peft]>=0.27.0,<0.28',
 ]
 
 extra_deps['openai'] = [

From ce13961de9991649fead5bf17aaf4f8443000e58 Mon Sep 17 00:00:00 2001
From: Abhay Gupta <gupta-abhay@users.noreply.github.com>
Date: Tue, 19 Nov 2024 14:52:06 +0530
Subject: [PATCH 7/8] Changes for removing unused terms in CE loss fn  (#1643)

Co-authored-by: Daniel King <43149077+dakinggg@users.noreply.github.com>
Co-authored-by: Milo Cress <iamroot@databricks.com>
---
 llmfoundry/models/mpt/modeling_mpt.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/llmfoundry/models/mpt/modeling_mpt.py b/llmfoundry/models/mpt/modeling_mpt.py
index 0afb493844..94e5fa29d5 100644
--- a/llmfoundry/models/mpt/modeling_mpt.py
+++ b/llmfoundry/models/mpt/modeling_mpt.py
@@ -79,6 +79,8 @@
 from llmfoundry.models.layers.norm import LPLayerNorm  # type: ignore
 # isort: on
 
+from llmfoundry.utils.warnings import VersionedDeprecationWarning
+
 log = logging.getLogger(__name__)
 
 CROSS_ENTROPY_IGNORE_INDEX = -100
@@ -1360,6 +1362,12 @@ def compute_loss_from_logits(
     else:
         loss = losses.sum() / (targets != loss_fn.ignore_index).sum()
         if sample_weighing_factor is not None:
+            warnings.warn(
+                VersionedDeprecationWarning(
+                    message='sample_weighing_factor has been deprecated!',
+                    remove_version='0.17.0',
+                ),
+            )
             if sample_weighing_factor.shape[0] > 1:
                 raise ValueError(
                     'Sample weighing factor is not supported when batch["sample_weighing_factor"].shape[0] > 1.',

From ee2fb11c33e25d49eedea7ed2e850abde3599ca9 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 19 Nov 2024 15:10:27 -0800
Subject: [PATCH 8/8] Update setuptools requirement from <68.0.0 to <76.0.0
 (#1662)

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 2208fdac2e..ed748e5613 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 # build requirements
 [build-system]
-requires = ["setuptools < 68.0.0"]
+requires = ["setuptools < 76.0.0"]
 build-backend = "setuptools.build_meta"
 
 # iSort