Skip to content

Commit

Permalink
Merge branch 'main' into chuck/bump-torch-2-5
Browse files Browse the repository at this point in the history
  • Loading branch information
j316chuck authored Nov 20, 2024
2 parents 8356e1f + ee2fb11 commit 26df83f
Show file tree
Hide file tree
Showing 8 changed files with 311 additions and 80 deletions.
11 changes: 4 additions & 7 deletions llmfoundry/callbacks/hf_checkpointer.py
Original file line number Diff line number Diff line change
Expand Up @@ -784,6 +784,10 @@ def tensor_hook(

if dist.get_global_rank() == 0:
if register_to_mlflow:
assert new_model_instance is not None
new_model_instance = self.transform_model_pre_registration(
new_model_instance,
)
if self.using_peft:

# Save and register peft model to mlflow, this code path uses our older two step logic
Expand All @@ -798,10 +802,6 @@ def tensor_hook(
temp_save_dir,
'register_save',
)
assert new_model_instance is not None
new_model_instance = self.transform_model_pre_registration(
new_model_instance,
)
new_model_instance.save_pretrained(
register_save_dir,
max_shard_size='1GB',
Expand Down Expand Up @@ -860,9 +860,6 @@ def _save_and_register_peft_model(
original_tokenizer: Optional[Any],
save_dir: str,
):
new_model_instance = self.transform_model_pre_registration(
new_model_instance,
)
components = {'model': new_model_instance}
if original_tokenizer is not None:
components['tokenizer'] = original_tokenizer
Expand Down
40 changes: 38 additions & 2 deletions llmfoundry/command_utils/data_prep/convert_delta_to_json.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Copyright 2022 MosaicML LLM Foundry authors
# SPDX-License-Identifier: Apache-2.0

import json
import logging
import os
import re
Expand All @@ -27,6 +28,7 @@
FaultyDataPrepCluster,
InsufficientPermissionsError,
MalformedUCTableError,
StoragePermissionError,
UCNotEnabledError,
)

Expand Down Expand Up @@ -681,7 +683,7 @@ def fetch_DT(

log.info(f'Directory {json_output_folder} created.')

# validate_and_get_cluster_info allows cluster_id to be None if use_serverless is True
# `validate_and_get_cluster_info` allows cluster_id to be None if use_serverless is True.
method, dbsql, sparkSession = validate_and_get_cluster_info(
cluster_id=cluster_id,
databricks_host=DATABRICKS_HOST,
Expand All @@ -704,6 +706,14 @@ def fetch_DT(
dbsql,
)
except (grpc.RpcError, spark_errors.SparkConnectGrpcException) as e:
if isinstance(
e,
spark_errors.SparkConnectGrpcException,
) and 'is not Shared or Single User Cluster' in str(e):
raise FaultyDataPrepCluster(
message=
f'The cluster you have provided: {cluster_id} does not have data governance enabled. Please use a cluster with a data security mode other than NONE. {e}',
) from e
if isinstance(
e,
spark_errors.SparkConnectGrpcException,
Expand Down Expand Up @@ -732,12 +742,38 @@ def fetch_DT(
if dbsql is not None:
dbsql.close()

# combine downloaded jsonl into one big jsonl for IFT
# Combine downloaded jsonl into one big jsonl for IFT.
iterative_combine_jsons(
json_output_folder,
os.path.join(json_output_folder, json_output_filename),
)

_validate_written_file(
json_output_folder,
json_output_filename,
delta_table_name,
)


def _validate_written_file(
json_output_folder: str,
json_output_filename: str,
delta_table_name: str,
):
# Validate downloaded dataset is actually downloaded.
with open(os.path.join(json_output_folder, json_output_filename)) as f:
is_empty = True
for line in f.readlines():
is_empty = False
try:
json.loads(line)
except Exception as e:
raise ValueError(f'Line is not valid json: {line}') from e
if is_empty:
raise StoragePermissionError(
f'Unable to download {delta_table_name}, check network permissions.',
)


def _check_imports():
try:
Expand Down
8 changes: 8 additions & 0 deletions llmfoundry/models/mpt/modeling_mpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@
from llmfoundry.models.layers.norm import LPLayerNorm # type: ignore
# isort: on

from llmfoundry.utils.warnings import VersionedDeprecationWarning

log = logging.getLogger(__name__)

CROSS_ENTROPY_IGNORE_INDEX = -100
Expand Down Expand Up @@ -1360,6 +1362,12 @@ def compute_loss_from_logits(
else:
loss = losses.sum() / (targets != loss_fn.ignore_index).sum()
if sample_weighing_factor is not None:
warnings.warn(
VersionedDeprecationWarning(
message='sample_weighing_factor has been deprecated!',
remove_version='0.17.0',
),
)
if sample_weighing_factor.shape[0] > 1:
raise ValueError(
'Sample weighing factor is not supported when batch["sample_weighing_factor"].shape[0] > 1.',
Expand Down
4 changes: 3 additions & 1 deletion llmfoundry/utils/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,8 +409,10 @@ def __init__(self, output_folder: str) -> None:
class MisconfiguredHfDatasetError(UserError):
    """Error thrown when a HuggingFace dataset is misconfigured."""

    def __init__(self, dataset_name: str, split: Optional[str] = None) -> None:
        """Build the error message, mentioning the split only when given.

        Args:
            dataset_name (str): Name of the dataset that failed to load.
            split (Optional[str]): Split that was requested, if any. When
                ``None`` the split is left out of the message.
        """
        if split is not None:
            description = f'Your dataset (name={dataset_name}, split={split}) is misconfigured. '
        else:
            description = f'Your dataset (name={dataset_name}) is misconfigured. '
        advice = 'Please check your dataset format and make sure you can load your dataset locally.'
        message = description + advice
        super().__init__(message, dataset_name=dataset_name, split=split)

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# build requirements
[build-system]
requires = ["setuptools < 68.0.0"]
requires = ["setuptools < 76.0.0"]
build-backend = "setuptools.build_meta"

# iSort
Expand Down
Loading

0 comments on commit 26df83f

Please sign in to comment.