diff --git a/ludwig/models/llm.py b/ludwig/models/llm.py
index 52c67352fc6..217fd16e202 100644
--- a/ludwig/models/llm.py
+++ b/ludwig/models/llm.py
@@ -21,6 +21,7 @@
 from ludwig.utils.error_handling_utils import default_retry
 from ludwig.utils.llm_quantization_utils import convert_quantized_linear_to_linear
 from ludwig.utils.llm_utils import (
+    _MODELS_WITH_DEVICE_MAP_AUTO_EXCLUSION,
     add_left_padding,
     generate_merged_ids,
     get_context_len,
@@ -87,7 +88,8 @@ def load_pretrained_from_config(
         # Apply quanitzation configuration at model load time
         load_kwargs["torch_dtype"] = getattr(torch, config_obj.quantization.bnb_4bit_compute_dtype)
         load_kwargs["quantization_config"] = config_obj.quantization.to_bitsandbytes()
-        load_kwargs["device_map"] = "auto"
+        if config_obj.base_model not in _MODELS_WITH_DEVICE_MAP_AUTO_EXCLUSION:
+            load_kwargs["device_map"] = "auto"
 
     if config_obj.model_parameters:
         # Add any model specific parameters to the load kwargs
diff --git a/ludwig/schema/llms/base_model.py b/ludwig/schema/llms/base_model.py
index 3f48b55694a..92e510c899c 100644
--- a/ludwig/schema/llms/base_model.py
+++ b/ludwig/schema/llms/base_model.py
@@ -9,6 +9,7 @@
 from ludwig.error import ConfigValidationError
 from ludwig.schema.metadata import LLM_METADATA
 from ludwig.schema.metadata.parameter_metadata import convert_metadata_to_json
+from ludwig.utils.llm_utils import _PHI_BASE_MODEL_MAPPING
 
 # Maps a preset LLM name to the full slash-delimited HF path. If the user chooses a preset LLM, the preset LLM name is
 # replaced with the full slash-delimited HF path using this map, after JSON validation but before config object
@@ -72,6 +73,8 @@ def validate(model_name: str):
             return MODEL_PRESETS[model_name]
         if os.path.isdir(model_name):
             return model_name
+        if model_name in _PHI_BASE_MODEL_MAPPING:
+            return _PHI_BASE_MODEL_MAPPING[model_name]
         try:
             AutoConfig.from_pretrained(model_name)
             return model_name
diff --git a/ludwig/schema/model_types/utils.py b/ludwig/schema/model_types/utils.py
index b8550d06838..929bb42ea5d 100644
--- a/ludwig/schema/model_types/utils.py
+++ b/ludwig/schema/model_types/utils.py
@@ -34,7 +34,7 @@
 from ludwig.schema.trainer import ECDTrainerConfig
 from ludwig.types import HyperoptConfigDict, ModelConfigDict
 from ludwig.utils.data_utils import get_sanitized_feature_name
-from ludwig.utils.llm_utils import get_context_len
+from ludwig.utils.llm_utils import _PHI_BASE_MODEL_MAPPING, get_context_len
 
 if TYPE_CHECKING:
     from ludwig.schema.model_types.base import ModelConfig
@@ -307,6 +307,9 @@ def set_llm_parameters(config: "ModelConfig") -> None:
     if config.model_type != MODEL_LLM:
         return
 
+    # Do an in-place replacement for Phi models since they don't work well out of the box
+    _replace_phi_model_with_supported_model(config)
+
     # Set preprocessing parameters for text features for LLM model type
     _set_llm_tokenizers(config)
 
@@ -314,6 +317,19 @@ def set_llm_parameters(config: "ModelConfig") -> None:
     _set_generation_max_new_tokens(config)
 
 
+def _replace_phi_model_with_supported_model(config: "ModelConfig") -> None:
+    """Replaces the phi model with a supported model that is compatible with the LLM model type."""
+    if config.base_model not in _PHI_BASE_MODEL_MAPPING:
+        return
+
+    logger.warning(
+        f"{config.base_model} does not work correctly out of the box since it requires running remote code. "
+        f"Replacing {config.base_model} with {_PHI_BASE_MODEL_MAPPING[config.base_model]} as the base LLM model."
+    )
+
+    config.base_model = _PHI_BASE_MODEL_MAPPING[config.base_model]
+
+
 def _set_llm_tokenizers(config: "ModelConfig") -> None:
     """Sets the tokenizers for the LLM model to the pretrained model name or path. This ensures that they use the
     correct shared vocabulary from the tokenizer.
diff --git a/ludwig/utils/llm_utils.py b/ludwig/utils/llm_utils.py
index 9a0a65bd79f..77ab4c5f63e 100644
--- a/ludwig/utils/llm_utils.py
+++ b/ludwig/utils/llm_utils.py
@@ -18,6 +18,16 @@
 
 FALLBACK_CONTEXT_LEN = 2048
 
+# The official microsoft phi models don't work out of the box because the weights aren't compatible with HF.
+# See https://github.com/huggingface/transformers/issues/28049 for more context.
+_PHI_BASE_MODEL_MAPPING = {
+    "microsoft/phi-1": "susnato/phi-1_dev",
+    "microsoft/phi-1.5": "susnato/phi-1_5_dev",
+}
+
+# The susnato Phi models as of Transformers 4.36.1 don't support "device_map='auto'" at model load time.
+_MODELS_WITH_DEVICE_MAP_AUTO_EXCLUSION = {"susnato/phi-1_dev", "susnato/phi-1_5_dev"}
+
 
 def to_device(
     model: PreTrainedModel,
@@ -54,11 +64,13 @@ def to_device(
         model_kwargs.update(
             dict(
                 low_cpu_mem_usage=True,
-                device_map="auto",
                 max_memory={i: "13GiB" for i in range(num_gpus)},
             )
         )
 
+        if config_obj.base_model not in _MODELS_WITH_DEVICE_MAP_AUTO_EXCLUSION:
+            model_kwargs["device_map"] = "auto"
+
         if config_obj.quantization:
             model_kwargs["quantization_config"] = config_obj.quantization.to_bitsandbytes()
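
For reference, here is a minimal standalone sketch (not part of the diff) of the remapping behavior this change introduces: the `remap_phi_base_model` helper below is hypothetical and only mirrors the `_PHI_BASE_MODEL_MAPPING` lookup and warning that `_replace_phi_model_with_supported_model` performs in-place on the config.

```python
import logging

logger = logging.getLogger(__name__)

# Mirrors _PHI_BASE_MODEL_MAPPING added to ludwig/utils/llm_utils.py in this diff.
_PHI_BASE_MODEL_MAPPING = {
    "microsoft/phi-1": "susnato/phi-1_dev",
    "microsoft/phi-1.5": "susnato/phi-1_5_dev",
}


def remap_phi_base_model(base_model: str) -> str:
    """Hypothetical standalone version of the in-place replacement done in set_llm_parameters."""
    if base_model not in _PHI_BASE_MODEL_MAPPING:
        return base_model
    replacement = _PHI_BASE_MODEL_MAPPING[base_model]
    logger.warning("Replacing %s with %s as the base LLM model.", base_model, replacement)
    return replacement


# Phi base models are swapped for the community ports; everything else passes through unchanged.
assert remap_phi_base_model("microsoft/phi-1.5") == "susnato/phi-1_5_dev"
assert remap_phi_base_model("meta-llama/Llama-2-7b-hf") == "meta-llama/Llama-2-7b-hf"
```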