Merge branch 'main' into composer_lora
dakinggg committed Jan 24, 2024
2 parents 8d6366e + 1469dcb commit f1c7e0b
Showing 18 changed files with 234 additions and 82 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/docker.yaml
@@ -7,7 +7,7 @@ on:
branches:
- main
paths:
- ./Dockerfile
- Dockerfile
- .github/workflows/docker.yaml
workflow_dispatch: {}
jobs:
4 changes: 4 additions & 0 deletions llmfoundry/models/inference_api_wrapper/__init__.py
@@ -1,6 +1,8 @@
# Copyright 2022 MosaicML LLM Foundry authors
# SPDX-License-Identifier: Apache-2.0

from llmfoundry.models.inference_api_wrapper.fmapi import (
FMAPICasualLMEvalWrapper, FMAPIChatAPIEvalWrapper)
from llmfoundry.models.inference_api_wrapper.interface import \
InferenceAPIEvalWrapper
from llmfoundry.models.inference_api_wrapper.openai_causal_lm import (
@@ -10,4 +12,6 @@
'OpenAICausalLMEvalWrapper',
'OpenAIChatAPIEvalWrapper',
'InferenceAPIEvalWrapper',
'FMAPICasualLMEvalWrapper',
'FMAPIChatAPIEvalWrapper',
]
72 changes: 72 additions & 0 deletions llmfoundry/models/inference_api_wrapper/fmapi.py
@@ -0,0 +1,72 @@
# Copyright 2022 MosaicML LLM Foundry authors
# SPDX-License-Identifier: Apache-2.0

import logging
import os
import time
from typing import Dict

import requests
from transformers import AutoTokenizer

from llmfoundry.models.inference_api_wrapper.openai_causal_lm import (
OpenAICausalLMEvalWrapper, OpenAIChatAPIEvalWrapper, OpenAIEvalInterface)

__all__ = [
'FMAPICasualLMEvalWrapper',
'FMAPIChatAPIEvalWrapper',
]

log = logging.getLogger(__name__)


def block_until_ready(base_url: str):
"""Block until the endpoint is ready."""
sleep_s = 5
timeout_s = 5 * 60  # At max, wait 5 minutes

ping_url = f'{base_url}/ping'

waited_s = 0
while True:
try:
requests.get(ping_url)
log.info(f'Endpoint {ping_url} is ready')
break
except requests.exceptions.ConnectionError:
log.debug(
f'Endpoint {ping_url} not ready yet. Sleeping {sleep_s} seconds'
)
time.sleep(sleep_s)
waited_s += sleep_s

if waited_s >= timeout_s:
raise TimeoutError(
f'Endpoint {ping_url} did not become ready after {waited_s:,} seconds, exiting'
)


class FMAPIEvalInterface(OpenAIEvalInterface):

def __init__(self, model_cfg: Dict, tokenizer: AutoTokenizer):
is_local = model_cfg.pop('local', False)
if is_local:
base_url = os.environ.get('MOSAICML_MODEL_ENDPOINT',
'http://0.0.0.0:8080/v2')
model_cfg['base_url'] = base_url
block_until_ready(base_url)

if 'base_url' not in model_cfg:
raise ValueError(
'Must specify base_url or use local=True in model_cfg for FMAPIEvalInterface'
)

super().__init__(model_cfg, tokenizer)


class FMAPICasualLMEvalWrapper(FMAPIEvalInterface, OpenAICausalLMEvalWrapper):
"""Databricks Foundational Model API wrapper for causal LM models."""


class FMAPIChatAPIEvalWrapper(FMAPIEvalInterface, OpenAIChatAPIEvalWrapper):
"""Databricks Foundational Model API wrapper for chat models."""
27 changes: 22 additions & 5 deletions llmfoundry/models/inference_api_wrapper/openai_causal_lm.py
@@ -36,18 +36,35 @@ class OpenAIEvalInterface(InferenceAPIEvalWrapper):

def __init__(self, model_cfg: Dict, tokenizer: AutoTokenizer) -> None:
super().__init__(model_cfg, tokenizer)
assert os.getenv(
'OPENAI_API_KEY'
) is not None, 'No OpenAI API Key found. Ensure it is saved as an environmental variable called OPENAI_API_KEY.'
try:
import openai
except ImportError as e:
raise MissingConditionalImportError(
extra_deps_group='openai',
conda_package='openai',
conda_channel='conda-forge') from e
self.client = openai.OpenAI()
self.model_name = model_cfg['version']

api_key = os.environ.get('OPENAI_API_KEY')
base_url = model_cfg.get('base_url')
if base_url is None:
# Using OpenAI default, where the API key is required
if api_key is None:
raise ValueError(
'No OpenAI API Key found. Ensure it is saved as an environment variable called OPENAI_API_KEY.'
)

else:
# Using a custom base URL, where the API key may not be required
log.info(
f'Making request to custom base URL: {base_url}{"" if api_key is not None else " (no API key set)"}'
)
api_key = 'placeholder' # This cannot be None

self.client = openai.OpenAI(base_url=base_url, api_key=api_key)
if 'version' in model_cfg:
self.model_name = model_cfg['version']
else:
self.model_name = model_cfg['name']

def generate_completion(self, prompt: str, num_tokens: int):
raise NotImplementedError()
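A rough sketch of the two configuration paths the constructor now supports; the URL and model identifiers below are placeholders, not values from this change.

# Sketch: default OpenAI endpoint vs. custom base_url (all values are placeholders).
from transformers import AutoTokenizer

from llmfoundry.models.inference_api_wrapper import OpenAICausalLMEvalWrapper

tokenizer = AutoTokenizer.from_pretrained('gpt2')

# Default endpoint: OPENAI_API_KEY must be set in the environment.
default_cfg = {'version': 'davinci-002'}

# Custom endpoint: no API key required; a placeholder key is passed to the client.
custom_cfg = {'base_url': 'http://0.0.0.0:8080/v2', 'name': 'my-served-model'}

wrapper = OpenAICausalLMEvalWrapper(custom_cfg, tokenizer)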
29 changes: 22 additions & 7 deletions llmfoundry/models/layers/attention.py
@@ -516,6 +516,7 @@ def __init__(
attn_impl: str = 'triton',
clip_qkv: Optional[float] = None,
qk_ln: bool = False,
qk_gn: bool = False,
softmax_scale: Optional[float] = None,
attn_pdrop: float = 0.0,
norm_type: str = 'low_precision_layernorm',
@@ -529,6 +530,7 @@
self.attn_impl = attn_impl
self.clip_qkv = clip_qkv
self.qk_ln = qk_ln
self.qk_gn = qk_gn

self.d_model = d_model
self.n_heads = n_heads
@@ -549,6 +551,8 @@ def __init__(
raise ValueError(
'Each Q head should get the same number of KV heads, so n_heads must be divisible by kv_n_heads.'
)
if qk_ln and qk_gn:
raise ValueError('Only one of qk_ln and qk_gn can be set to True.')

self.softmax_scale = softmax_scale
if self.softmax_scale is None:
@@ -572,11 +576,13 @@ def __init__(
]
self.Wqkv._fused = (0, fuse_splits)

if self.qk_ln:
if self.qk_ln or self.qk_gn:
norm_class = NORM_CLASS_REGISTRY[norm_type.lower()]
self.q_ln = norm_class(self.d_model, device=device)
self.k_ln = norm_class(self.kv_n_heads * self.head_dim,
device=device)
norm_size = self.head_dim if qk_gn else d_model
self.q_ln = norm_class(norm_size, device=device)
if qk_ln:
norm_size = self.head_dim * kv_n_heads
self.k_ln = norm_class(norm_size, device=device)

if self.attn_impl == 'flash':
self.attn_fn = flash_attn_fn
@@ -623,11 +629,16 @@ def forward(

key_padding_mask = attention_mask

if self.qk_ln:
if self.qk_ln or self.qk_gn:
# Applying layernorm to qk
q_shape, k_shape = query.shape, key.shape
if self.qk_gn:
b, s = query.shape[:2]
query = query.view(b, s, self.n_heads, -1)
key = key.view(b, s, self.kv_n_heads, -1)
dtype = query.dtype
query = self.q_ln(query).to(dtype)
key = self.k_ln(key).to(dtype)
query = self.q_ln(query).to(dtype).view(q_shape)
key = self.k_ln(key).to(dtype).view(k_shape)

if rotary_emb_w_meta_info is not None:
rotary_emb = rotary_emb_w_meta_info['rotary_emb']
@@ -712,6 +723,7 @@ def __init__(
attn_impl: str = 'triton',
clip_qkv: Optional[float] = None,
qk_ln: bool = False,
qk_gn: bool = False,
softmax_scale: Optional[float] = None,
attn_pdrop: float = 0.0,
norm_type: str = 'low_precision_layernorm',
@@ -727,6 +739,7 @@
attn_impl=attn_impl,
clip_qkv=clip_qkv,
qk_ln=qk_ln,
qk_gn=qk_gn,
softmax_scale=softmax_scale,
attn_pdrop=attn_pdrop,
norm_type=norm_type,
Expand All @@ -751,6 +764,7 @@ def __init__(
attn_impl: str = 'triton',
clip_qkv: Optional[float] = None,
qk_ln: bool = False,
qk_gn: bool = False,
softmax_scale: Optional[float] = None,
attn_pdrop: float = 0.0,
norm_type: str = 'low_precision_layernorm',
@@ -766,6 +780,7 @@
attn_impl=attn_impl,
clip_qkv=clip_qkv,
qk_ln=qk_ln,
qk_gn=qk_gn,
softmax_scale=softmax_scale,
attn_pdrop=attn_pdrop,
norm_type=norm_type,
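In effect, qk_ln keeps a single norm over the full projected query/key width while qk_gn normalizes each head's slice independently; a self-contained sketch of the shape logic (dimensions are arbitrary, and this is not the module code above):

# Standalone illustration of qk_ln vs. qk_gn normalization shapes.
import torch
import torch.nn as nn

b, s, d_model, n_heads = 2, 16, 64, 4
head_dim = d_model // n_heads
query = torch.randn(b, s, d_model)

# qk_ln: one LayerNorm over the full projected dimension.
q_ln = nn.LayerNorm(d_model)
q_layernormed = q_ln(query)

# qk_gn: view as (b, s, n_heads, head_dim), normalize each head separately,
# then restore the original shape -- the same reshape-and-norm pattern as above.
q_gn = nn.LayerNorm(head_dim)
q_groupnormed = q_gn(query.view(b, s, n_heads, head_dim)).view(b, s, d_model)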
1 change: 1 addition & 0 deletions llmfoundry/models/layers/blocks.py
@@ -22,6 +22,7 @@
'attn_pdrop': 0.0,
'attn_impl': 'triton',
'qk_ln': False,
'qk_gn': False,
'clip_qkv': None,
'softmax_scale': None,
'prefix_lm': False,
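A model config would flip this default to opt in; a hypothetical fragment (other attention settings omitted):

# Hypothetical attn_config override enabling group-norm on queries and keys.
attn_config = {
    'attn_impl': 'triton',
    'qk_ln': False,  # qk_ln and qk_gn are mutually exclusive (see attention.py above)
    'qk_gn': True,
}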
8 changes: 6 additions & 2 deletions llmfoundry/models/model_registry.py
@@ -3,7 +3,9 @@

from llmfoundry.models.hf import (ComposerHFCausalLM, ComposerHFPrefixLM,
ComposerHFT5)
from llmfoundry.models.inference_api_wrapper import (OpenAICausalLMEvalWrapper,
from llmfoundry.models.inference_api_wrapper import (FMAPICasualLMEvalWrapper,
FMAPIChatAPIEvalWrapper,
OpenAICausalLMEvalWrapper,
OpenAIChatAPIEvalWrapper)
from llmfoundry.models.mpt import ComposerMPTCausalLM

@@ -13,5 +15,7 @@
'hf_prefix_lm': ComposerHFPrefixLM,
'hf_t5': ComposerHFT5,
'openai_causal_lm': OpenAICausalLMEvalWrapper,
'openai_chat': OpenAIChatAPIEvalWrapper
'fmapi_causal_lm': FMAPICasualLMEvalWrapper,
'openai_chat': OpenAIChatAPIEvalWrapper,
'fmapi_chat': FMAPIChatAPIEvalWrapper,
}
1 change: 1 addition & 0 deletions llmfoundry/models/mpt/configuration_mpt.py
@@ -82,6 +82,7 @@ def __init__(
attn_pdrop (float): The dropout probability for the attention layers.
attn_impl (str): The attention implementation to use. One of 'torch', 'flash', or 'triton'.
qk_ln (bool): Whether to apply layer normalization to the queries and keys in the attention layer.
qk_gn (bool): Whether to apply group normalization to the queries and keys in the attention layer.
clip_qkv (Optional[float]): If not None, clip the queries, keys, and values in the attention layer to
this value.
softmax_scale (Optional[float]): If not None, scale the softmax in the attention layer by this value. If None,
15 changes: 5 additions & 10 deletions llmfoundry/utils/builders.py
@@ -219,21 +219,16 @@ def build_callback(


def build_logger(name: str, kwargs: Dict[str, Any]) -> LoggerDestination:
kwargs_dict = {
k: v if isinstance(v, str) else om.to_container(v, resolve=True)
for k, v in kwargs.items()
}

if name == 'wandb':
return WandBLogger(**kwargs_dict)
return WandBLogger(**kwargs)
elif name == 'tensorboard':
return TensorboardLogger(**kwargs_dict)
return TensorboardLogger(**kwargs)
elif name == 'in_memory_logger':
return InMemoryLogger(**kwargs_dict)
return InMemoryLogger(**kwargs)
elif name == 'mlflow':
return MLFlowLogger(**kwargs_dict)
return MLFlowLogger(**kwargs)
elif name == 'inmemory':
return InMemoryLogger(**kwargs_dict)
return InMemoryLogger(**kwargs)
else:
raise ValueError(f'Not sure how to build logger: {name}')

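With the om.to_container conversion removed, callers presumably resolve any OmegaConf objects before calling; an illustrative call (project name and tags are placeholders):

# Illustrative: kwargs are now forwarded to the logger constructor unchanged.
from llmfoundry.utils.builders import build_logger

wandb_logger = build_logger('wandb', {'project': 'my-project', 'tags': ['eval']})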
26 changes: 20 additions & 6 deletions llmfoundry/utils/model_download_utils.py
@@ -30,6 +30,12 @@
]
PYTORCH_WEIGHTS_PATTERN = 'pytorch_model*.bin*'
SAFE_WEIGHTS_PATTERN = 'model*.safetensors*'
TOKENIZER_FILES = [
'special_tokens_map.json',
'tokenizer.json',
'tokenizer.model',
'tokenizer_config.json',
]

ORAS_PASSWD_PLACEHOLDER = '<placeholder_for_passwd>'
ORAS_CLI = 'oras'
@@ -45,6 +51,7 @@ def download_from_hf_hub(
model: str,
save_dir: str,
prefer_safetensors: bool = True,
tokenizer_only: bool = False,
token: Optional[str] = None,
):
"""Downloads model files from a Hugging Face Hub model repo.
@@ -57,6 +64,7 @@
save_dir (str, optional): The local path to the directory where the model files will be downloaded.
prefer_safetensors (bool): Whether to prefer Safetensors weights over PyTorch weights if both are
available. Defaults to True.
tokenizer_only (bool): If true, only download tokenizer files.
token (str, optional): The HuggingFace API token. If not provided, the token will be read from the
`HUGGING_FACE_HUB_TOKEN` environment variable.
@@ -95,10 +103,13 @@
' Please make sure the repo contains either safetensors or pytorch weights.'
)

allow_patterns = TOKENIZER_FILES if tokenizer_only else None

download_start = time.time()
hf_hub.snapshot_download(model,
local_dir=save_dir,
ignore_patterns=ignore_patterns,
allow_patterns=allow_patterns,
token=token)
download_duration = time.time() - download_start
log.info(
@@ -221,16 +232,18 @@ def download_from_oras(model: str,
config_file: str,
credentials_dir: str,
save_dir: str,
tokenizer_only: bool = False,
concurrency: int = 10):
"""Download from an OCI-compliant registry using oras.
Args:
model: The name of the model to download.
config_file: Path to a YAML config file that maps model names to registry paths.
credentials_dir: Path to a directory containing credentials for the registry. It is expected to contain three
model (str): The name of the model to download.
config_file (str): Path to a YAML config file that maps model and tokenizer names to registry paths.
credentials_dir (str): Path to a directory containing credentials for the registry. It is expected to contain three
files: `username`, `password`, and `registry`, each of which contains the corresponding credential.
save_dir: Path to the directory where files will be downloaded.
concurrency: The number of concurrent downloads to run.
save_dir (str): Path to the directory where files will be downloaded.
tokenizer_only (bool): If true, only download the tokenizer files.
concurrency (int): The number of concurrent downloads to run.
"""
if shutil.which(ORAS_CLI) is None:
raise Exception(
@@ -253,7 +266,8 @@ def _read_secrets_file(secret_file_path: str,):
with open(config_file, 'r', encoding='utf-8') as f:
configs = yaml.safe_load(f.read())

path = configs['models'][model]
config_type = 'tokenizers' if tokenizer_only else 'models'
path = configs[config_type][model]
registry = secrets['registry']

def get_oras_cmd(username: Optional[str] = None,
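A sketch of how the new flag might be used for a Hub download; the repo id and local path are placeholders:

# Download only tokenizer files from a Hugging Face repo (paths are placeholders).
from llmfoundry.utils.model_download_utils import download_from_hf_hub

download_from_hf_hub(
    'mosaicml/mpt-7b',
    save_dir='/tmp/mpt-7b-tokenizer',
    tokenizer_only=True,  # restricts snapshot_download to the TOKENIZER_FILES patterns
)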
4 changes: 4 additions & 0 deletions scripts/data_prep/convert_text_to_mds.py
@@ -385,6 +385,10 @@ def convert_text_to_mds(
local_output_folder = tempfile.TemporaryDirectory(
).name if is_remote_output else output_folder

if os.path.isdir(output_folder) and len(os.listdir(output_folder)) > 0:
raise FileExistsError(
f'{output_folder=} is not empty. Please remove or empty it.')

if processes > 1:
# Download and convert the text files in parallel
args = get_task_args(object_names, local_output_folder, input_folder,
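Because the conversion now raises FileExistsError on a non-empty output folder, a re-run may need to clear the previous local output first; a minimal sketch, assuming the folder's contents are disposable:

# Remove stale local output before re-running the conversion (path is a placeholder).
import os
import shutil

output_folder = '/tmp/mds-output'
if os.path.isdir(output_folder) and os.listdir(output_folder):
    shutil.rmtree(output_folder)
os.makedirs(output_folder, exist_ok=True)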