Fix typo of SmolLM.
PiperOrigin-RevId: 674463485
ai-edge-bot authored and copybara-github committed Sep 13, 2024
1 parent 668d04e commit 0634d3a
Showing 6 changed files with 29 additions and 29 deletions.
6 changes: 3 additions & 3 deletions ai_edge_torch/generative/examples/README.md
@@ -17,9 +17,9 @@ with 270M, 450M, 1.1B, and 3B parameters. The example we provide is OpenELM 3B,
and the checkpoint for the model can be found
[here](https://huggingface.co/apple/OpenELM-3B/tree/main).

-## HuggingFace SmalLM
-[HuggingFace SmalLM](https://huggingface.co/blog/smollm) is also a decoder-only
-LLM with 135M, 360M, 1.7B parameters. The example we provide is SmalLM 135M, and
+## HuggingFace SmolLM
+[HuggingFace SmolLM](https://huggingface.co/blog/smollm) is also a decoder-only
+LLM with 135M, 360M, 1.7B parameters. The example we provide is SmolLM 135M, and
the checkpoint for the model can be found
[here](https://huggingface.co/HuggingFaceTB/SmolLM-135M).

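For reference, the conversion entry point `convert_smollm_to_tflite` in the next file expects this checkpoint in a local directory. A minimal fetch sketch, not part of this commit, assuming the huggingface_hub package is installed; the target directory merely mirrors the default path used by the script below:

```python
# Sketch: download the SmolLM-135M checkpoint referenced in the README.
# Assumes `pip install huggingface_hub`; local_dir matches the default
# checkpoint path used by convert_smollm_to_tflite further down.
import pathlib

from huggingface_hub import snapshot_download

snapshot_download(
    repo_id="HuggingFaceTB/SmolLM-135M",
    local_dir=pathlib.Path.home() / "Downloads/llm_data/smollm",
)
```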
@@ -13,25 +13,25 @@
# limitations under the License.
# ==============================================================================

"""Example of converting SmalLM model to multi-signature tflite model."""
"""Example of converting SmolLM model to multi-signature tflite model."""

import os
import pathlib

import ai_edge_torch
-from ai_edge_torch.generative.examples.smallm import smallm
+from ai_edge_torch.generative.examples.smollm import smollm
from ai_edge_torch.generative.layers import kv_cache as kv_utils
from ai_edge_torch.generative.quantize import quant_recipes
import torch


-def convert_smallm_to_tflite(
+def convert_smollm_to_tflite(
    checkpoint_path: str,
    prefill_seq_len: int = 512,
    kv_cache_max_len: int = 1024,
    quantize: bool = True,
):
"""Converts SmalLM model to multi-signature tflite model.
"""Converts SmolLM model to multi-signature tflite model.
Args:
checkpoint_path (str): The filepath to the model checkpoint, or directory
@@ -43,7 +43,7 @@ def convert_smallm_to_tflite(
    quantize (bool, optional): Whether the model should be quantized. Defaults
      to True.
  """
-  pytorch_model = smallm.build_model(
+  pytorch_model = smollm.build_model(
      checkpoint_path, kv_cache_max_len=kv_cache_max_len
  )
  # Tensors used to trace the model graph during conversion.
@@ -77,10 +77,10 @@ def convert_smallm_to_tflite(
  )
  quant_suffix = 'q8' if quantize else 'f32'
  edge_model.export(
-      f'/tmp/smallm_{quant_suffix}_seq{prefill_seq_len}_ekv{kv_cache_max_len}.tflite'
+      f'/tmp/smollm_{quant_suffix}_seq{prefill_seq_len}_ekv{kv_cache_max_len}.tflite'
  )


if __name__ == '__main__':
-  path = os.path.join(pathlib.Path.home(), 'Downloads/llm_data/smallm')
-  convert_smallm_to_tflite(path)
+  path = os.path.join(pathlib.Path.home(), 'Downloads/llm_data/smollm')
+  convert_smollm_to_tflite(path)
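After conversion, the multi-signature export can be sanity-checked by listing the signatures baked into the flatbuffer. A sketch assuming a TensorFlow install and the default export path; the 'prefill' signature name is taken from the test file further down, not guaranteed by this diff:

```python
# Sketch: inspect the converted model's signatures with the TFLite
# interpreter. The file name matches the default arguments of
# convert_smollm_to_tflite (quantize=True, prefill_seq_len=512,
# kv_cache_max_len=1024).
import tensorflow as tf

interpreter = tf.lite.Interpreter(
    model_path="/tmp/smollm_q8_seq512_ekv1024.tflite"
)
print(interpreter.get_signature_list())  # e.g. {'prefill': ..., 'decode': ...}

# Each exported signature gets its own callable runner.
prefill_runner = interpreter.get_signature_runner("prefill")
```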
@@ -13,7 +13,7 @@
# limitations under the License.
# ==============================================================================

"""Example of building a SmalLM model."""
"""Example of building a SmolLM model."""

import copy
import os
@@ -28,32 +28,32 @@
from torch import nn

TENSOR_NAMES = copy.copy(tiny_llama.TENSOR_NAMES)
-# SmalLM re-uses the embedding as the head projection layer.
+# SmolLM re-uses the embedding as the head projection layer.
TENSOR_NAMES.lm_head = None


-class SmalLM(tiny_llama.TinyLlama):
-  """A SmalLM model built from the Edge Generative API layers.
+class SmolLM(tiny_llama.TinyLlama):
+  """A SmolLM model built from the Edge Generative API layers.

-  SmalLM shares the same architecture as TinyLlama, but with different model
+  SmolLM shares the same architecture as TinyLlama, but with different model
  sizes.
  """

  def __init__(self, config: cfg.ModelConfig):
    super().__init__(config)
-    # SmalLM re-uses the embedding as the head projection layer.
+    # SmolLM re-uses the embedding as the head projection layer.
    self.lm_head.weight.data = self.tok_embedding.weight.data


def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
"""Returns the model config for a SmalLM 135M model.
"""Returns the model config for a SmolLM 135M model.

  Args:
    kv_cache_max_len (int): The maximum sequence length of the KV cache. Default
      is 1024.

  Returns:
-    The model config for a SmalLM model.
+    The model config for a SmolLM model.
  """
  attn_config = cfg.AttentionConfig(
      num_heads=9,
@@ -90,14 +90,14 @@ def get_fake_model_config(**kwargs) -> cfg.ModelConfig:
  config = get_model_config(**kwargs)
  config.vocab_size = 128
  config.num_layers = 2
-  # SmalLM has only one block config.
+  # SmolLM has only one block config.
  config.block_config(0).ff_config.intermediate_size = 64
  return config


def build_model(checkpoint_path: str, **kwargs) -> nn.Module:
  config = get_model_config(**kwargs)
-  model = SmalLM(config)
+  model = SmolLM(config)
  loader = loading_utils.ModelLoader(checkpoint_path, TENSOR_NAMES)
  # Since embedding and lm-head use the same weight, we need to set strict
  # to False.
@@ -107,10 +107,10 @@ def build_model(checkpoint_path: str, **kwargs) -> nn.Module:


def define_and_run(checkpoint_path: str) -> None:
"""Instantiates and runs a SmalLM model."""
"""Instantiates and runs a SmolLM model."""

  current_dir = pathlib.Path(__file__).parent.resolve()
-  smallm_goldens = torch.load(current_dir / "smallm_lm_logits.pt")
+  smollm_goldens = torch.load(current_dir / "smollm_lm_logits.pt")
  kv_cache_max_len = 1024
  model = build_model(checkpoint_path, kv_cache_max_len=kv_cache_max_len)
  idx = torch.from_numpy(np.array([[1, 2, 3, 4]]))
@@ -120,12 +120,12 @@ def define_and_run(checkpoint_path: str) -> None:
  kv = kv_utils.KVCache.from_model_config(model.config)
  output = model.forward(tokens, input_pos, kv)
  assert torch.allclose(
-      smallm_goldens, output["logits"][0, idx.shape[1] - 1, :], atol=1e-05
+      smollm_goldens, output["logits"][0, idx.shape[1] - 1, :], atol=1e-05
  )


if __name__ == "__main__":
  input_checkpoint_path = os.path.join(
-      pathlib.Path.home(), "Downloads/llm_data/smallm"
+      pathlib.Path.home(), "Downloads/llm_data/smollm"
  )
  define_and_run(input_checkpoint_path)
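The weight sharing described in the comments above is ordinary weight tying. A standalone PyTorch sketch of the idea; the dimensions are assumptions based on the SmolLM-135M release, not read from this diff:

```python
# Sketch of the weight tying used by SmolLM above: the token embedding
# matrix doubles as the output (lm_head) projection, saving
# vocab_size * width parameters. Dimensions here are illustrative.
import torch
from torch import nn

vocab_size, width = 49152, 576  # assumed SmolLM-135M-like sizes

tok_embedding = nn.Embedding(vocab_size, width)
lm_head = nn.Linear(width, vocab_size, bias=False)

# Same assignment pattern as SmolLM.__init__ above: both modules now
# read from one underlying tensor of shape (vocab_size, width).
lm_head.weight.data = tok_embedding.weight.data

ids = torch.tensor([[1, 2, 3, 4]])
logits = lm_head(tok_embedding(ids))  # shape: (1, 4, vocab_size)
```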
8 changes: 4 additions & 4 deletions ai_edge_torch/generative/test/test_model_conversion_large.py
@@ -21,7 +21,7 @@
from ai_edge_torch.generative.examples.gemma import gemma2
from ai_edge_torch.generative.examples.openelm import openelm
from ai_edge_torch.generative.examples.phi import phi2
-from ai_edge_torch.generative.examples.smallm import smallm
+from ai_edge_torch.generative.examples.smollm import smollm
from ai_edge_torch.generative.layers import kv_cache
from ai_edge_torch.generative.test import utils as test_utils
import numpy as np
@@ -113,9 +113,9 @@ def test_phi2(self):
      ai_edge_config.Config.use_torch_xla,
      reason="tests with custom ops are not supported on oss",
  )
-  def test_smallm(self):
-    config = smallm.get_fake_model_config()
-    pytorch_model = smallm.SmalLM(config).eval()
+  def test_smollm(self):
+    config = smollm.get_fake_model_config()
+    pytorch_model = smollm.SmolLM(config).eval()
    self._test_model(config, pytorch_model, "prefill", atol=1e-4, rtol=1e-5)

  @googletest.skipIf(
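The renamed test keeps the existing pattern: shrink the config (2 layers, 128-token vocab, 64-wide feed-forward) so conversion stays fast, then check that the PyTorch model and the converted TFLite model agree numerically. `_test_model` is not shown in this diff; the comparison it performs is presumably along these lines, a hedged sketch only:

```python
# Sketch of the numeric check a test like test_smollm relies on:
# logits from the PyTorch model and the converted TFLite signature
# should match within the atol/rtol passed above.
import numpy as np
import torch


def assert_logits_close(
    pt_logits: torch.Tensor,
    tflite_logits: np.ndarray,
    atol: float = 1e-4,
    rtol: float = 1e-5,
) -> None:
  """Fails if the two backends disagree beyond tolerance."""
  np.testing.assert_allclose(
      pt_logits.detach().cpu().numpy(), tflite_logits, atol=atol, rtol=rtol
  )
```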
