From 0634d3a2aa33fb23b8941da6b219a1138934d0b7 Mon Sep 17 00:00:00 2001 From: Google AI Edge Date: Fri, 13 Sep 2024 15:57:10 -0700 Subject: [PATCH] Fix typo of SmolLM. PiperOrigin-RevId: 674463485 --- ai_edge_torch/generative/examples/README.md | 6 ++-- .../examples/{smallm => smollm}/__init__.py | 0 .../{smallm => smollm}/convert_to_tflite.py | 16 +++++----- .../{smallm/smallm.py => smollm/smollm.py} | 28 +++++++++--------- .../smollm_lm_logits.pt} | Bin .../test/test_model_conversion_large.py | 8 ++--- 6 files changed, 29 insertions(+), 29 deletions(-) rename ai_edge_torch/generative/examples/{smallm => smollm}/__init__.py (100%) rename ai_edge_torch/generative/examples/{smallm => smollm}/convert_to_tflite.py (88%) rename ai_edge_torch/generative/examples/{smallm/smallm.py => smollm/smollm.py} (83%) rename ai_edge_torch/generative/examples/{smallm/smallm_lm_logits.pt => smollm/smollm_lm_logits.pt} (100%) diff --git a/ai_edge_torch/generative/examples/README.md b/ai_edge_torch/generative/examples/README.md index d43a2b30..1d389345 100644 --- a/ai_edge_torch/generative/examples/README.md +++ b/ai_edge_torch/generative/examples/README.md @@ -17,9 +17,9 @@ with 270M, 450M, 1.1B, and 3B parameters. The example we provide is OpenELM 3B, and the checkpoint for the model can be found [here](https://huggingface.co/apple/OpenELM-3B/tree/main). -## HuggingFace SmalLM -[HuggingFace SmalLM](https://huggingface.co/blog/smollm) is also a decoder-only -LLM with 135M, 360M, 1.7B parameters. The example we provide is SmalLM 135M, and +## HuggingFace SmolLM +[HuggingFace SmolLM](https://huggingface.co/blog/smollm) is also a decoder-only +LLM with 135M, 360M, 1.7B parameters. The example we provide is SmolLM 135M, and the checkpoint for the model can be found [here](https://huggingface.co/HuggingFaceTB/SmolLM-135M). diff --git a/ai_edge_torch/generative/examples/smallm/__init__.py b/ai_edge_torch/generative/examples/smollm/__init__.py similarity index 100% rename from ai_edge_torch/generative/examples/smallm/__init__.py rename to ai_edge_torch/generative/examples/smollm/__init__.py diff --git a/ai_edge_torch/generative/examples/smallm/convert_to_tflite.py b/ai_edge_torch/generative/examples/smollm/convert_to_tflite.py similarity index 88% rename from ai_edge_torch/generative/examples/smallm/convert_to_tflite.py rename to ai_edge_torch/generative/examples/smollm/convert_to_tflite.py index 5cdf7ea7..43a2d14e 100644 --- a/ai_edge_torch/generative/examples/smallm/convert_to_tflite.py +++ b/ai_edge_torch/generative/examples/smollm/convert_to_tflite.py @@ -13,25 +13,25 @@ # limitations under the License. # ============================================================================== -"""Example of converting SmalLM model to multi-signature tflite model.""" +"""Example of converting SmolLM model to multi-signature tflite model.""" import os import pathlib import ai_edge_torch -from ai_edge_torch.generative.examples.smallm import smallm +from ai_edge_torch.generative.examples.smollm import smollm from ai_edge_torch.generative.layers import kv_cache as kv_utils from ai_edge_torch.generative.quantize import quant_recipes import torch -def convert_smallm_to_tflite( +def convert_smollm_to_tflite( checkpoint_path: str, prefill_seq_len: int = 512, kv_cache_max_len: int = 1024, quantize: bool = True, ): - """Converts SmalLM model to multi-signature tflite model. + """Converts SmolLM model to multi-signature tflite model. Args: checkpoint_path (str): The filepath to the model checkpoint, or directory @@ -43,7 +43,7 @@ def convert_smallm_to_tflite( quantize (bool, optional): Whether the model should be quanized. Defaults to True. """ - pytorch_model = smallm.build_model( + pytorch_model = smollm.build_model( checkpoint_path, kv_cache_max_len=kv_cache_max_len ) # Tensors used to trace the model graph during conversion. @@ -77,10 +77,10 @@ def convert_smallm_to_tflite( ) quant_suffix = 'q8' if quantize else 'f32' edge_model.export( - f'/tmp/smallm_{quant_suffix}_seq{prefill_seq_len}_ekv{kv_cache_max_len}.tflite' + f'/tmp/smollm_{quant_suffix}_seq{prefill_seq_len}_ekv{kv_cache_max_len}.tflite' ) if __name__ == '__main__': - path = os.path.join(pathlib.Path.home(), 'Downloads/llm_data/smallm') - convert_smallm_to_tflite(path) + path = os.path.join(pathlib.Path.home(), 'Downloads/llm_data/smollm') + convert_smollm_to_tflite(path) diff --git a/ai_edge_torch/generative/examples/smallm/smallm.py b/ai_edge_torch/generative/examples/smollm/smollm.py similarity index 83% rename from ai_edge_torch/generative/examples/smallm/smallm.py rename to ai_edge_torch/generative/examples/smollm/smollm.py index b4f3ecfa..5d48b34c 100644 --- a/ai_edge_torch/generative/examples/smallm/smallm.py +++ b/ai_edge_torch/generative/examples/smollm/smollm.py @@ -13,7 +13,7 @@ # limitations under the License. # ============================================================================== -"""Example of building a SmalLM model.""" +"""Example of building a SmolLM model.""" import copy import os @@ -28,32 +28,32 @@ from torch import nn TENSOR_NAMES = copy.copy(tiny_llama.TENSOR_NAMES) -# SmalLM re-uses the embedding as the head projection layer. +# SmolLM re-uses the embedding as the head projection layer. TENSOR_NAMES.lm_head = None -class SmalLM(tiny_llama.TinyLlama): - """A SmalLM model built from the Edge Generative API layers. +class SmolLM(tiny_llama.TinyLlama): + """A SmolLM model built from the Edge Generative API layers. - SmalLM shares the same architecture as TinyLlama, but with different model + SmolLM shares the same architecture as TinyLlama, but with different model sizes. """ def __init__(self, config: cfg.ModelConfig): super().__init__(config) - # SmalLM re-uses the embedding as the head projection layer. + # SmolLM re-uses the embedding as the head projection layer. self.lm_head.weight.data = self.tok_embedding.weight.data def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig: - """Returns the model config for a SmalLM 135M model. + """Returns the model config for a SmolLM 135M model. Args: kv_cache_max_len (int): The maximum sequence length of the KV cache. Default is 1024. Returns: - The model config for a SmalLM model. + The model config for a SmolLM model. """ attn_config = cfg.AttentionConfig( num_heads=9, @@ -90,14 +90,14 @@ def get_fake_model_config(**kwargs) -> cfg.ModelConfig: config = get_model_config(**kwargs) config.vocab_size = 128 config.num_layers = 2 - # SmalLM has only one block config. + # SmolLM has only one block config. config.block_config(0).ff_config.intermediate_size = 64 return config def build_model(checkpoint_path: str, **kwargs) -> nn.Module: config = get_model_config(**kwargs) - model = SmalLM(config) + model = SmolLM(config) loader = loading_utils.ModelLoader(checkpoint_path, TENSOR_NAMES) # Since embedding and lm-head use the same weight, we need to set strict # to False. @@ -107,10 +107,10 @@ def build_model(checkpoint_path: str, **kwargs) -> nn.Module: def define_and_run(checkpoint_path: str) -> None: - """Instantiates and runs a SmalLM model.""" + """Instantiates and runs a SmolLM model.""" current_dir = pathlib.Path(__file__).parent.resolve() - smallm_goldens = torch.load(current_dir / "smallm_lm_logits.pt") + smollm_goldens = torch.load(current_dir / "smollm_lm_logits.pt") kv_cache_max_len = 1024 model = build_model(checkpoint_path, kv_cache_max_len=kv_cache_max_len) idx = torch.from_numpy(np.array([[1, 2, 3, 4]])) @@ -120,12 +120,12 @@ def define_and_run(checkpoint_path: str) -> None: kv = kv_utils.KVCache.from_model_config(model.config) output = model.forward(tokens, input_pos, kv) assert torch.allclose( - smallm_goldens, output["logits"][0, idx.shape[1] - 1, :], atol=1e-05 + smollm_goldens, output["logits"][0, idx.shape[1] - 1, :], atol=1e-05 ) if __name__ == "__main__": input_checkpoint_path = os.path.join( - pathlib.Path.home(), "Downloads/llm_data/smallm" + pathlib.Path.home(), "Downloads/llm_data/smollm" ) define_and_run(input_checkpoint_path) diff --git a/ai_edge_torch/generative/examples/smallm/smallm_lm_logits.pt b/ai_edge_torch/generative/examples/smollm/smollm_lm_logits.pt similarity index 100% rename from ai_edge_torch/generative/examples/smallm/smallm_lm_logits.pt rename to ai_edge_torch/generative/examples/smollm/smollm_lm_logits.pt diff --git a/ai_edge_torch/generative/test/test_model_conversion_large.py b/ai_edge_torch/generative/test/test_model_conversion_large.py index adac022f..270b8699 100644 --- a/ai_edge_torch/generative/test/test_model_conversion_large.py +++ b/ai_edge_torch/generative/test/test_model_conversion_large.py @@ -21,7 +21,7 @@ from ai_edge_torch.generative.examples.gemma import gemma2 from ai_edge_torch.generative.examples.openelm import openelm from ai_edge_torch.generative.examples.phi import phi2 -from ai_edge_torch.generative.examples.smallm import smallm +from ai_edge_torch.generative.examples.smollm import smollm from ai_edge_torch.generative.layers import kv_cache from ai_edge_torch.generative.test import utils as test_utils import numpy as np @@ -113,9 +113,9 @@ def test_phi2(self): ai_edge_config.Config.use_torch_xla, reason="tests with custom ops are not supported on oss", ) - def test_smallm(self): - config = smallm.get_fake_model_config() - pytorch_model = smallm.SmalLM(config).eval() + def test_smollm(self): + config = smollm.get_fake_model_config() + pytorch_model = smollm.SmolLM(config).eval() self._test_model(config, pytorch_model, "prefill", atol=1e-4, rtol=1e-5) @googletest.skipIf(