Add verify.py for gemma models

- gemma verification depends on gemma_pytorch
- Replaced gemma.py with gemma1.py to avoid conflict with gemma_pytorch
- Updated README.md accordingly

PiperOrigin-RevId: 676855492
google-ai-edge · Sep 20, 2024 · 44e57bb · 44e57bb
1 parent 7701664
commit 44e57bb
Show file tree

Hide file tree

Showing 18 changed files with 314 additions and 96 deletions.
diff --git a/ai_edge_torch/generative/examples/README.md b/ai_edge_torch/generative/examples/README.md
@@ -101,6 +101,8 @@ The user needs to provide a layer name template (TensorNames) for the source
 model. For `TinyLlama`, layer names can be found from the SafeTensors file.
 
 ```python
+import ai_edge_torch.generative.utilities.loader as loading_utils
+
 safetensors = loading_utils.load_safetensors("path_to_checkpoint")
 print(safetensors.keys())
 ```
@@ -130,6 +132,28 @@ Currently, `ModelLoader` supports PyTorch state dictionary and SafeTensors
 checkpoints. We recommend testing the mapped model against your reference implementation
 using a few input samples before proceeding to the conversion step.
 
+### Verify (re)authored model
+Once the model (re)authoring is complete, verify that it generates output
+close to that of the original model. The Generative API provides utilities
+that make it easy to verify models, as shown by `verify.py` in each
+example folder.
+
+To instantiate the original models, `verify.py` imports `kagglehub` and/or
+`transformers`, which may require user authentication tokens to download the
+original models. Please refer to the
+[Kagglehub page](https://www.kaggle.com/docs/api#authentication) or the
+[HuggingFace page](https://huggingface.co/docs/hub/en/security-tokens)
+for how to set up user authentication tokens.
+
+Verifying Gemma models requires the `gemma_pytorch` package, obtained from its
+GitHub repository.
+
+```bash
+pip install -q -U immutabledict sentencepiece
+git clone https://github.com/google/gemma_pytorch.git
+export PYTHONPATH=$PWD/gemma_pytorch:$PYTHONPATH
+```
+
 ### Model conversion
 In this step, we use the `ai_edge_torch`'s standard multi-signature conversion API to convert PyTorch `nn.Module` to a single TFLite flatbuffer for on-device execution. For example, in `tiny_llama/convert_to_tflite.py`, we use this python code to convert the `TinyLlama` model to a multi-signature TFLite model:
 https://github.com/google-ai-edge/ai-edge-torch/blob/853301630f2b2455bd2e2f73d8a47e1a1534c91c/ai_edge_torch/generative/examples/tiny_llama/convert_to_tflite.py#L26-L61

diff --git a/...ative/examples/gemma/convert_to_tflite.py → ...xamples/gemma/convert_gemma1_to_tflite.py b/...ative/examples/gemma/convert_to_tflite.py → ...xamples/gemma/convert_gemma1_to_tflite.py
@@ -13,14 +13,14 @@
 # limitations under the License.
 # ==============================================================================
 
-"""Example of converting a Gemma model to multi-signature tflite model."""
+"""Example of converting a Gemma1 model to multi-signature tflite model."""
 
 import os
 import pathlib
 
 from absl import app
 from absl import flags
-from ai_edge_torch.generative.examples.gemma import gemma
+from ai_edge_torch.generative.examples.gemma import gemma1
 from ai_edge_torch.generative.utilities import converter
 
 _CHECKPOINT_PATH = flags.DEFINE_string(
@@ -51,7 +51,7 @@
 
 
 def main(_):
-  pytorch_model = gemma.build_2b_model(
+  pytorch_model = gemma1.build_2b_model(
       _CHECKPOINT_PATH.value, kv_cache_max_len=_KV_CACHE_MAX_LEN.value
   )
   quant_suffix = 'q8' if _QUANTIZE.value else 'f32'

diff --git a/..._torch/generative/examples/gemma/gemma.py → ...torch/generative/examples/gemma/gemma1.py b/..._torch/generative/examples/gemma/gemma.py → ...torch/generative/examples/gemma/gemma1.py
@@ -13,32 +13,26 @@
 # limitations under the License.
 # ==============================================================================
 
-"""Example of building a Gemma model."""
-
-import os
-import pathlib
+"""Example of building a Gemma1 model."""
 
 from ai_edge_torch.generative.layers import attention
 from ai_edge_torch.generative.layers import builder
 from ai_edge_torch.generative.layers import kv_cache as kv_utils
 import ai_edge_torch.generative.layers.attention_utils as attn_utils
 import ai_edge_torch.generative.layers.model_config as cfg
 import ai_edge_torch.generative.utilities.loader as loading_utils
-import numpy as np
 import torch
 from torch import nn
 
 TENSOR_NAMES = loading_utils.ModelLoader.TensorNames(
     ff_up_proj="model.layers.{}.mlp.up_proj",
     ff_down_proj="model.layers.{}.mlp.down_proj",
     ff_gate_proj="model.layers.{}.mlp.gate_proj",
-    attn_query_proj="model.layers.{}.self_attn.q_proj",
-    attn_key_proj="model.layers.{}.self_attn.k_proj",
-    attn_value_proj="model.layers.{}.self_attn.v_proj",
+    attn_fused_qkv_proj="model.layers.{}.self_attn.qkv_proj",
     attn_output_proj="model.layers.{}.self_attn.o_proj",
     pre_attn_norm="model.layers.{}.input_layernorm",
     post_attn_norm="model.layers.{}.post_attention_layernorm",
-    embedding="model.embed_tokens",
+    embedding="embedder",
     final_norm="model.norm",
     lm_head=None,
 )
@@ -192,30 +186,3 @@ def build_2b_model(checkpoint_path: str, **kwargs) -> nn.Module:
   loader.load(model, strict=False)
   model.eval()
   return model
-
-
-def define_and_run_2b(checkpoint_path: str) -> None:
-  """Instantiates and runs a Gemma 2B model."""
-
-  current_dir = pathlib.Path(__file__).parent.resolve()
-  gemma_goldens = torch.load(current_dir / "gemma_lm_logits.pt")
-
-  kv_cache_max_len = 1024
-  model = build_2b_model(checkpoint_path, kv_cache_max_len=kv_cache_max_len)
-  idx = torch.from_numpy(np.array([[1, 2, 3, 4]]))
-  tokens = torch.full((1, kv_cache_max_len), 0, dtype=torch.int, device="cpu")
-  tokens[0, :4] = idx
-  input_pos = torch.arange(0, kv_cache_max_len, dtype=torch.int)
-  kv = kv_utils.KVCache.from_model_config(model.config)
-  output = model.forward(tokens, input_pos, kv)
-  print("comparing with goldens..")
-  assert torch.allclose(
-      gemma_goldens, output["logits"][0, idx.shape[1] - 1, :], atol=1e-02
-  )
-
-
-if __name__ == "__main__":
-  input_checkpoint_path = os.path.join(
-      pathlib.Path.home(), "Downloads/llm_data/gemma-2b"
-  )
-  define_and_run_2b(input_checkpoint_path)
diff --git a/ai_edge_torch/generative/examples/gemma/gemma2.py b/ai_edge_torch/generative/examples/gemma/gemma2.py
@@ -267,29 +267,3 @@ def build_2b_model(checkpoint_path: str, **kwargs) -> nn.Module:
   loader.load(model, strict=False)
   model.eval()
   return model
-
-
-def define_and_run_2b(checkpoint_path: str) -> None:
-  """Instantiates and runs a Gemma2 2B model."""
-
-  current_dir = pathlib.Path(__file__).parent.resolve()
-  gemma2_goldens = torch.load(current_dir / "gemma2it_2b_golden.pt")
-  print("Running GEMMA 2")
-  kv_cache_max_len = 1024
-  model = build_2b_model(checkpoint_path, kv_cache_max_len=kv_cache_max_len)
-  toks = torch.from_numpy(
-      np.array([2, 651, 9456, 576, 573, 3520, 3858, 603, 235248])
-  )
-  tokens = torch.full((1, kv_cache_max_len), 0, dtype=torch.int, device="cpu")
-  tokens[0, :9] = toks
-  input_pos = torch.arange(0, kv_cache_max_len, dtype=torch.int)
-  kv = kv_utils.KVCache.from_model_config(model.config)
-  out = model.forward(tokens, input_pos, kv)
-  out_final = out["logits"][0, 8, :]
-  assert torch.allclose(gemma2_goldens, out_final, atol=1e-04)
-
-
-if __name__ == "__main__":
-  torch.set_printoptions(sci_mode=True)
-  path = os.path.join(pathlib.Path.home(), "Downloads/llm_data/gemma2-2b")
-  define_and_run_2b(path)
diff --git a/ai_edge_torch/generative/examples/gemma/gemma2_2b_golden.pt b/ai_edge_torch/generative/examples/gemma/gemma2_2b_golden.pt
diff --git a/ai_edge_torch/generative/examples/gemma/gemma2it_2b_golden.pt b/ai_edge_torch/generative/examples/gemma/gemma2it_2b_golden.pt
diff --git a/ai_edge_torch/generative/examples/gemma/gemma_lm_logits.pt b/ai_edge_torch/generative/examples/gemma/gemma_lm_logits.pt
diff --git a/ai_edge_torch/generative/examples/gemma/verify_gemma1.py b/ai_edge_torch/generative/examples/gemma/verify_gemma1.py
@@ -0,0 +1,55 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Verifies the reauthored Gemma1 model."""
+
+from absl import app
+from absl import flags
+from ai_edge_torch.generative.examples.gemma import gemma1
+from ai_edge_torch.generative.examples.gemma import verify_util
+from ai_edge_torch.generative.utilities import verifier
+import kagglehub
+
+_PROMPTS = flags.DEFINE_multi_string(
+    "prompts",
+    "What is the meaning of life?",
+    "The input prompts to generate answers.",
+)
+_MAX_NEW_TOKENS = flags.DEFINE_integer(
+    "max_new_tokens",
+    30,
+    "The maximum size of the generated tokens.",
+)
+
+
+def main(_):
+  checkpoint = kagglehub.model_download("google/gemma/pyTorch/2b-it")
+
+  verifier.log_msg("Building the reauthored model from", checkpoint)
+  reauthored_model = gemma1.build_2b_model(checkpoint)
+
+  verify_util.verify_reauthored_gemma_model(
+      checkpoint=checkpoint,
+      variant="2b",
+      reauthored_model=reauthored_model,
+      weight_filename="gemma-2b-it.ckpt",
+      generate_prompts=_PROMPTS.value,
+      forward_input_ids=[[1, 2, 3, 4]],
+      max_new_tokens=_MAX_NEW_TOKENS.value,
+  )
+
+
+if __name__ == "__main__":
+  app.run(main)
diff --git a/ai_edge_torch/generative/examples/gemma/verify_gemma2.py b/ai_edge_torch/generative/examples/gemma/verify_gemma2.py
@@ -0,0 +1,55 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Verifies the reauthored Gemma2 model."""
+
+from absl import app
+from absl import flags
+from ai_edge_torch.generative.examples.gemma import gemma2
+from ai_edge_torch.generative.examples.gemma import verify_util
+from ai_edge_torch.generative.utilities import verifier
+import kagglehub
+
+_PROMPTS = flags.DEFINE_multi_string(
+    "prompts",
+    "What is the meaning of life?",
+    "The input prompts to generate answers.",
+)
+_MAX_NEW_TOKENS = flags.DEFINE_integer(
+    "max_new_tokens",
+    30,
+    "The maximum size of the generated tokens.",
+)
+
+
+def main(_):
+  checkpoint = kagglehub.model_download("google/gemma-2/pyTorch/gemma-2-2b-it")
+
+  verifier.log_msg("Building the reauthored model from", checkpoint)
+  reauthored_model = gemma2.build_2b_model(checkpoint)
+
+  verify_util.verify_reauthored_gemma_model(
+      checkpoint=checkpoint,
+      variant="2b-v2",
+      reauthored_model=reauthored_model,
+      generate_prompts=_PROMPTS.value,
+      forward_input_ids=[[2, 651, 9456, 576, 573, 3520, 3858, 603, 235248]],
+      max_new_tokens=_MAX_NEW_TOKENS.value,
+      atol=1e-04,
+  )
+
+
+if __name__ == "__main__":
+  app.run(main)