From 537480a36ddbfcb620faade490e167e30552079a Mon Sep 17 00:00:00 2001
From: Joshua Lochner <admin@xenova.com>
Date: Thu, 31 Oct 2024 23:25:26 +0000
Subject: [PATCH 1/3] Add support for MobileLLM

---
 README.md                                |  1 +
 docs/snippets/6_supported-models.snippet |  1 +
 scripts/convert.py                       |  1 +
 src/configs.js                           |  1 +
 src/models.js                            | 10 ++++++++++
 5 files changed, 14 insertions(+)

diff --git a/README.md b/README.md
index 474b87f18..7185dbfe8 100644
--- a/README.md
+++ b/README.md
@@ -352,6 +352,7 @@ You can refine your search by selecting the task you're interested in (e.g., [te
 1. **[MMS](https://huggingface.co/docs/transformers/model_doc/mms)** (from Facebook) released with the paper [Scaling Speech Technology to 1,000+ Languages](https://arxiv.org/abs/2305.13516) by Vineel Pratap, Andros Tjandra, Bowen Shi, Paden Tomasello, Arun Babu, Sayani Kundu, Ali Elkahky, Zhaoheng Ni, Apoorv Vyas, Maryam Fazel-Zarandi, Alexei Baevski, Yossi Adi, Xiaohui Zhang, Wei-Ning Hsu, Alexis Conneau, Michael Auli.
 1. **[MobileBERT](https://huggingface.co/docs/transformers/model_doc/mobilebert)** (from CMU/Google Brain) released with the paper [MobileBERT: a Compact Task-Agnostic BERT for Resource-Limited Devices](https://arxiv.org/abs/2004.02984) by Zhiqing Sun, Hongkun Yu, Xiaodan Song, Renjie Liu, Yiming Yang, and Denny Zhou.
 1. **MobileCLIP** (from Apple) released with the paper [MobileCLIP: Fast Image-Text Models through Multi-Modal Reinforced Training](https://arxiv.org/abs/2311.17049) by Pavan Kumar Anasosalu Vasu, Hadi Pouransari, Fartash Faghri, Raviteja Vemulapalli, Oncel Tuzel.
+1. **MobileLLM** (from Meta) released with the paper [MobileLLM: Optimizing Sub-billion Parameter Language Models for On-Device Use Cases](https://arxiv.org/abs/2402.14905) by Zechun Liu, Changsheng Zhao, Forrest Iandola, Chen Lai, Yuandong Tian, Igor Fedorov, Yunyang Xiong, Ernie Chang, Yangyang Shi, Raghuraman Krishnamoorthi, Liangzhen Lai, Vikas Chandra.
 1. **[MobileNetV1](https://huggingface.co/docs/transformers/model_doc/mobilenet_v1)** (from Google Inc.) released with the paper [MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications](https://arxiv.org/abs/1704.04861) by Andrew G. Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, Tobias Weyand, Marco Andreetto, Hartwig Adam.
 1. **[MobileNetV2](https://huggingface.co/docs/transformers/model_doc/mobilenet_v2)** (from Google Inc.) released with the paper [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381) by Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen.
 1. **MobileNetV3** (from Google Inc.) released with the paper [Searching for MobileNetV3](https://arxiv.org/abs/1905.02244) by Andrew Howard, Mark Sandler, Grace Chu, Liang-Chieh Chen, Bo Chen, Mingxing Tan, Weijun Wang, Yukun Zhu, Ruoming Pang, Vijay Vasudevan, Quoc V. Le, Hartwig Adam.
diff --git a/docs/snippets/6_supported-models.snippet b/docs/snippets/6_supported-models.snippet
index f1bcdad44..8e97152c7 100644
--- a/docs/snippets/6_supported-models.snippet
+++ b/docs/snippets/6_supported-models.snippet
@@ -67,6 +67,7 @@
 1. **[MMS](https://huggingface.co/docs/transformers/model_doc/mms)** (from Facebook) released with the paper [Scaling Speech Technology to 1,000+ Languages](https://arxiv.org/abs/2305.13516) by Vineel Pratap, Andros Tjandra, Bowen Shi, Paden Tomasello, Arun Babu, Sayani Kundu, Ali Elkahky, Zhaoheng Ni, Apoorv Vyas, Maryam Fazel-Zarandi, Alexei Baevski, Yossi Adi, Xiaohui Zhang, Wei-Ning Hsu, Alexis Conneau, Michael Auli.
 1. **[MobileBERT](https://huggingface.co/docs/transformers/model_doc/mobilebert)** (from CMU/Google Brain) released with the paper [MobileBERT: a Compact Task-Agnostic BERT for Resource-Limited Devices](https://arxiv.org/abs/2004.02984) by Zhiqing Sun, Hongkun Yu, Xiaodan Song, Renjie Liu, Yiming Yang, and Denny Zhou.
 1. **MobileCLIP** (from Apple) released with the paper [MobileCLIP: Fast Image-Text Models through Multi-Modal Reinforced Training](https://arxiv.org/abs/2311.17049) by Pavan Kumar Anasosalu Vasu, Hadi Pouransari, Fartash Faghri, Raviteja Vemulapalli, Oncel Tuzel.
+1. **MobileLLM** (from Meta) released with the paper [MobileLLM: Optimizing Sub-billion Parameter Language Models for On-Device Use Cases](https://arxiv.org/abs/2402.14905) by Zechun Liu, Changsheng Zhao, Forrest Iandola, Chen Lai, Yuandong Tian, Igor Fedorov, Yunyang Xiong, Ernie Chang, Yangyang Shi, Raghuraman Krishnamoorthi, Liangzhen Lai, Vikas Chandra.
 1. **[MobileNetV1](https://huggingface.co/docs/transformers/model_doc/mobilenet_v1)** (from Google Inc.) released with the paper [MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications](https://arxiv.org/abs/1704.04861) by Andrew G. Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, Tobias Weyand, Marco Andreetto, Hartwig Adam.
 1. **[MobileNetV2](https://huggingface.co/docs/transformers/model_doc/mobilenet_v2)** (from Google Inc.) released with the paper [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381) by Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen.
 1. **MobileNetV3** (from Google Inc.) released with the paper [Searching for MobileNetV3](https://arxiv.org/abs/1905.02244) by Andrew Howard, Mark Sandler, Grace Chu, Liang-Chieh Chen, Bo Chen, Mingxing Tan, Weijun Wang, Yukun Zhu, Ruoming Pang, Vijay Vasudevan, Quoc V. Le, Hartwig Adam.
diff --git a/scripts/convert.py b/scripts/convert.py
index bf9265e48..c94d55585 100644
--- a/scripts/convert.py
+++ b/scripts/convert.py
@@ -41,6 +41,7 @@
     'starcoder2',
     'openelm',
     'gemma',
+    'mobilellm',
 
     # Encoder-decoder models
     'whisper',
diff --git a/src/configs.js b/src/configs.js
index 4bc95cf80..1a15af4c1 100644
--- a/src/configs.js
+++ b/src/configs.js
@@ -91,6 +91,7 @@ function getNormalizedConfig(config) {
             mapping['hidden_size'] = 'hidden_size';
             break;
         case 'llama':
+        case 'mobilellm':
         case 'granite':
         case 'cohere':
         case 'mistral':
diff --git a/src/models.js b/src/models.js
index d357a83e4..eca48dbd0 100644
--- a/src/models.js
+++ b/src/models.js
@@ -3810,6 +3810,14 @@ export class LlamaForCausalLM extends LlamaPreTrainedModel { }
 //////////////////////////////////////////////////
 
 
+//////////////////////////////////////////////////
+// MobileLLM models (model type 'mobilellm'; grouped with the 'llama' case in getNormalizedConfig, src/configs.js)
+export class MobileLLMPreTrainedModel extends PreTrainedModel { } // common base class for the MobileLLM classes below
+export class MobileLLMModel extends MobileLLMPreTrainedModel { } // registered under 'mobilellm' in MODEL_MAPPING_NAMES_DECODER_ONLY
+export class MobileLLMForCausalLM extends MobileLLMPreTrainedModel { } // registered under 'mobilellm' in MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
+//////////////////////////////////////////////////
+
+
 //////////////////////////////////////////////////
 // Granite models
 export class GranitePreTrainedModel extends PreTrainedModel { }
@@ -6125,6 +6133,7 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
     ['gpt_neox', ['GPTNeoXModel', GPTNeoXModel]],
     ['codegen', ['CodeGenModel', CodeGenModel]],
     ['llama', ['LlamaModel', LlamaModel]],
+    ['mobilellm', ['MobileLLMModel', MobileLLMModel]],
     ['granite', ['GraniteModel', GraniteModel]],
     ['cohere', ['CohereModel', CohereModel]],
     ['gemma', ['GemmaModel', GemmaModel]],
@@ -6214,6 +6223,7 @@ const MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = new Map([
     ['gpt_neox', ['GPTNeoXForCausalLM', GPTNeoXForCausalLM]],
     ['codegen', ['CodeGenForCausalLM', CodeGenForCausalLM]],
     ['llama', ['LlamaForCausalLM', LlamaForCausalLM]],
+    ['mobilellm', ['MobileLLMForCausalLM', MobileLLMForCausalLM]],
     ['granite', ['GraniteForCausalLM', GraniteForCausalLM]],
     ['cohere', ['CohereForCausalLM', CohereForCausalLM]],
     ['gemma', ['GemmaForCausalLM', GemmaForCausalLM]],

From 4a41945c3bf00f094c72b36e49ed0a9bfbba40a8 Mon Sep 17 00:00:00 2001
From: Joshua Lochner <admin@xenova.com>
Date: Sun, 3 Nov 2024 04:29:08 +0000
Subject: [PATCH 2/3] Bump conversion script dependency versions in requirements.txt

---
 scripts/requirements.txt | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/scripts/requirements.txt b/scripts/requirements.txt
index 9773d04e7..c6616010e 100644
--- a/scripts/requirements.txt
+++ b/scripts/requirements.txt
@@ -1,9 +1,9 @@
-transformers[torch]==4.43.4
-onnxruntime==1.19.2
-optimum==1.21.3
+transformers[torch]==4.46.1
+onnxruntime==1.20.0
+optimum==1.23.3
 onnx==1.16.2
 onnxconverter-common==1.14.0
-tqdm==4.66.5
-onnxslim==0.1.31
+tqdm==4.66.6
+onnxslim==0.1.36
 --extra-index-url https://pypi.ngc.nvidia.com
 onnx_graphsurgeon==0.3.27

From 5a6864a7bcf083ce192c56ca4588bba60159a991 Mon Sep 17 00:00:00 2001
From: Joshua Lochner <admin@xenova.com>
Date: Sun, 3 Nov 2024 04:38:03 +0000
Subject: [PATCH 3/3] Update dependencies (onnxruntime-node/web 1.20.0, @huggingface/jinja ^0.3.2)

---
 package-lock.json | 42 ++++++++++++++++++++----------------------
 package.json      |  6 +++---
 2 files changed, 23 insertions(+), 25 deletions(-)

diff --git a/package-lock.json b/package-lock.json
index d1b926a33..69839ff0c 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -9,9 +9,9 @@
       "version": "3.0.1",
       "license": "Apache-2.0",
       "dependencies": {
-        "@huggingface/jinja": "^0.3.0",
-        "onnxruntime-node": "1.19.2",
-        "onnxruntime-web": "1.21.0-dev.20241024-d9ca84ef96",
+        "@huggingface/jinja": "^0.3.2",
+        "onnxruntime-node": "1.20.0",
+        "onnxruntime-web": "1.20.0",
         "sharp": "^0.33.5"
       },
       "devDependencies": {
@@ -753,9 +753,10 @@
       }
     },
     "node_modules/@huggingface/jinja": {
-      "version": "0.3.0",
-      "resolved": "https://registry.npmjs.org/@huggingface/jinja/-/jinja-0.3.0.tgz",
-      "integrity": "sha512-GLJzso0M07ZncFkrJMIXVU4os6GFbPocD4g8fMQPMGJubf48FtGOsUORH2rtFdXPIPelz8SLBMn8ZRmOTwXm9Q==",
+      "version": "0.3.2",
+      "resolved": "https://registry.npmjs.org/@huggingface/jinja/-/jinja-0.3.2.tgz",
+      "integrity": "sha512-F2FvuIc+w1blGsaqJI/OErRbWH6bVJDCBI8Rm5D86yZ2wlwrGERsfIaru7XUv9eYC3DMP3ixDRRtF0h6d8AZcQ==",
+      "license": "MIT",
       "engines": {
         "node": ">=18"
       }
@@ -6069,44 +6070,41 @@
       }
     },
     "node_modules/onnxruntime-common": {
-      "version": "1.19.2",
-      "resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.19.2.tgz",
-      "integrity": "sha512-a4R7wYEVFbZBlp0BfhpbFWqe4opCor3KM+5Wm22Az3NGDcQMiU2hfG/0MfnBs+1ZrlSGmlgWeMcXQkDk1UFb8Q=="
+      "version": "1.20.0",
+      "resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.20.0.tgz",
+      "integrity": "sha512-9ehS4ul5fBszIcHhfxuDgk45lO+Fqrxmrgwk1Pxb1JRvbQiCB/v9Royv95SRCWHktLMviqNjBsEd/biJhd39cg==",
+      "license": "MIT"
     },
     "node_modules/onnxruntime-node": {
-      "version": "1.19.2",
-      "resolved": "https://registry.npmjs.org/onnxruntime-node/-/onnxruntime-node-1.19.2.tgz",
-      "integrity": "sha512-9eHMP/HKbbeUcqte1JYzaaRC8JPn7ojWeCeoyShO86TOR97OCyIyAIOGX3V95ErjslVhJRXY8Em/caIUc0hm1Q==",
+      "version": "1.20.0",
+      "resolved": "https://registry.npmjs.org/onnxruntime-node/-/onnxruntime-node-1.20.0.tgz",
+      "integrity": "sha512-mjLge++8WHfyCZ4IqZ1FbUbtFAfGht7BLCkOeBL1L9PFV27YHwluXkNt7m0Pgf6TR2P5pqVZsD3zqFbFP6QTMw==",
       "hasInstallScript": true,
+      "license": "MIT",
       "os": [
         "win32",
         "darwin",
         "linux"
       ],
       "dependencies": {
-        "onnxruntime-common": "1.19.2",
+        "onnxruntime-common": "1.20.0",
         "tar": "^7.0.1"
       }
     },
     "node_modules/onnxruntime-web": {
-      "version": "1.21.0-dev.20241024-d9ca84ef96",
-      "resolved": "https://registry.npmjs.org/onnxruntime-web/-/onnxruntime-web-1.21.0-dev.20241024-d9ca84ef96.tgz",
-      "integrity": "sha512-ANSQfMALvCviN3Y4tvTViKofKToV1WUb2r2VjZVCi3uUBPaK15oNJyIxhsNyEckBr/Num3JmSXlkHOD8HfVzSQ==",
+      "version": "1.20.0",
+      "resolved": "https://registry.npmjs.org/onnxruntime-web/-/onnxruntime-web-1.20.0.tgz",
+      "integrity": "sha512-IoUf8dqHFJLV4DUSz+Ok+xxyN6cQk57gb20m6PZE5gag3QXuvegYMq9dG8t/QF4JjTKIwvfvnr16ouzCCB9IMA==",
       "license": "MIT",
       "dependencies": {
         "flatbuffers": "^1.12.0",
         "guid-typescript": "^1.0.9",
         "long": "^5.2.3",
-        "onnxruntime-common": "1.20.0-dev.20241016-2b8fc5529b",
+        "onnxruntime-common": "1.20.0",
         "platform": "^1.3.6",
         "protobufjs": "^7.2.4"
       }
     },
-    "node_modules/onnxruntime-web/node_modules/onnxruntime-common": {
-      "version": "1.20.0-dev.20241016-2b8fc5529b",
-      "resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.20.0-dev.20241016-2b8fc5529b.tgz",
-      "integrity": "sha512-KZK8b6zCYGZFjd4ANze0pqBnqnFTS3GIVeclQpa2qseDpXrCQJfkWBixRcrZShNhm3LpFOZ8qJYFC5/qsJK9WQ=="
-    },
     "node_modules/open": {
       "version": "8.4.2",
       "resolved": "https://registry.npmjs.org/open/-/open-8.4.2.tgz",
diff --git a/package.json b/package.json
index d1a519490..ced9b5b72 100644
--- a/package.json
+++ b/package.json
@@ -61,9 +61,9 @@
   },
   "homepage": "https://github.com/huggingface/transformers.js#readme",
   "dependencies": {
-    "@huggingface/jinja": "^0.3.0",
-    "onnxruntime-node": "1.19.2",
-    "onnxruntime-web": "1.21.0-dev.20241024-d9ca84ef96",
+    "@huggingface/jinja": "^0.3.2",
+    "onnxruntime-node": "1.20.0",
+    "onnxruntime-web": "1.20.0",
     "sharp": "^0.33.5"
   },
   "devDependencies": {