From 537480a36ddbfcb620faade490e167e30552079a Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Thu, 31 Oct 2024 23:25:26 +0000 Subject: [PATCH 1/3] Add support for MobileLLM --- README.md | 1 + docs/snippets/6_supported-models.snippet | 1 + scripts/convert.py | 1 + src/configs.js | 1 + src/models.js | 10 ++++++++++ 5 files changed, 14 insertions(+) diff --git a/README.md b/README.md index 474b87f18..7185dbfe8 100644 --- a/README.md +++ b/README.md @@ -352,6 +352,7 @@ You can refine your search by selecting the task you're interested in (e.g., [te 1. **[MMS](https://huggingface.co/docs/transformers/model_doc/mms)** (from Facebook) released with the paper [Scaling Speech Technology to 1,000+ Languages](https://arxiv.org/abs/2305.13516) by Vineel Pratap, Andros Tjandra, Bowen Shi, Paden Tomasello, Arun Babu, Sayani Kundu, Ali Elkahky, Zhaoheng Ni, Apoorv Vyas, Maryam Fazel-Zarandi, Alexei Baevski, Yossi Adi, Xiaohui Zhang, Wei-Ning Hsu, Alexis Conneau, Michael Auli. 1. **[MobileBERT](https://huggingface.co/docs/transformers/model_doc/mobilebert)** (from CMU/Google Brain) released with the paper [MobileBERT: a Compact Task-Agnostic BERT for Resource-Limited Devices](https://arxiv.org/abs/2004.02984) by Zhiqing Sun, Hongkun Yu, Xiaodan Song, Renjie Liu, Yiming Yang, and Denny Zhou. 1. **MobileCLIP** (from Apple) released with the paper [MobileCLIP: Fast Image-Text Models through Multi-Modal Reinforced Training](https://arxiv.org/abs/2311.17049) by Pavan Kumar Anasosalu Vasu, Hadi Pouransari, Fartash Faghri, Raviteja Vemulapalli, Oncel Tuzel. +1. **MobileLLM** (from Meta) released with the paper [MobileLLM: Optimizing Sub-billion Parameter Language Models for On-Device Use Cases](https://arxiv.org/abs/2402.14905) by Zechun Liu, Changsheng Zhao, Forrest Iandola, Chen Lai, Yuandong Tian, Igor Fedorov, Yunyang Xiong, Ernie Chang, Yangyang Shi, Raghuraman Krishnamoorthi, Liangzhen Lai, Vikas Chandra. 1. **[MobileNetV1](https://huggingface.co/docs/transformers/model_doc/mobilenet_v1)** (from Google Inc.) released with the paper [MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications](https://arxiv.org/abs/1704.04861) by Andrew G. Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, Tobias Weyand, Marco Andreetto, Hartwig Adam. 1. **[MobileNetV2](https://huggingface.co/docs/transformers/model_doc/mobilenet_v2)** (from Google Inc.) released with the paper [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381) by Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen. 1. **MobileNetV3** (from Google Inc.) released with the paper [Searching for MobileNetV3](https://arxiv.org/abs/1905.02244) by Andrew Howard, Mark Sandler, Grace Chu, Liang-Chieh Chen, Bo Chen, Mingxing Tan, Weijun Wang, Yukun Zhu, Ruoming Pang, Vijay Vasudevan, Quoc V. Le, Hartwig Adam. diff --git a/docs/snippets/6_supported-models.snippet b/docs/snippets/6_supported-models.snippet index f1bcdad44..8e97152c7 100644 --- a/docs/snippets/6_supported-models.snippet +++ b/docs/snippets/6_supported-models.snippet @@ -67,6 +67,7 @@ 1. **[MMS](https://huggingface.co/docs/transformers/model_doc/mms)** (from Facebook) released with the paper [Scaling Speech Technology to 1,000+ Languages](https://arxiv.org/abs/2305.13516) by Vineel Pratap, Andros Tjandra, Bowen Shi, Paden Tomasello, Arun Babu, Sayani Kundu, Ali Elkahky, Zhaoheng Ni, Apoorv Vyas, Maryam Fazel-Zarandi, Alexei Baevski, Yossi Adi, Xiaohui Zhang, Wei-Ning Hsu, Alexis Conneau, Michael Auli. 1. **[MobileBERT](https://huggingface.co/docs/transformers/model_doc/mobilebert)** (from CMU/Google Brain) released with the paper [MobileBERT: a Compact Task-Agnostic BERT for Resource-Limited Devices](https://arxiv.org/abs/2004.02984) by Zhiqing Sun, Hongkun Yu, Xiaodan Song, Renjie Liu, Yiming Yang, and Denny Zhou. 1. **MobileCLIP** (from Apple) released with the paper [MobileCLIP: Fast Image-Text Models through Multi-Modal Reinforced Training](https://arxiv.org/abs/2311.17049) by Pavan Kumar Anasosalu Vasu, Hadi Pouransari, Fartash Faghri, Raviteja Vemulapalli, Oncel Tuzel. +1. **MobileLLM** (from Meta) released with the paper [MobileLLM: Optimizing Sub-billion Parameter Language Models for On-Device Use Cases](https://arxiv.org/abs/2402.14905) by Zechun Liu, Changsheng Zhao, Forrest Iandola, Chen Lai, Yuandong Tian, Igor Fedorov, Yunyang Xiong, Ernie Chang, Yangyang Shi, Raghuraman Krishnamoorthi, Liangzhen Lai, Vikas Chandra. 1. **[MobileNetV1](https://huggingface.co/docs/transformers/model_doc/mobilenet_v1)** (from Google Inc.) released with the paper [MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications](https://arxiv.org/abs/1704.04861) by Andrew G. Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, Tobias Weyand, Marco Andreetto, Hartwig Adam. 1. **[MobileNetV2](https://huggingface.co/docs/transformers/model_doc/mobilenet_v2)** (from Google Inc.) released with the paper [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381) by Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen. 1. **MobileNetV3** (from Google Inc.) released with the paper [Searching for MobileNetV3](https://arxiv.org/abs/1905.02244) by Andrew Howard, Mark Sandler, Grace Chu, Liang-Chieh Chen, Bo Chen, Mingxing Tan, Weijun Wang, Yukun Zhu, Ruoming Pang, Vijay Vasudevan, Quoc V. Le, Hartwig Adam. diff --git a/scripts/convert.py b/scripts/convert.py index bf9265e48..c94d55585 100644 --- a/scripts/convert.py +++ b/scripts/convert.py @@ -41,6 +41,7 @@ 'starcoder2', 'openelm', 'gemma', + 'mobilellm', # Encoder-decoder models 'whisper', diff --git a/src/configs.js b/src/configs.js index 4bc95cf80..1a15af4c1 100644 --- a/src/configs.js +++ b/src/configs.js @@ -91,6 +91,7 @@ function getNormalizedConfig(config) { mapping['hidden_size'] = 'hidden_size'; break; case 'llama': + case 'mobilellm': case 'granite': case 'cohere': case 'mistral': diff --git a/src/models.js b/src/models.js index d357a83e4..eca48dbd0 100644 --- a/src/models.js +++ b/src/models.js @@ -3810,6 +3810,14 @@ export class LlamaForCausalLM extends LlamaPreTrainedModel { } ////////////////////////////////////////////////// +////////////////////////////////////////////////// +// MobileLLM models +export class MobileLLMPreTrainedModel extends PreTrainedModel { } +export class MobileLLMModel extends MobileLLMPreTrainedModel { } +export class MobileLLMForCausalLM extends MobileLLMPreTrainedModel { } +////////////////////////////////////////////////// + + ////////////////////////////////////////////////// // Granite models export class GranitePreTrainedModel extends PreTrainedModel { } @@ -6125,6 +6133,7 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([ ['gpt_neox', ['GPTNeoXModel', GPTNeoXModel]], ['codegen', ['CodeGenModel', CodeGenModel]], ['llama', ['LlamaModel', LlamaModel]], + ['mobilellm', ['MobileLLMModel', MobileLLMModel]], ['granite', ['GraniteModel', GraniteModel]], ['cohere', ['CohereModel', CohereModel]], ['gemma', ['GemmaModel', GemmaModel]], @@ -6214,6 +6223,7 @@ const MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = new Map([ ['gpt_neox', ['GPTNeoXForCausalLM', GPTNeoXForCausalLM]], ['codegen', ['CodeGenForCausalLM', CodeGenForCausalLM]], ['llama', ['LlamaForCausalLM', LlamaForCausalLM]], + ['mobilellm', ['MobileLLMForCausalLM', MobileLLMForCausalLM]], ['granite', ['GraniteForCausalLM', GraniteForCausalLM]], ['cohere', ['CohereForCausalLM', CohereForCausalLM]], ['gemma', ['GemmaForCausalLM', GemmaForCausalLM]], From 4a41945c3bf00f094c72b36e49ed0a9bfbba40a8 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Sun, 3 Nov 2024 04:29:08 +0000 Subject: [PATCH 2/3] Bump conversion script versions in requirements.txt --- scripts/requirements.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/requirements.txt b/scripts/requirements.txt index 9773d04e7..c6616010e 100644 --- a/scripts/requirements.txt +++ b/scripts/requirements.txt @@ -1,9 +1,9 @@ -transformers[torch]==4.43.4 -onnxruntime==1.19.2 -optimum==1.21.3 +transformers[torch]==4.46.1 +onnxruntime==1.20.0 +optimum==1.23.3 onnx==1.16.2 onnxconverter-common==1.14.0 -tqdm==4.66.5 -onnxslim==0.1.31 +tqdm==4.66.6 +onnxslim==0.1.36 --extra-index-url https://pypi.ngc.nvidia.com onnx_graphsurgeon==0.3.27 From 5a6864a7bcf083ce192c56ca4588bba60159a991 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Sun, 3 Nov 2024 04:38:03 +0000 Subject: [PATCH 3/3] Bump dependencies --- package-lock.json | 42 ++++++++++++++++++++---------------------- package.json | 6 +++--- 2 files changed, 23 insertions(+), 25 deletions(-) diff --git a/package-lock.json b/package-lock.json index d1b926a33..69839ff0c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,9 +9,9 @@ "version": "3.0.1", "license": "Apache-2.0", "dependencies": { - "@huggingface/jinja": "^0.3.0", - "onnxruntime-node": "1.19.2", - "onnxruntime-web": "1.21.0-dev.20241024-d9ca84ef96", + "@huggingface/jinja": "^0.3.2", + "onnxruntime-node": "1.20.0", + "onnxruntime-web": "1.20.0", "sharp": "^0.33.5" }, "devDependencies": { @@ -753,9 +753,10 @@ } }, "node_modules/@huggingface/jinja": { - "version": "0.3.0", - "resolved": "https://registry.npmjs.org/@huggingface/jinja/-/jinja-0.3.0.tgz", - "integrity": "sha512-GLJzso0M07ZncFkrJMIXVU4os6GFbPocD4g8fMQPMGJubf48FtGOsUORH2rtFdXPIPelz8SLBMn8ZRmOTwXm9Q==", + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/@huggingface/jinja/-/jinja-0.3.2.tgz", + "integrity": "sha512-F2FvuIc+w1blGsaqJI/OErRbWH6bVJDCBI8Rm5D86yZ2wlwrGERsfIaru7XUv9eYC3DMP3ixDRRtF0h6d8AZcQ==", + "license": "MIT", "engines": { "node": ">=18" } @@ -6069,44 +6070,41 @@ } }, "node_modules/onnxruntime-common": { - "version": "1.19.2", - "resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.19.2.tgz", - "integrity": "sha512-a4R7wYEVFbZBlp0BfhpbFWqe4opCor3KM+5Wm22Az3NGDcQMiU2hfG/0MfnBs+1ZrlSGmlgWeMcXQkDk1UFb8Q==" + "version": "1.20.0", + "resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.20.0.tgz", + "integrity": "sha512-9ehS4ul5fBszIcHhfxuDgk45lO+Fqrxmrgwk1Pxb1JRvbQiCB/v9Royv95SRCWHktLMviqNjBsEd/biJhd39cg==", + "license": "MIT" }, "node_modules/onnxruntime-node": { - "version": "1.19.2", - "resolved": "https://registry.npmjs.org/onnxruntime-node/-/onnxruntime-node-1.19.2.tgz", - "integrity": "sha512-9eHMP/HKbbeUcqte1JYzaaRC8JPn7ojWeCeoyShO86TOR97OCyIyAIOGX3V95ErjslVhJRXY8Em/caIUc0hm1Q==", + "version": "1.20.0", + "resolved": "https://registry.npmjs.org/onnxruntime-node/-/onnxruntime-node-1.20.0.tgz", + "integrity": "sha512-mjLge++8WHfyCZ4IqZ1FbUbtFAfGht7BLCkOeBL1L9PFV27YHwluXkNt7m0Pgf6TR2P5pqVZsD3zqFbFP6QTMw==", "hasInstallScript": true, + "license": "MIT", "os": [ "win32", "darwin", "linux" ], "dependencies": { - "onnxruntime-common": "1.19.2", + "onnxruntime-common": "1.20.0", "tar": "^7.0.1" } }, "node_modules/onnxruntime-web": { - "version": "1.21.0-dev.20241024-d9ca84ef96", - "resolved": "https://registry.npmjs.org/onnxruntime-web/-/onnxruntime-web-1.21.0-dev.20241024-d9ca84ef96.tgz", - "integrity": "sha512-ANSQfMALvCviN3Y4tvTViKofKToV1WUb2r2VjZVCi3uUBPaK15oNJyIxhsNyEckBr/Num3JmSXlkHOD8HfVzSQ==", + "version": "1.20.0", + "resolved": "https://registry.npmjs.org/onnxruntime-web/-/onnxruntime-web-1.20.0.tgz", + "integrity": "sha512-IoUf8dqHFJLV4DUSz+Ok+xxyN6cQk57gb20m6PZE5gag3QXuvegYMq9dG8t/QF4JjTKIwvfvnr16ouzCCB9IMA==", "license": "MIT", "dependencies": { "flatbuffers": "^1.12.0", "guid-typescript": "^1.0.9", "long": "^5.2.3", - "onnxruntime-common": "1.20.0-dev.20241016-2b8fc5529b", + "onnxruntime-common": "1.20.0", "platform": "^1.3.6", "protobufjs": "^7.2.4" } }, - "node_modules/onnxruntime-web/node_modules/onnxruntime-common": { - "version": "1.20.0-dev.20241016-2b8fc5529b", - "resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.20.0-dev.20241016-2b8fc5529b.tgz", - "integrity": "sha512-KZK8b6zCYGZFjd4ANze0pqBnqnFTS3GIVeclQpa2qseDpXrCQJfkWBixRcrZShNhm3LpFOZ8qJYFC5/qsJK9WQ==" - }, "node_modules/open": { "version": "8.4.2", "resolved": "https://registry.npmjs.org/open/-/open-8.4.2.tgz", diff --git a/package.json b/package.json index d1a519490..ced9b5b72 100644 --- a/package.json +++ b/package.json @@ -61,9 +61,9 @@ }, "homepage": "https://github.com/huggingface/transformers.js#readme", "dependencies": { - "@huggingface/jinja": "^0.3.0", - "onnxruntime-node": "1.19.2", - "onnxruntime-web": "1.21.0-dev.20241024-d9ca84ef96", + "@huggingface/jinja": "^0.3.2", + "onnxruntime-node": "1.20.0", + "onnxruntime-web": "1.20.0", "sharp": "^0.33.5" }, "devDependencies": {