From e8dc8ec8fe2b2ba1a64e3ce6ce9d2543ad8d349d Mon Sep 17 00:00:00 2001
From: b4rtaz
Date: Sun, 2 Jun 2024 00:01:52 +0200
Subject: [PATCH] update readme.md.

---
 README.md | 1 +
 launch.py | 5 +++++
 2 files changed, 6 insertions(+)

diff --git a/README.md b/README.md
index 2c94910..914e8a7 100644
--- a/README.md
+++ b/README.md
@@ -17,6 +17,7 @@ Python 3 and C++ compiler required. The command will download the model and the
 | Model                   | Purpose   | Size     | Command                                   |
 | ----------------------- | --------- | -------- | ----------------------------------------- |
+| TinyLlama 1.1B 3T Q40   | Benchmark | 844 MB   | `python launch.py tinyllama_1_1b_3t_q40`  |
 | Llama 3 8B Q40          | Benchmark | 6.32 GB  | `python launch.py llama3_8b_q40`          |
 | Llama 3 8B Instruct Q40 | Chat, API | 6.32 GB  | `python launch.py llama3_8b_instruct_q40` |

diff --git a/launch.py b/launch.py
index 620270b..bfd3010 100644
--- a/launch.py
+++ b/launch.py
@@ -4,6 +4,11 @@
 # ['model-url', 'tokenizer-url', 'weights-float-type', 'buffer-float-type', 'model-type']
 MODELS = {
+    'tinyllama_1_1b_3t_q40': [
+        'https://huggingface.co/b4rtaz/TinyLlama-1.1B-3T-Distributed-Llama/resolve/main/dllama_model_tinylama_1.1b_3t_q40.m?download=true',
+        'https://huggingface.co/b4rtaz/TinyLlama-1.1B-3T-Distributed-Llama/resolve/main/dllama_tokenizer_tinylama_1.1b_3t.t?download=true',
+        'q40', 'q80', 'base'
+    ],
     'llama3_8b_q40': [
         'https://huggingface.co/b4rtaz/Llama-3-8B-Q40-Distributed-Llama/resolve/main/dllama_model_meta-llama-3-8b_q40.m?download=true',
         'https://huggingface.co/b4rtaz/Llama-3-8B-Q40-Distributed-Llama/resolve/main/dllama_tokenizer_llama3.t?download=true',
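
Note: the comment in launch.py documents each MODELS entry as ['model-url', 'tokenizer-url', 'weights-float-type', 'buffer-float-type', 'model-type']. The following is a minimal sketch, not the project's actual launch.py, showing how the new tinyllama_1_1b_3t_q40 entry could be looked up and its two files fetched; the fetch/resolve helpers and the local file paths are assumptions made for illustration.

# Sketch only: how a MODELS entry might be consumed. Helper names and paths
# are hypothetical; only the entry itself comes from the patch above.
import os
import sys
import urllib.request

MODELS = {
    'tinyllama_1_1b_3t_q40': [
        'https://huggingface.co/b4rtaz/TinyLlama-1.1B-3T-Distributed-Llama/resolve/main/dllama_model_tinylama_1.1b_3t_q40.m?download=true',
        'https://huggingface.co/b4rtaz/TinyLlama-1.1B-3T-Distributed-Llama/resolve/main/dllama_tokenizer_tinylama_1.1b_3t.t?download=true',
        'q40', 'q80', 'base'
    ],
}

def fetch(url: str, path: str) -> str:
    """Download url to path unless the file is already present."""
    if not os.path.exists(path):
        print(f'Downloading {url} -> {path}')
        urllib.request.urlretrieve(url, path)
    return path

def resolve(name: str) -> dict:
    """Unpack the five documented fields and fetch the model and tokenizer."""
    model_url, tokenizer_url, weights_ft, buffer_ft, model_type = MODELS[name]
    os.makedirs(f'models/{name}', exist_ok=True)
    return {
        'model': fetch(model_url, f'models/{name}/{name}.m'),
        'tokenizer': fetch(tokenizer_url, f'models/{name}/{name}.t'),
        'weights-float-type': weights_ft,  # 'q40': 4-bit quantized weights
        'buffer-float-type': buffer_ft,    # 'q80': 8-bit quantized buffers
        'model-type': model_type,          # 'base': benchmark use, per the README table
    }

if __name__ == '__main__':
    print(resolve(sys.argv[1] if len(sys.argv) > 1 else 'tinyllama_1_1b_3t_q40'))

Running the sketch with no arguments resolves the TinyLlama entry added in this patch; the real launch.py additionally builds and starts the dllama binary with the resolved configuration.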