From cf9f0ffef5955ff6101276022cf4fb401df445f4 Mon Sep 17 00:00:00 2001
From: George Ohashi
Date: Thu, 12 Dec 2024 19:16:12 -0500
Subject: [PATCH] comments

---
Notes (this area, between the "---" line and the diffstat, is not part of the
commit message):
  * kv_cache_deepseek.yaml: the `model` value changes from
    deepseek-ai/DeepSeek-Coder-V2-Instruct to
    deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct; no other key is touched.
  * recipes/kv_cache/gptq.yaml: new recipe declaring a QuantizationModifier
    with an 8-bit float `kv_cache_scheme` and a GPTQModifier with 4-bit int
    channel-wise weights targeting "Linear", ignoring "lm_head".
  * NOTE(review): this patch was restored from a copy whose line breaks and
    leading whitespace had been collapsed. The YAML nesting in gptq.yaml is
    reconstructed from the key structure -- confirm against blob 33208ffad
    before applying.

 tests/e2e/vLLM/configs/kv_cache_deepseek.yaml |  2 +-
 tests/e2e/vLLM/recipes/kv_cache/gptq.yaml     | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)
 create mode 100644 tests/e2e/vLLM/recipes/kv_cache/gptq.yaml

diff --git a/tests/e2e/vLLM/configs/kv_cache_deepseek.yaml b/tests/e2e/vLLM/configs/kv_cache_deepseek.yaml
index 858f5c1e7..a75e9eb60 100644
--- a/tests/e2e/vLLM/configs/kv_cache_deepseek.yaml
+++ b/tests/e2e/vLLM/configs/kv_cache_deepseek.yaml
@@ -1,6 +1,6 @@
 cadence: "nightly"
 test_type: "regression"
-model: deepseek-ai/DeepSeek-Coder-V2-Instruct
+model: deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct
 recipe: tests/e2e/vLLM/recipes/kv_cache/default.yaml
 dataset_id: HuggingFaceH4/ultrachat_200k
 dataset_split: train_sft
diff --git a/tests/e2e/vLLM/recipes/kv_cache/gptq.yaml b/tests/e2e/vLLM/recipes/kv_cache/gptq.yaml
new file mode 100644
index 000000000..33208ffad
--- /dev/null
+++ b/tests/e2e/vLLM/recipes/kv_cache/gptq.yaml
@@ -0,0 +1,18 @@
+quant_stage:
+  quant_modifiers:
+    QuantizationModifier:
+      kv_cache_scheme:
+        {num_bits: 8, type: float, symmetric: true, strategy: tensor}
+    GPTQModifier:
+      sequential_update: false
+      ignore: ["lm_head"]
+      config_groups:
+        group_0:
+          weights:
+            num_bits: 4
+            type: "int"
+            symmetric: true
+            strategy: "channel"
+            actorder: False
+          targets: ["Linear"]
+