From 66523c3d03e287d044d9ae26a9cd0b569bcd4e3c Mon Sep 17 00:00:00 2001
From: Charles Goddard
Date: Mon, 18 Dec 2023 14:55:24 -0800
Subject: [PATCH] Big hammer for Mixtral sliding window config

---
 mergekit/scripts/mixtral_moe.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mergekit/scripts/mixtral_moe.py b/mergekit/scripts/mixtral_moe.py
index c03da918..08559c5c 100644
--- a/mergekit/scripts/mixtral_moe.py
+++ b/mergekit/scripts/mixtral_moe.py
@@ -162,6 +162,7 @@ def build(
     out_cfg = MixtralConfig(**base_cfg.to_dict())
     out_cfg.architectures = ["MixtralForCausalLM"]
     out_cfg.num_local_experts = len(config.experts)
+    out_cfg.sliding_window = None
     out_cfg.save_pretrained(out_path)
 
     if (out_cfg.num_local_experts & (out_cfg.num_local_experts - 1)) != 0: