change device to CPU for quantization

mobiusml · Oct 30, 2024 · 57565d4 · 57565d4
1 parent 464e9d9
commit 57565d4
Showing 1 changed file with 1 addition and 0 deletions.
diff --git a/aana/deployments/hqq_text_generation_deployment.py b/aana/deployments/hqq_text_generation_deployment.py
@@ -118,6 +118,7 @@ async def apply_config(self, config: dict[str, Any]):
                 self.dtype = Dtype.BFLOAT16
 
         if config_obj.quantize_on_fly:
+            self.model_kwargs.pop("device_map", None)
             self.model = AutoModelForCausalLM.from_pretrained(
                 self.model_id, torch_dtype=self.dtype.to_torch(), **self.model_kwargs
             )