diff --git a/src/transformers/models/falcon/modeling_falcon.py b/src/transformers/models/falcon/modeling_falcon.py
index a834131a27caaf..7e7c8d24bd6f4d 100644
--- a/src/transformers/models/falcon/modeling_falcon.py
+++ b/src/transformers/models/falcon/modeling_falcon.py
@@ -614,10 +614,7 @@ def forward(
         input_dtype = query_layer.dtype
         if input_dtype == torch.float32:
             # Handle the case where the model is quantized
-            if hasattr(self.config, "_pre_quantization_dtype"):
-                target_dtype = self.config._pre_quantization_dtype
-            else:
-                target_dtype = self.query_key_value.weight.dtype
+            target_dtype = getattr(self.config, "_pre_quantization_dtype", self.query_key_value.weight.dtype)
             logger.warning_once(
                 f"The input hidden states seems to be silently casted in float32, this might be related to"
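
For context, the change collapses the `hasattr`/`else` branch into a single `getattr` call with a default value. A minimal standalone sketch of the equivalence is below; `DummyConfig` and the `nn.Linear` stand-in are illustrative only and are not part of the diff:

```python
import torch
from torch import nn


class DummyConfig:
    """Stand-in for the model config; may or may not define _pre_quantization_dtype."""
    pass


# Stand-in for self.query_key_value in the attention module.
query_key_value = nn.Linear(8, 8, dtype=torch.bfloat16)
config = DummyConfig()

# Before: explicit hasattr/else branch.
if hasattr(config, "_pre_quantization_dtype"):
    target_dtype = config._pre_quantization_dtype
else:
    target_dtype = query_key_value.weight.dtype

# After: getattr with a default expresses the same fallback in one line.
# Note the default (the weight dtype lookup) is evaluated eagerly either way,
# but since it is a cheap attribute access the behavior is unchanged.
target_dtype_new = getattr(config, "_pre_quantization_dtype", query_key_value.weight.dtype)

assert target_dtype == target_dtype_new == torch.bfloat16
```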