Falcon BetterTransformer requires transformers>=4.34 (#1431)
* falcon BT requires transformers>=4.34

* more fix
fxmarty authored Oct 6, 2023
1 parent 8c296d3 commit 099cd73
Showing 7 changed files with 41 additions and 11 deletions.
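For context: the patched falcon_forward targets the Falcon attention signature shipped in transformers 4.34 (the rotary embedding call now takes position_ids), so the import guards and dummy objects are bumped from 4.32 to 4.34. A minimal sketch of the user-facing entry point, with an illustrative checkpoint name that is not part of this commit:

# Sketch: converting a Falcon checkpoint with BetterTransformer.
# The checkpoint name below is illustrative; any Falcon model hits the same code path.
from transformers import AutoModelForCausalLM

from optimum.bettertransformer import BetterTransformer

model = AutoModelForCausalLM.from_pretrained("tiiuae/falcon-7b")
# With transformers < 4.34 installed, this is expected to fail with a message
# pointing at the transformers>=4.34 requirement; with >= 4.34 the attention
# layers are converted to the BetterTransformer implementation.
model = BetterTransformer.transform(model)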
optimum/bettertransformer/models/attention.py (3 changes: 2 additions, 1 deletion)
@@ -913,6 +913,7 @@ def falcon_forward(
     alibi: Optional[torch.Tensor],
     attention_mask: torch.Tensor,
     layer_past: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
+    position_ids: Optional[torch.LongTensor] = None,
     head_mask: Optional[torch.Tensor] = None,
     use_cache: bool = False,
     output_attentions: bool = False,
@@ -937,7 +938,7 @@ def falcon_forward(
     value_layer = value_layer.transpose(1, 2).reshape(batch_size * num_kv_heads, query_length, self.head_dim)

     past_kv_length = 0 if layer_past is None else layer_past[0].shape[1]
-    query_layer, key_layer = self.maybe_rotary(query_layer, key_layer, past_kv_length)
+    query_layer, key_layer = self.maybe_rotary(query_layer, key_layer, past_kv_length, position_ids)

     if layer_past is not None:
         past_key, past_value = layer_past
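The forwarded position_ids matches the rotary-embedding call signature used by Falcon in transformers 4.34. As an illustration only (this helper is not in the commit), the default positions a caller would construct when none are provided continue from the cached sequence length:

import torch

def default_position_ids(past_kv_length: int, query_length: int, batch_size: int) -> torch.LongTensor:
    # Hypothetical helper: positions for the new tokens continue after the KV cache,
    # one identical row per batch element.
    positions = torch.arange(past_kv_length, past_kv_length + query_length, dtype=torch.long)
    return positions.unsqueeze(0).expand(batch_size, query_length)

# Example: a 3-token cache and 2 new tokens -> tensor([[3, 4]])
print(default_position_ids(3, 2, 1))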
optimum/bettertransformer/models/decoder_models.py (2 changes: 1 addition, 1 deletion)
@@ -44,7 +44,7 @@
 else:
     from ...utils.dummy_bettertransformer_objects import BarkSelfAttention

-if check_if_transformers_greater("4.32"):
+if check_if_transformers_greater("4.34"):
     from transformers.models.falcon.modeling_falcon import FalconAttention
 else:
     from ...utils.dummy_bettertransformer_objects import FalconAttention
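check_if_transformers_greater gates the import so the real FalconAttention is only pulled from transformers when the installed version supports the new signature. A standalone sketch of the same pattern, using only packaging and importlib.metadata (names below are illustrative, not optimum's):

import importlib.metadata

from packaging import version

def transformers_at_least(minimum: str) -> bool:
    # Compare the installed transformers version against a minimum requirement.
    return version.parse(importlib.metadata.version("transformers")) >= version.parse(minimum)

if transformers_at_least("4.34"):
    from transformers.models.falcon.modeling_falcon import FalconAttention
else:
    class FalconAttention:
        # Placeholder that fails loudly if Falcon support is used with an old transformers.
        def __init__(self, *args, **kwargs):
            raise ImportError("Falcon BetterTransformer support requires transformers>=4.34.")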
optimum/utils/dummy_bettertransformer_objects.py (4 changes: 2 additions, 2 deletions)
@@ -16,10 +16,10 @@ def __init__(self, *args, **kwargs):


 class FalconAttention(metaclass=DummyObject):
-    _backends = ["transformers_432"]
+    _backends = ["transformers_434"]

     def __init__(self, *args, **kwargs):
-        requires_backends(self, ["transformers_432"])
+        requires_backends(self, ["transformers_434"])


 def _llama_prepare_decoder_attention_mask(*args, **kwargs):
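The dummy FalconAttention only changes which backend key it asks requires_backends to verify. A sketch of what the placeholder is for (the exact exception type and message come from optimum's requires_backends and TRANSFORMERS_IMPORT_ERROR, so they are only paraphrased in the comments):

# Sketch: with transformers < 4.34 installed, optimum binds this placeholder in
# place of the real FalconAttention, so instantiating it raises immediately with
# a hint about the required transformers version; with >= 4.34 the real class is
# imported instead and this dummy module is never consulted.
from optimum.utils.dummy_bettertransformer_objects import FalconAttention

try:
    FalconAttention()
except Exception as err:
    print(type(err).__name__, err)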
optimum/utils/import_utils.py (4 changes: 4 additions, 0 deletions)
@@ -201,6 +201,10 @@ def require_numpy_strictly_lower(version: str, message: str):
             "transformers_432",
             (lambda: check_if_transformers_greater("4.32"), "{0} " + TRANSFORMERS_IMPORT_ERROR.format("4.32")),
         ),
+        (
+            "transformers_434",
+            (lambda: check_if_transformers_greater("4.34"), "{0} " + TRANSFORMERS_IMPORT_ERROR.format("4.34")),
+        ),
     ]
 )

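Each entry in the mapping above pairs a backend key with an availability check and an error template that requires_backends can format with the offending class name. A simplified, self-contained sketch of how such a mapping is typically consumed (the helper below is illustrative, not optimum's implementation):

from collections import OrderedDict

# Illustrative mapping: the check is hard-coded to fail so the sketch always
# demonstrates the error path.
BACKENDS_MAPPING = OrderedDict(
    [
        ("transformers_434", (lambda: False, "{0} requires transformers>=4.34 but an older version was found.")),
    ]
)

def requires_backends_sketch(obj, backends):
    name = getattr(obj, "__name__", obj.__class__.__name__)
    failed = [msg.format(name) for check, msg in (BACKENDS_MAPPING[b] for b in backends) if not check()]
    if failed:
        raise ImportError(" ".join(failed))

class FalconAttention:
    pass

try:
    requires_backends_sketch(FalconAttention, ["transformers_434"])
except ImportError as err:
    print(err)  # FalconAttention requires transformers>=4.34 but an older version was found.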
optimum/utils/input_generators.py (33 changes: 28 additions, 5 deletions)
@@ -862,21 +862,44 @@


 class FalconDummyPastKeyValuesGenerator(DummyPastKeyValuesGenerator):
-    def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"):
-        self.num_kv_heads = 1
-        head_dim = self.hidden_size // self.num_attention_heads
+    def __init__(
+        self,
+        task: str,
+        normalized_config: NormalizedTextConfig,
+        batch_size: int = DEFAULT_DUMMY_SHAPES["batch_size"],
+        sequence_length: int = DEFAULT_DUMMY_SHAPES["sequence_length"],
+        random_batch_size_range: Optional[Tuple[int, int]] = None,
+        random_sequence_length_range: Optional[Tuple[int, int]] = None,
+        **kwargs,
+    ):
+        super().__init__(
+            task=task,
+            normalized_config=normalized_config,
+            batch_size=batch_size,
+            sequence_length=sequence_length,
+            random_batch_size_range=random_batch_size_range,
+            random_sequence_length_range=random_sequence_length_range,
+            **kwargs,
+        )
+        self.num_kv_heads = (
+            normalized_config.num_kv_heads
+            if (normalized_config.new_decoder_architecture or not normalized_config.multi_query)
+            else 1
+        )
+        self.head_dim = self.hidden_size // self.num_attention_heads

+    def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"):
         past_key_shape = (
             self.batch_size,
             self.num_kv_heads,
             self.sequence_length,
-            head_dim,
+            self.head_dim,
         )
         past_value_shape = (
             self.batch_size,
             self.num_kv_heads,
             self.sequence_length,
-            head_dim,
+            self.head_dim,
         )
         return [
             (
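The rewritten generator reads the key/value head count from the normalized config instead of hard-coding 1, which is only correct for multi-query checkpoints. A small self-contained sketch of the selection rule applied to two illustrative configs (field values chosen for the example, not read from real checkpoints):

from dataclasses import dataclass

@dataclass
class FalconLikeConfig:
    num_attention_heads: int
    num_kv_heads: int
    multi_query: bool
    new_decoder_architecture: bool

def kv_heads_for_cache(config: FalconLikeConfig) -> int:
    # Same rule as the __init__ above: new-decoder-architecture checkpoints use
    # grouped KV heads, classic multi-query checkpoints share a single KV head.
    if config.new_decoder_architecture or not config.multi_query:
        return config.num_kv_heads
    return 1

falcon_7b_like = FalconLikeConfig(num_attention_heads=71, num_kv_heads=71, multi_query=True, new_decoder_architecture=False)
falcon_40b_like = FalconLikeConfig(num_attention_heads=128, num_kv_heads=8, multi_query=False, new_decoder_architecture=True)

print(kv_heads_for_cache(falcon_7b_like))   # 1: a single shared KV head in the dummy past shape
print(kv_heads_for_cache(falcon_40b_like))  # 8: grouped KV heads in the dummy past shape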
optimum/utils/normalized_config.py (4 changes: 3 additions, 1 deletion)
@@ -211,7 +211,9 @@ class NormalizedConfigManager:
         "blenderbot": BartLikeNormalizedTextConfig,
         "blenderbot-small": BartLikeNormalizedTextConfig,
         "bloom": NormalizedTextConfig.with_args(num_layers="n_layer"),
-        "falcon": NormalizedTextConfig.with_args(num_layers="num_hidden_layers", num_attention_heads="num_kv_heads"),
+        "falcon": NormalizedTextConfig.with_args(
+            num_layers="num_hidden_layers", num_attention_heads="num_attention_heads"
+        ),
         "camembert": NormalizedTextConfig,
         "codegen": GPT2LikeNormalizedTextConfig,
         "cvt": NormalizedVisionConfig,
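The mapping change matters because downstream code derives quantities like head_dim = hidden_size // num_attention_heads from the normalized config, and aliasing num_attention_heads to the config's num_kv_heads under-reports the head count whenever KV heads and attention heads differ. A simplified stand-in showing what the with_args aliasing amounts to (the class below is illustrative, not optimum's NormalizedTextConfig):

from types import SimpleNamespace

class FalconNormalizedConfigSketch:
    # After this commit the alias targets the config field of the same name.
    NUM_LAYERS = "num_hidden_layers"
    NUM_ATTENTION_HEADS = "num_attention_heads"

    def __init__(self, config):
        self.config = config

    @property
    def num_layers(self) -> int:
        return getattr(self.config, self.NUM_LAYERS)

    @property
    def num_attention_heads(self) -> int:
        return getattr(self.config, self.NUM_ATTENTION_HEADS)

# Illustrative Falcon-7B-like values.
config = SimpleNamespace(num_hidden_layers=32, num_attention_heads=71, num_kv_heads=71, multi_query=True)
normalized = FalconNormalizedConfigSketch(config)
print(normalized.num_attention_heads)  # 71, read from num_attention_heads rather than num_kv_heads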
tests/bettertransformer/testing_utils.py (2 changes: 1 addition, 1 deletion)
@@ -43,7 +43,7 @@
     "distilbert": "hf-internal-testing/tiny-random-DistilBertModel",
     "electra": "hf-internal-testing/tiny-random-ElectraModel",
     "ernie": "hf-internal-testing/tiny-random-ErnieModel",
-    "falcon": "Rocketknight1/tiny-random-falcon-7b",
+    "falcon": "fxmarty/really-tiny-falcon-testing",
     "fsmt": "hf-internal-testing/tiny-random-FSMTModel",
     "gpt2": "hf-internal-testing/tiny-random-GPT2Model",
     # NOTE: this tiny model does not use attention_softmax_in_fp32=True (contrary to e.g. starcoder)
