[ORT] Improve dummy mask & add tips for attention fusion in the doc (#1640)

* improve dummy mask

* apply suggestion and fix

---------

Co-authored-by: JingyaHuang <[email protected]>
JingyaHuang and JingyaHuang authored Jan 11, 2024
1 parent a5e9bea commit 7a8c563
Showing 2 changed files with 72 additions and 2 deletions.
6 changes: 6 additions & 0 deletions docs/source/onnxruntime/usage_guides/optimization.mdx
@@ -79,6 +79,12 @@ Here is a list of the possible optimizations you can enable:
- Add Bias and Gelu / FastGelu fusion with `disable_bias_gelu_fusion=False`,
- Gelu approximation with `enable_gelu_approximation=True`.

<Tip>

Attention fusion assumes right-side padding for BERT-like architectures (e.g. BERT, RoBERTa, ViT) and left-side padding for generative models (GPT-like). If your inputs do not follow this convention, set `use_raw_attention_mask=True` to avoid potential accuracy issues, at the cost of some performance.

</Tip>

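As a concrete illustration of the tip above, here is a minimal sketch, assuming a model already exported to ONNX with Optimum (the model path is illustrative):

```python
from optimum.onnxruntime import ORTOptimizer
from optimum.onnxruntime.configuration import OptimizationConfig

# Keep the raw attention mask so that attention fusion does not rely on a padding-side assumption.
optimization_config = OptimizationConfig(
    optimization_level=2,
    use_raw_attention_mask=True,
)

optimizer = ORTOptimizer.from_pretrained("path/to/exported_model")  # illustrative path
optimizer.optimize(save_dir="optimized_model", optimization_config=optimization_config)
```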
While [`~onnxruntime.configuration.OptimizationConfig`] gives you full control over how the optimization is done, it can be hard to know which options to enable or disable. Instead, you can use [`~onnxruntime.configuration.AutoOptimizationConfig`], which provides four common optimization levels (a short usage sketch follows the list below):
- O1: basic general optimizations.
- O2: basic and extended general optimizations, transformers-specific fusions.
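Here is the usage sketch referenced above: a minimal example of selecting one of these presets (assuming Optimum is installed with its ONNX Runtime extras):

```python
from optimum.onnxruntime import AutoOptimizationConfig

# O2: basic and extended general optimizations plus transformers-specific fusions.
optimization_config = AutoOptimizationConfig.O2()
```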
68 changes: 66 additions & 2 deletions optimum/utils/input_generators.py
@@ -180,6 +180,59 @@ def random_int_tensor(
        else:
            return np.random.randint(min_value, high=max_value, size=shape, dtype=DTYPE_MAPPER.np(dtype))

    @staticmethod
    @check_framework_is_available
    def random_mask_tensor(shape: List[int], padding_side: str = "right", framework: str = "pt", dtype: str = "int64"):
        """
        Generates a mask tensor either right or left padded.

        Args:
            shape (`List[int]`):
                The shape of the random tensor.
            padding_side (`str`, defaults to `"right"`):
                The side on which the padding is applied.
            framework (`str`, defaults to `"pt"`):
                The requested framework.
            dtype (`str`, defaults to `"int64"`):
                The dtype of the generated integer tensor. Could be "int64", "int32", "int8".

        Returns:
            A random mask tensor either left padded or right padded in the requested framework.
        """
        shape = tuple(shape)
        mask_length = random.randint(1, shape[-1] - 1)
        if framework == "pt":
            mask_tensor = torch.cat(
                [
                    torch.ones(*shape[:-1], shape[-1] - mask_length, dtype=DTYPE_MAPPER.pt(dtype)),
                    torch.zeros(*shape[:-1], mask_length, dtype=DTYPE_MAPPER.pt(dtype)),
                ],
                dim=-1,
            )
            if padding_side == "left":
                mask_tensor = torch.flip(mask_tensor, [-1])
        elif framework == "tf":
            mask_tensor = tf.concat(
                [
                    tf.ones((*shape[:-1], shape[-1] - mask_length), dtype=DTYPE_MAPPER.tf(dtype)),
                    tf.zeros((*shape[:-1], mask_length), dtype=DTYPE_MAPPER.tf(dtype)),
                ],
                axis=-1,
            )
            if padding_side == "left":
                mask_tensor = tf.reverse(mask_tensor, [-1])
        else:
            mask_tensor = np.concatenate(
                [
                    np.ones((*shape[:-1], shape[-1] - mask_length), dtype=DTYPE_MAPPER.np(dtype)),
                    np.zeros((*shape[:-1], mask_length), dtype=DTYPE_MAPPER.np(dtype)),
                ],
                axis=-1,
            )
            if padding_side == "left":
                mask_tensor = np.flip(mask_tensor, [-1])
        return mask_tensor
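A quick, hypothetical illustration of the padding behavior (it assumes this static method lives on the same `DummyInputGenerator` base class as `random_int_tensor`, and uses the numpy path for readability):

```python
from optimum.utils import DummyInputGenerator  # assumption: the base class hosting these static helpers

# Left padding puts the zeros (padding positions) first and the ones (real tokens) last.
mask = DummyInputGenerator.random_mask_tensor([2, 6], padding_side="left", framework="np")
# One possible result (mask_length is drawn at random between 1 and 5):
# array([[0, 0, 1, 1, 1, 1],
#        [0, 0, 1, 1, 1, 1]])
```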

    @staticmethod
    @check_framework_is_available
    def random_float_tensor(
@@ -344,6 +397,7 @@ def __init__(
        random_batch_size_range: Optional[Tuple[int, int]] = None,
        random_sequence_length_range: Optional[Tuple[int, int]] = None,
        random_num_choices_range: Optional[Tuple[int, int]] = None,
        padding_side: str = "right",
        **kwargs,
    ):
        self.task = task
@@ -363,14 +417,24 @@ def __init__(
            self.num_choices = random.randint(low, high)
        else:
            self.num_choices = num_choices
        self.padding_side = padding_side

    def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"):
    def generate(
        self,
        input_name: str,
        framework: str = "pt",
        int_dtype: str = "int64",
        float_dtype: str = "fp32",
    ):
        min_value = 0
        max_value = 2 if input_name != "input_ids" else self.vocab_size
        shape = [self.batch_size, self.sequence_length]
        if self.task == "multiple-choice":
            shape = [self.batch_size, self.num_choices, self.sequence_length]
        return self.random_int_tensor(shape, max_value, min_value=min_value, framework=framework, dtype=int_dtype)
        if "mask" in input_name:
            return self.random_mask_tensor(shape, padding_side=self.padding_side, framework=framework, dtype=int_dtype)
        else:
            return self.random_int_tensor(shape, max_value, min_value=min_value, framework=framework, dtype=int_dtype)
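A hypothetical usage sketch of the new `padding_side` argument (the checkpoint name and the way the generator is constructed are assumptions for illustration):

```python
from transformers import AutoConfig
from optimum.utils import DummyTextInputGenerator, NormalizedTextConfig

config = AutoConfig.from_pretrained("gpt2")  # illustrative checkpoint
normalized_config = NormalizedTextConfig(config)

generator = DummyTextInputGenerator("text-generation", normalized_config, padding_side="left")
attention_mask = generator.generate("attention_mask", framework="pt")  # left-padded 0/1 mask
input_ids = generator.generate("input_ids", framework="pt")  # regular random int tensor, no padding logic
```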


class DummyDecoderTextInputGenerator(DummyTextInputGenerator):
