
Remove more kwargs
amyeroberts committed May 15, 2024
1 parent 9b72cc1 commit fe89540
Showing 7 changed files with 4 additions and 82 deletions.
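
The change repeats one pattern across the files below: a catch-all `**kwargs` parameter (in some modules paired with a manual `if kwargs: raise ValueError(...)` guard) is dropped in favour of an explicit signature, so Python itself rejects stray keyword arguments. A minimal sketch of the before/after shape, with illustrative names that are not copied from any of the touched classes:

```python
from typing import Optional

import torch


# Before: stray keyword arguments were accepted and then rejected by a hand-written guard.
def forward_before(hidden_states: torch.Tensor, output_attentions: Optional[bool] = False, **kwargs):
    if kwargs:
        raise ValueError(f"Unexpected arguments {kwargs.keys()}")
    return hidden_states


# After: the signature is explicit, so Python itself raises TypeError for anything unexpected.
def forward_after(hidden_states: torch.Tensor, output_attentions: Optional[bool] = False):
    return hidden_states


forward_after(torch.zeros(1, 4, 8), output_attentions=True)  # fine
# forward_after(torch.zeros(1, 4, 8), padding_mask=None)     # TypeError: unexpected keyword argument 'padding_mask'
```

In the Cohere decoder layer the removed `**kwargs` also stops being forwarded to the inner attention call, as the first two hunks show.
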
2 changes: 0 additions & 2 deletions src/transformers/models/cohere/modeling_cohere.py
@@ -634,7 +634,6 @@ def forward(
output_attentions: Optional[bool] = False,
use_cache: Optional[bool] = False,
cache_position: Optional[torch.LongTensor] = None,
- **kwargs,
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
"""
Args:
@@ -663,7 +662,6 @@ def forward(
output_attentions=output_attentions,
use_cache=use_cache,
cache_position=cache_position,
- **kwargs,
)

# Fully Connected
@@ -556,9 +556,7 @@ def __init__(
def _shape(self, tensor: torch.Tensor, seq_len: int, batch_size: int):
return tensor.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()

- def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor], **kwargs):
-     if kwargs:
-         raise ValueError(f"Unexpected arguments {kwargs.keys()}")
+ def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor]):
return tensor if object_queries is None else tensor + object_queries

def forward(
@@ -569,12 +567,8 @@ def forward(
key_value_states: Optional[torch.Tensor] = None,
spatial_position_embeddings: Optional[torch.Tensor] = None,
output_attentions: bool = False,
- **kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
"""Input shape: Batch x Time x Channel"""
- if kwargs:
-     raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
# if key_value_states are provided this layer is used as a cross-attention layer
# for the decoder
is_cross_attention = key_value_states is not None
@@ -798,7 +792,6 @@ def forward(
attention_mask: torch.Tensor,
object_queries: torch.Tensor = None,
output_attentions: bool = False,
- **kwargs,
):
"""
Args:
@@ -812,9 +805,6 @@ def forward(
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
returned tensors for more detail.
"""
- if kwargs:
-     raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
residual = hidden_states
hidden_states, attn_weights = self.self_attn(
hidden_states=hidden_states,
@@ -903,7 +893,6 @@ def forward(
encoder_attention_mask: Optional[torch.Tensor] = None,
output_attentions: Optional[bool] = False,
is_first: Optional[bool] = False,
- **kwargs,
):
"""
Args:
@@ -926,9 +915,6 @@ def forward(
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
returned tensors for more detail.
"""
- if kwargs:
-     raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
residual = hidden_states

# ========== Begin of Self-Attention =============
@@ -1189,7 +1175,6 @@ def forward(
output_attentions=None,
output_hidden_states=None,
return_dict=None,
- **kwargs,
):
r"""
Args:
@@ -1216,9 +1201,6 @@ def forward(
return_dict (`bool`, *optional*):
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
- if kwargs:
-     raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@@ -1317,7 +1299,6 @@ def forward(
output_attentions=None,
output_hidden_states=None,
return_dict=None,
- **kwargs,
):
r"""
Args:
@@ -1354,9 +1335,6 @@ def forward(
return_dict (`bool`, *optional*):
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
- if kwargs:
-     raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
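
The `with_pos_embed` helper simplified in the hunks above (and again in the DETR, MaskFormer and related files below) now takes exactly two arguments. A small self-contained sketch of what it does, with toy shapes chosen only for illustration:

```python
from typing import Optional

import torch
from torch import Tensor


def with_pos_embed(tensor: Tensor, object_queries: Optional[Tensor]) -> Tensor:
    # Add the positional (object-query) embeddings to the hidden states when they are given.
    return tensor if object_queries is None else tensor + object_queries


hidden_states = torch.randn(2, 100, 256)   # (batch, num_queries, hidden_dim), illustrative shapes
object_queries = torch.randn(2, 100, 256)

queries = with_pos_embed(hidden_states, object_queries)  # queries/keys carry position information
values = with_pos_embed(hidden_states, None)             # values are passed through unchanged
```

Inside the attention modules it is typically applied to the queries and keys, while the values are projected from the raw hidden states.
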
24 changes: 1 addition & 23 deletions src/transformers/models/detr/modeling_detr.py
@@ -524,9 +524,7 @@ def __init__(
def _shape(self, tensor: torch.Tensor, seq_len: int, batch_size: int):
return tensor.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()

- def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor], **kwargs):
-     if kwargs:
-         raise ValueError(f"Unexpected arguments {kwargs.keys()}")
+ def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor]):
return tensor if object_queries is None else tensor + object_queries

def forward(
@@ -537,12 +535,8 @@ def forward(
key_value_states: Optional[torch.Tensor] = None,
spatial_position_embeddings: Optional[torch.Tensor] = None,
output_attentions: bool = False,
- **kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
"""Input shape: Batch x Time x Channel"""
- if kwargs:
-     raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
# if key_value_states are provided this layer is used as a cross-attention layer
# for the decoder
is_cross_attention = key_value_states is not None
@@ -648,7 +642,6 @@ def forward(
attention_mask: torch.Tensor,
object_queries: torch.Tensor = None,
output_attentions: bool = False,
- **kwargs,
):
"""
Args:
@@ -662,9 +655,6 @@ def forward(
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
returned tensors for more detail.
"""
- if kwargs:
-     raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
residual = hidden_states
hidden_states, attn_weights = self.self_attn(
hidden_states=hidden_states,
@@ -734,7 +724,6 @@ def forward(
encoder_hidden_states: Optional[torch.Tensor] = None,
encoder_attention_mask: Optional[torch.Tensor] = None,
output_attentions: Optional[bool] = False,
- **kwargs,
):
"""
Args:
@@ -757,9 +746,6 @@ def forward(
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
returned tensors for more detail.
"""
- if kwargs:
-     raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
residual = hidden_states

# Self Attention
@@ -947,7 +933,6 @@ def forward(
output_attentions=None,
output_hidden_states=None,
return_dict=None,
- **kwargs,
):
r"""
Args:
@@ -974,9 +959,6 @@ def forward(
return_dict (`bool`, *optional*):
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
- if kwargs:
-     raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@@ -1068,7 +1050,6 @@ def forward(
output_attentions=None,
output_hidden_states=None,
return_dict=None,
- **kwargs,
):
r"""
Args:
@@ -1106,9 +1087,6 @@ def forward(
return_dict (`bool`, *optional*):
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
- if kwargs:
-     raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
3 changes: 0 additions & 3 deletions src/transformers/models/llama/modeling_llama.py
@@ -103,9 +103,6 @@ def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None, s
self.register_buffer("inv_freq", inv_freq, persistent=False)
# For BC we register cos and sin cached
self.max_seq_len_cached = max_position_embeddings
- t = torch.arange(self.max_seq_len_cached, device=device, dtype=torch.int64).type_as(self.inv_freq)
- t = t / self.scaling_factor
- freqs = torch.outer(t, self.inv_freq)

@torch.no_grad()
def forward(self, x, position_ids):
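
The three lines removed from the rotary embedding `__init__` computed `freqs` without storing it; the cos/sin values are instead produced on the fly in `forward` from `inv_freq` and the incoming `position_ids`, so the block was effectively dead code. A rough sketch of that on-the-fly computation (simplified and assuming the usual rotary setup; the real `forward` also handles device placement and autocast):

```python
import torch

dim, base = 128, 10000.0
inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2, dtype=torch.int64).float() / dim))

position_ids = torch.arange(16)[None, :]            # (batch=1, seq_len=16)
freqs = position_ids[..., None].float() * inv_freq  # (1, 16, dim // 2) via broadcasting
emb = torch.cat((freqs, freqs), dim=-1)             # (1, 16, dim)
cos, sin = emb.cos(), emb.sin()                     # later applied to queries/keys in the attention layers
```

The identical cleanup appears in `modeling_olmo.py` further down.
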
17 changes: 1 addition & 16 deletions src/transformers/models/maskformer/modeling_maskformer.py
@@ -440,10 +440,7 @@ def __init__(
def _shape(self, tensor: torch.Tensor, seq_len: int, batch_size: int):
return tensor.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()

- def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor], **kwargs):
-     if kwargs:
-         raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
+ def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor]):
return tensor if object_queries is None else tensor + object_queries

def forward(
@@ -454,12 +451,8 @@ def forward(
key_value_states: Optional[torch.Tensor] = None,
spatial_position_embeddings: Optional[torch.Tensor] = None,
output_attentions: bool = False,
- **kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
"""Input shape: Batch x Time x Channel"""
- if kwargs:
-     raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
# if key_value_states are provided this layer is used as a cross-attention layer
# for the decoder
is_cross_attention = key_value_states is not None
@@ -577,7 +570,6 @@ def forward(
encoder_hidden_states: Optional[torch.Tensor] = None,
encoder_attention_mask: Optional[torch.Tensor] = None,
output_attentions: Optional[bool] = False,
- **kwargs,
):
"""
Args:
@@ -600,9 +592,6 @@ def forward(
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
returned tensors for more detail.
"""
- if kwargs:
-     raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
residual = hidden_states

# Self Attention
@@ -690,7 +679,6 @@ def forward(
output_attentions=None,
output_hidden_states=None,
return_dict=None,
- **kwargs,
):
r"""
Args:
@@ -727,9 +715,6 @@ def forward(
return_dict (`bool`, *optional*):
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
- if kwargs:
-     raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
3 changes: 0 additions & 3 deletions src/transformers/models/olmo/modeling_olmo.py
@@ -100,9 +100,6 @@ def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None, s
self.register_buffer("inv_freq", inv_freq, persistent=False)
# For BC we register cos and sin cached
self.max_seq_len_cached = max_position_embeddings
- t = torch.arange(self.max_seq_len_cached, device=device, dtype=torch.int64).type_as(self.inv_freq)
- t = t / self.scaling_factor
- freqs = torch.outer(t, self.inv_freq)

@torch.no_grad()
def forward(self, x, position_ids):
@@ -461,10 +461,7 @@ def __init__(
def _shape(self, tensor: torch.Tensor, seq_len: int, batch_size: int):
return tensor.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()

- def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor], **kwargs):
-     if kwargs:
-         raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
+ def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor]):
return tensor if object_queries is None else tensor + object_queries

def forward(
@@ -475,12 +472,8 @@ def forward(
key_value_states: Optional[torch.Tensor] = None,
spatial_position_embeddings: Optional[torch.Tensor] = None,
output_attentions: bool = False,
- **kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
"""Input shape: Batch x Time x Channel"""
- if kwargs:
-     raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
# if key_value_states are provided this layer is used as a cross-attention layer
# for the decoder
is_cross_attention = key_value_states is not None
Expand Down Expand Up @@ -1000,7 +993,6 @@ def forward(
output_attentions=None,
output_hidden_states=None,
return_dict=None,
- **kwargs,
):
r"""
Args:
Expand Down Expand Up @@ -1038,9 +1030,6 @@ def forward(
return_dict (`bool`, *optional*):
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
- if kwargs:
-     raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
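
A quick sanity check one might run against a checkout at this commit (a sketch; it assumes `DetrAttention` is the attention class edited in `modeling_detr.py` above): confirm that the touched `forward` no longer declares a catch-all `**kwargs`.

```python
import inspect

# Assumption: DetrAttention is the attention module whose forward was changed in modeling_detr.py.
from transformers.models.detr.modeling_detr import DetrAttention

# After this commit there should be no VAR_KEYWORD (**kwargs) parameter left in the signature.
params = inspect.signature(DetrAttention.forward).parameters.values()
assert not any(p.kind is inspect.Parameter.VAR_KEYWORD for p in params), "unexpected **kwargs in DetrAttention.forward"
```
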
