diff --git a/src/transformers/models/deprecated/deta/image_processing_deta.py b/src/transformers/models/deprecated/deta/image_processing_deta.py index 57a9584397df76..9e660198a2e814 100644 --- a/src/transformers/models/deprecated/deta/image_processing_deta.py +++ b/src/transformers/models/deprecated/deta/image_processing_deta.py @@ -78,7 +78,6 @@ SUPPORTED_ANNOTATION_FORMATS = (AnnotationFormat.COCO_DETECTION, AnnotationFormat.COCO_PANOPTIC) -# Copied from transformers.models.detr.image_processing_detr.get_size_with_aspect_ratio def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, int]: """ Computes the output image size given the input image size and the desired output size. @@ -110,7 +109,6 @@ def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, in return (oh, ow) -# Copied from transformers.models.detr.image_processing_detr.get_resize_output_image_size def get_resize_output_image_size( input_image: np.ndarray, size: Union[int, Tuple[int, int], List[int]], @@ -139,7 +137,6 @@ def get_resize_output_image_size( return get_size_with_aspect_ratio(image_size, size, max_size) -# Copied from transformers.models.detr.image_processing_detr.get_image_size_for_max_height_width def get_image_size_for_max_height_width( input_image: np.ndarray, max_height: int, @@ -175,7 +172,6 @@ def get_image_size_for_max_height_width( return new_height, new_width -# Copied from transformers.models.detr.image_processing_detr.get_numpy_to_framework_fn def get_numpy_to_framework_fn(arr) -> Callable: """ Returns a function that converts a numpy array to the framework of the input array. @@ -200,7 +196,6 @@ def get_numpy_to_framework_fn(arr) -> Callable: raise ValueError(f"Cannot convert arrays of type {type(arr)}") -# Copied from transformers.models.detr.image_processing_detr.safe_squeeze def safe_squeeze(arr: np.ndarray, axis: Optional[int] = None) -> np.ndarray: """ Squeezes an array, but only if the axis specified has dim 1. @@ -214,7 +209,6 @@ def safe_squeeze(arr: np.ndarray, axis: Optional[int] = None) -> np.ndarray: return arr -# Copied from transformers.models.detr.image_processing_detr.normalize_annotation def normalize_annotation(annotation: Dict, image_size: Tuple[int, int]) -> Dict: image_height, image_width = image_size norm_annotation = {} @@ -229,7 +223,6 @@ def normalize_annotation(annotation: Dict, image_size: Tuple[int, int]) -> Dict: return norm_annotation -# Copied from transformers.models.detr.image_processing_detr.max_across_indices def max_across_indices(values: Iterable[Any]) -> List[Any]: """ Return the maximum value across all indices of an iterable of values. 
@@ -237,7 +230,6 @@ def max_across_indices(values: Iterable[Any]) -> List[Any]: return [max(values_i) for values_i in zip(*values)] -# Copied from transformers.models.detr.image_processing_detr.get_max_height_width def get_max_height_width( images: List[np.ndarray], input_data_format: Optional[Union[str, ChannelDimension]] = None ) -> List[int]: @@ -256,7 +248,6 @@ def get_max_height_width( return (max_height, max_width) -# Copied from transformers.models.detr.image_processing_detr.make_pixel_mask def make_pixel_mask( image: np.ndarray, output_size: Tuple[int, int], input_data_format: Optional[Union[str, ChannelDimension]] = None ) -> np.ndarray: @@ -275,7 +266,6 @@ def make_pixel_mask( return mask -# Copied from transformers.models.detr.image_processing_detr.convert_coco_poly_to_mask def convert_coco_poly_to_mask(segmentations, height: int, width: int) -> np.ndarray: """ Convert a COCO polygon annotation to a mask. @@ -310,7 +300,6 @@ def convert_coco_poly_to_mask(segmentations, height: int, width: int) -> np.ndar return masks -# Copied from transformers.models.detr.image_processing_detr.prepare_coco_detection_annotation with DETR->DETA def prepare_coco_detection_annotation( image, target, @@ -371,7 +360,6 @@ def prepare_coco_detection_annotation( return new_target -# Copied from transformers.models.detr.image_processing_detr.masks_to_boxes def masks_to_boxes(masks: np.ndarray) -> np.ndarray: """ Compute the bounding boxes around the provided panoptic segmentation masks. @@ -406,7 +394,6 @@ def masks_to_boxes(masks: np.ndarray) -> np.ndarray: return np.stack([x_min, y_min, x_max, y_max], 1) -# Copied from transformers.models.detr.image_processing_detr.prepare_coco_panoptic_annotation with DETR->DETA def prepare_coco_panoptic_annotation( image: np.ndarray, target: Dict, @@ -448,7 +435,6 @@ def prepare_coco_panoptic_annotation( return new_target -# Copied from transformers.models.detr.image_processing_detr.resize_annotation def resize_annotation( annotation: Dict[str, Any], orig_size: Tuple[int, int], @@ -594,7 +580,6 @@ def __init__( self.do_pad = do_pad self.pad_size = pad_size - # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.prepare_annotation with DETR->DETA def prepare_annotation( self, image: np.ndarray, @@ -683,7 +668,6 @@ def resize( ) return image - # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.resize_annotation def resize_annotation( self, annotation, @@ -697,7 +681,6 @@ def resize_annotation( """ return resize_annotation(annotation, orig_size=orig_size, target_size=size, resample=resample) - # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.rescale def rescale( self, image: np.ndarray, @@ -726,7 +709,6 @@ def rescale( """ return rescale(image, rescale_factor, data_format=data_format, input_data_format=input_data_format) - # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.normalize_annotation def normalize_annotation(self, annotation: Dict, image_size: Tuple[int, int]) -> Dict: """ Normalize the boxes in the annotation from `[top_left_x, top_left_y, bottom_right_x, bottom_right_y]` to @@ -734,7 +716,6 @@ def normalize_annotation(self, annotation: Dict, image_size: Tuple[int, int]) -> """ return normalize_annotation(annotation, image_size=image_size) - # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor._update_annotation_for_padded_image def _update_annotation_for_padded_image( self, annotation: Dict, @@ -778,7 +759,6 @@ def 
_update_annotation_for_padded_image( new_annotation[key] = value return new_annotation - # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor._pad_image def _pad_image( self, image: np.ndarray, @@ -812,7 +792,6 @@ def _pad_image( ) return padded_image, annotation - # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.pad def pad( self, images: List[np.ndarray], diff --git a/src/transformers/models/deprecated/deta/modeling_deta.py b/src/transformers/models/deprecated/deta/modeling_deta.py index 03341f0ab8dcbf..bc195749399e2d 100644 --- a/src/transformers/models/deprecated/deta/modeling_deta.py +++ b/src/transformers/models/deprecated/deta/modeling_deta.py @@ -52,7 +52,6 @@ MultiScaleDeformableAttention = None -# Copied from models.deformable_detr.load_cuda_kernels def load_cuda_kernels(): from torch.utils.cpp_extension import load @@ -83,7 +82,6 @@ def load_cuda_kernels(): ) -# Copied from transformers.models.deformable_detr.modeling_deformable_detr.MultiScaleDeformableAttentionFunction class MultiScaleDeformableAttentionFunction(Function): @staticmethod def forward( @@ -152,7 +150,6 @@ def backward(context, grad_output): @dataclass -# Copied from transformers.models.deformable_detr.modeling_deformable_detr.DeformableDetrDecoderOutput with DeformableDetr->Deta class DetaDecoderOutput(ModelOutput): """ Base class for outputs of the DetaDecoder. This class adds two attributes to @@ -344,7 +341,6 @@ def inverse_sigmoid(x, eps=1e-5): return torch.log(x1 / x2) -# Copied from transformers.models.detr.modeling_detr.DetrFrozenBatchNorm2d with Detr->Deta class DetaFrozenBatchNorm2d(nn.Module): """ BatchNorm2d where the batch statistics and the affine parameters are fixed. @@ -384,7 +380,6 @@ def forward(self, x): return x * scale + bias -# Copied from transformers.models.detr.modeling_detr.replace_batch_norm with Detr->Deta def replace_batch_norm(model): r""" Recursively replace all `torch.nn.BatchNorm2d` with `DetaFrozenBatchNorm2d`. @@ -454,7 +449,6 @@ def forward(self, pixel_values: torch.Tensor, pixel_mask: torch.Tensor): return out, pos -# Copied from transformers.models.deformable_detr.modeling_deformable_detr.DeformableDetrSinePositionEmbedding with DeformableDetr->Deta class DetaSinePositionEmbedding(nn.Module): """ This is a more standard version of the position embedding, very similar to the one used by the Attention is all you @@ -493,7 +487,6 @@ def forward(self, pixel_values, pixel_mask): return pos -# Copied from transformers.models.detr.modeling_detr.DetrLearnedPositionEmbedding class DetaLearnedPositionEmbedding(nn.Module): """ This module learns positional embeddings up to a fixed maximum size. 
@@ -517,7 +510,6 @@ def forward(self, pixel_values, pixel_mask=None): return pos -# Copied from transformers.models.detr.modeling_detr.build_position_encoding with Detr->Deta def build_position_encoding(config): n_steps = config.d_model // 2 if config.position_embedding_type == "sine": @@ -531,7 +523,6 @@ def build_position_encoding(config): return position_embedding -# Copied from transformers.models.deformable_detr.modeling_deformable_detr.multi_scale_deformable_attention def multi_scale_deformable_attention( value: Tensor, value_spatial_shapes: Tensor, sampling_locations: Tensor, attention_weights: Tensor ) -> Tensor: @@ -571,7 +562,6 @@ def multi_scale_deformable_attention( return output.transpose(1, 2).contiguous() -# Copied from transformers.models.deformable_detr.modeling_deformable_detr.DeformableDetrMultiscaleDeformableAttention with DeformableDetr->Deta class DetaMultiscaleDeformableAttention(nn.Module): """ Multiscale deformable attention as proposed in Deformable DETR. @@ -715,7 +705,6 @@ def forward( return output, attention_weights -# Copied from transformers.models.deformable_detr.modeling_deformable_detr.DeformableDetrMultiheadAttention with DeformableDetr->Deta,Deformable DETR->DETA class DetaMultiheadAttention(nn.Module): """ Multi-headed attention from 'Attention Is All You Need' paper. @@ -1506,11 +1495,9 @@ def __init__(self, config: DetaConfig): self.post_init() - # Copied from transformers.models.deformable_detr.modeling_deformable_detr.DeformableDetrModel.get_encoder def get_encoder(self): return self.encoder - # Copied from transformers.models.deformable_detr.modeling_deformable_detr.DeformableDetrModel.get_decoder def get_decoder(self): return self.decoder @@ -1522,7 +1509,6 @@ def unfreeze_backbone(self): for name, param in self.backbone.model.named_parameters(): param.requires_grad_(True) - # Copied from transformers.models.deformable_detr.modeling_deformable_detr.DeformableDetrModel.get_valid_ratio def get_valid_ratio(self, mask, dtype=torch.float32): """Get the valid ratio of all feature maps.""" @@ -1534,7 +1520,6 @@ def get_valid_ratio(self, mask, dtype=torch.float32): valid_ratio = torch.stack([valid_ratio_width, valid_ratio_height], -1) return valid_ratio - # Copied from transformers.models.deformable_detr.modeling_deformable_detr.DeformableDetrModel.get_proposal_pos_embed def get_proposal_pos_embed(self, proposals): """Get the position embedding of the proposals.""" @@ -1869,7 +1854,6 @@ class DetaForObjectDetection(DetaPreTrainedModel): # We can't initialize the model on meta device as some weights are modified during the initialization _no_split_modules = None - # Copied from transformers.models.deformable_detr.modeling_deformable_detr.DeformableDetrForObjectDetection.__init__ with DeformableDetr->Deta def __init__(self, config: DetaConfig): super().__init__(config) @@ -2105,7 +2089,6 @@ def forward( return dict_outputs -# Copied from transformers.models.detr.modeling_detr.dice_loss def dice_loss(inputs, targets, num_boxes): """ Compute the DICE loss, similar to generalized IOU for masks @@ -2125,7 +2108,6 @@ def dice_loss(inputs, targets, num_boxes): return loss.sum() / num_boxes -# Copied from transformers.models.detr.modeling_detr.sigmoid_focal_loss def sigmoid_focal_loss(inputs, targets, num_boxes, alpha: float = 0.25, gamma: float = 2): """ Loss used in RetinaNet for dense detection: https://arxiv.org/abs/1708.02002. 
@@ -2197,7 +2179,6 @@ def __init__( if self.assign_second_stage: self.stg2_assigner = DetaStage2Assigner(num_queries) - # Copied from transformers.models.deformable_detr.modeling_deformable_detr.DeformableDetrLoss.loss_labels def loss_labels(self, outputs, targets, indices, num_boxes): """ Classification loss (Binary focal loss) targets dicts must contain the key "class_labels" containing a tensor @@ -2232,7 +2213,6 @@ def loss_labels(self, outputs, targets, indices, num_boxes): return losses @torch.no_grad() - # Copied from transformers.models.deformable_detr.modeling_deformable_detr.DeformableDetrLoss.loss_cardinality def loss_cardinality(self, outputs, targets, indices, num_boxes): """ Compute the cardinality error, i.e. the absolute error in the number of predicted non-empty boxes. @@ -2248,7 +2228,6 @@ def loss_cardinality(self, outputs, targets, indices, num_boxes): losses = {"cardinality_error": card_err} return losses - # Copied from transformers.models.deformable_detr.modeling_deformable_detr.DeformableDetrLoss.loss_boxes def loss_boxes(self, outputs, targets, indices, num_boxes): """ Compute the losses related to the bounding boxes, the L1 regression loss and the GIoU loss. @@ -2273,21 +2252,18 @@ def loss_boxes(self, outputs, targets, indices, num_boxes): losses["loss_giou"] = loss_giou.sum() / num_boxes return losses - # Copied from transformers.models.deformable_detr.modeling_deformable_detr.DeformableDetrLoss._get_source_permutation_idx def _get_source_permutation_idx(self, indices): # permute predictions following indices batch_idx = torch.cat([torch.full_like(source, i) for i, (source, _) in enumerate(indices)]) source_idx = torch.cat([source for (source, _) in indices]) return batch_idx, source_idx - # Copied from transformers.models.deformable_detr.modeling_deformable_detr.DeformableDetrLoss._get_target_permutation_idx def _get_target_permutation_idx(self, indices): # permute targets following indices batch_idx = torch.cat([torch.full_like(target, i) for i, (_, target) in enumerate(indices)]) target_idx = torch.cat([target for (_, target) in indices]) return batch_idx, target_idx - # Copied from transformers.models.deformable_detr.modeling_deformable_detr.DeformableDetrLoss.get_loss def get_loss(self, loss, outputs, targets, indices, num_boxes): loss_map = { "labels": self.loss_labels, @@ -2360,7 +2336,6 @@ def forward(self, outputs, targets): return losses -# Copied from transformers.models.detr.modeling_detr.DetrMLPPredictionHead class DetaMLPPredictionHead(nn.Module): """ Very simple multi-layer perceptron (MLP, also called FFN), used to predict the normalized center coordinates, @@ -2382,7 +2357,6 @@ def forward(self, x): return x -# Copied from transformers.models.deformable_detr.modeling_deformable_detr.DeformableDetrHungarianMatcher with DeformableDetr->Deta class DetaHungarianMatcher(nn.Module): """ This class computes an assignment between the targets and the predictions of the network. 
@@ -2463,7 +2437,6 @@ def forward(self, outputs, targets): return [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j, dtype=torch.int64)) for i, j in indices] -# Copied from transformers.models.detr.modeling_detr._upcast def _upcast(t: Tensor) -> Tensor: # Protects from numerical overflows in multiplications by upcasting to the equivalent higher type if t.is_floating_point(): @@ -2472,7 +2445,6 @@ def _upcast(t: Tensor) -> Tensor: return t if t.dtype in (torch.int32, torch.int64) else t.int() -# Copied from transformers.models.detr.modeling_detr.box_area def box_area(boxes: Tensor) -> Tensor: """ Computes the area of a set of bounding boxes, which are specified by its (x1, y1, x2, y2) coordinates. @@ -2489,7 +2461,6 @@ def box_area(boxes: Tensor) -> Tensor: return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) -# Copied from transformers.models.detr.modeling_detr.box_iou def box_iou(boxes1, boxes2): area1 = box_area(boxes1) area2 = box_area(boxes2) @@ -2506,7 +2477,6 @@ def box_iou(boxes1, boxes2): return iou, union -# Copied from transformers.models.detr.modeling_detr.generalized_box_iou def generalized_box_iou(boxes1, boxes2): """ Generalized IoU from https://giou.stanford.edu/. The boxes should be in [x0, y0, x1, y1] (corner) format. diff --git a/src/transformers/models/deprecated/efficientformer/modeling_efficientformer.py b/src/transformers/models/deprecated/efficientformer/modeling_efficientformer.py index 461490c7f5790e..306790021a7bb1 100644 --- a/src/transformers/models/deprecated/efficientformer/modeling_efficientformer.py +++ b/src/transformers/models/deprecated/efficientformer/modeling_efficientformer.py @@ -239,7 +239,6 @@ def forward(self, hidden_state: torch.Tensor) -> torch.Tensor: return hidden_state -# Copied from transformers.models.convnext.modeling_convnext.drop_path def drop_path(input: torch.Tensor, drop_prob: float = 0.0, training: bool = False) -> torch.Tensor: """ Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). 
@@ -260,7 +259,6 @@ def drop_path(input: torch.Tensor, drop_prob: float = 0.0, training: bool = Fals return output -# Copied from transformers.models.beit.modeling_beit.BeitDropPath with Beit->EfficientFormer class EfficientFormerDropPath(nn.Module): """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).""" diff --git a/src/transformers/models/deprecated/ernie_m/modeling_ernie_m.py b/src/transformers/models/deprecated/ernie_m/modeling_ernie_m.py index d8349ee5aa4400..68d270874c9135 100755 --- a/src/transformers/models/deprecated/ernie_m/modeling_ernie_m.py +++ b/src/transformers/models/deprecated/ernie_m/modeling_ernie_m.py @@ -86,7 +86,6 @@ def forward( return embeddings -# Copied from transformers.models.bert.modeling_bert.BertSelfAttention with Bert->ErnieM,self.value->self.v_proj,self.key->self.k_proj,self.query->self.q_proj class ErnieMSelfAttention(nn.Module): def __init__(self, config, position_embedding_type=None): super().__init__() @@ -380,7 +379,6 @@ def forward( ) -# Copied from transformers.models.bert.modeling_bert.BertPooler with Bert->ErnieM class ErnieMPooler(nn.Module): def __init__(self, config): super().__init__() @@ -599,7 +597,6 @@ def forward( ERNIE_M_START_DOCSTRING, ) class ErnieMForSequenceClassification(ErnieMPreTrainedModel): - # Copied from transformers.models.bert.modeling_bert.BertForSequenceClassification.__init__ with Bert->ErnieM,bert->ernie_m def __init__(self, config): super().__init__(config) self.num_labels = config.num_labels @@ -701,7 +698,6 @@ def forward( ERNIE_M_START_DOCSTRING, ) class ErnieMForMultipleChoice(ErnieMPreTrainedModel): - # Copied from transformers.models.bert.modeling_bert.BertForMultipleChoice.__init__ with Bert->ErnieM,bert->ernie_m def __init__(self, config): super().__init__(config) @@ -791,7 +787,6 @@ def forward( ERNIE_M_START_DOCSTRING, ) class ErnieMForTokenClassification(ErnieMPreTrainedModel): - # Copied from transformers.models.bert.modeling_bert.BertForTokenClassification.__init__ with Bert->ErnieM,bert->ernie_m def __init__(self, config): super().__init__(config) self.num_labels = config.num_labels @@ -872,7 +867,6 @@ def forward( ERNIE_M_START_DOCSTRING, ) class ErnieMForQuestionAnswering(ErnieMPreTrainedModel): - # Copied from transformers.models.bert.modeling_bert.BertForQuestionAnswering.__init__ with Bert->ErnieM,bert->ernie_m def __init__(self, config): super().__init__(config) self.num_labels = config.num_labels @@ -968,7 +962,6 @@ def forward( compute `start_prob` and `end_prob`, designed for Universal Information Extraction.""", ERNIE_M_START_DOCSTRING, ) -# Copied from paddlenlp.transformers.ernie_m.modeling.UIEM class ErnieMForInformationExtraction(ErnieMPreTrainedModel): def __init__(self, config): super(ErnieMForInformationExtraction, self).__init__(config) diff --git a/src/transformers/models/deprecated/gptsan_japanese/modeling_gptsan_japanese.py b/src/transformers/models/deprecated/gptsan_japanese/modeling_gptsan_japanese.py index 5129c1091ba3e2..c7a195dbea0eb6 100644 --- a/src/transformers/models/deprecated/gptsan_japanese/modeling_gptsan_japanese.py +++ b/src/transformers/models/deprecated/gptsan_japanese/modeling_gptsan_japanese.py @@ -45,7 +45,6 @@ #################################################### -# Copied from transformers.models.switch_transformers.modeling_switch_transformers.router_z_loss_func def router_z_loss_func(router_logits: torch.Tensor) -> float: r""" Compute the router z-loss implemented in PyTorch. 
@@ -66,7 +65,6 @@ def router_z_loss_func(router_logits: torch.Tensor) -> float: return torch.sum(z_loss) / (num_groups * tokens_per_group) -# Copied from transformers.models.switch_transformers.modeling_switch_transformers.load_balancing_loss_func def load_balancing_loss_func(router_probs: torch.Tensor, expert_indices: torch.Tensor) -> float: r""" Computes auxiliary load balancing loss as in Switch Transformer - implemented in Pytorch. @@ -140,7 +138,6 @@ def forward(self, hidden_states): return hidden_states -# Copied from transformers.models.switch_transformers.modeling_switch_transformers.SwitchTransformersTop1Router with SwitchTransformers->GPTSanJapanese class GPTSanJapaneseTop1Router(nn.Module): """ Router using tokens choose top-1 experts assignment. @@ -234,7 +231,6 @@ def forward(self, hidden_states: torch.Tensor) -> Tuple: return expert_index, router_probs, router_logits -# Copied from transformers.models.switch_transformers.modeling_switch_transformers.SwitchTransformersSparseMLP with SwitchTransformers->GPTSanJapanese class GPTSanJapaneseSparseMLP(nn.Module): r""" Implementation of the Switch Transformers Sparse MLP module. @@ -345,7 +341,6 @@ def forward(self, hidden_states): return output -# Copied from transformers.models.bart.modeling_bart.BartAttention with Bart->GPTSanJapanese class GPTSanJapaneseAttention(nn.Module): """Multi-headed attention from 'Attention Is All You Need' paper""" @@ -749,7 +744,6 @@ def _init_weights(self, module): module.experts[f"expert_{idx}"].wi.weight.data.normal_(mean=0.0, std=factor * (d_model**-0.5)) module.experts[f"expert_{idx}"].wo.weight.data.normal_(mean=0.0, std=factor * (d_model**-0.5)) - # Copied from transformers.models.t5.modeling_t5.T5PreTrainedModel._shift_right def _shift_right(self, input_ids): decoder_start_token_id = self.config.decoder_start_token_id pad_token_id = self.config.pad_token_id @@ -1298,17 +1292,14 @@ def prepare_inputs_for_generation( "past_key_values": None, } - # Copied from transformers.models.switch_transformers.modeling_switch_transformers.SwitchTransformersForConditionalGeneration.prepare_decoder_input_ids_from_labels with SwitchTransformers->GPTSanJapanese def prepare_decoder_input_ids_from_labels(self, labels: torch.Tensor): return self._shift_right(labels) - # Copied from transformers.models.mbart.modeling_mbart.MBartForConditionalGeneration.resize_token_embeddings with MBart->GPTSanJapanese def resize_token_embeddings(self, new_num_tokens: int, pad_to_multiple_of: Optional[int] = None) -> nn.Embedding: new_embeddings = super().resize_token_embeddings(new_num_tokens, pad_to_multiple_of) self._resize_final_logits_bias(new_embeddings.weight.shape[0]) return new_embeddings - # Copied from transformers.models.mbart.modeling_mbart.MBartForConditionalGeneration._resize_final_logits_bias with MBart->GPTSanJapanese def _resize_final_logits_bias(self, new_num_tokens: int) -> None: old_num_tokens = self.final_logits_bias.shape[-1] if new_num_tokens <= old_num_tokens: @@ -1324,15 +1315,12 @@ def get_input_embeddings(self): def set_input_embeddings(self, new_embeddings): self.model.set_input_embeddings(new_embeddings) - # Copied from transformers.models.switch_transformers.modeling_switch_transformers.SwitchTransformersForConditionalGeneration.set_output_embeddings with SwitchTransformers->GPTSanJapanese def set_output_embeddings(self, new_embeddings): self.lm_head = new_embeddings - # Copied from 
transformers.models.switch_transformers.modeling_switch_transformers.SwitchTransformersForConditionalGeneration.get_output_embeddings with SwitchTransformers->GPTSanJapanese def get_output_embeddings(self): return self.lm_head - # Copied from transformers.models.switch_transformers.modeling_switch_transformers.SwitchTransformersForConditionalGeneration._unpack_router_logits with SwitchTransformers->GPTSanJapanese def _unpack_router_logits(self, router_outputs): total_router_logits = [] total_expert_indexes = [] diff --git a/src/transformers/models/deprecated/gptsan_japanese/tokenization_gptsan_japanese.py b/src/transformers/models/deprecated/gptsan_japanese/tokenization_gptsan_japanese.py index e86aa47c1afece..51789e49b2d263 100644 --- a/src/transformers/models/deprecated/gptsan_japanese/tokenization_gptsan_japanese.py +++ b/src/transformers/models/deprecated/gptsan_japanese/tokenization_gptsan_japanese.py @@ -179,25 +179,20 @@ def __init__( ) @property - # Copied from tokenization_gpt_neox_japanese.GPTNeoXJapaneseTokenizer.vocab_size def vocab_size(self): # self.vocab contains support for character fluctuation unique to Japanese, and has a large number of vocab return len(self.raw_vocab) - # Copied from tokenization_gpt_neox_japanese.GPTNeoXJapaneseTokenizer.get_vocab def get_vocab(self): return dict(self.raw_vocab, **self.added_tokens_encoder) - # Copied from tokenization_gpt_neox_japanese.GPTNeoXJapaneseTokenizer._tokenize def _tokenize(self, text): return self.subword_tokenizer.tokenize(text, clean=self.do_clean_text) - # Copied from tokenization_gpt_neox_japanese.GPTNeoXJapaneseTokenizer._convert_token_to_id def _convert_token_to_id(self, token): """Converts a token (str) in an id using the vocab.""" return self.vocab.get(token, self.vocab.get(self.unk_token)) - # Copied from tokenization_gpt_neox_japanese.GPTNeoXJapaneseTokenizer._convert_id_to_token def _convert_id_to_token(self, index): """Converts an index (integer) in a token (str) using the vocab.""" return self.subword_tokenizer.convert_id_to_token(index) @@ -254,7 +249,6 @@ def default_chat_template(self): "{% endfor %}" ) - # Copied from tokenization_gpt_neox_japanese.GPTNeoXJapaneseTokenizer.save_vocabulary def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: index = 0 if os.path.isdir(save_directory): @@ -412,7 +406,6 @@ class SubWordJapaneseTokenizer(object): SOFTWARE. 
""" - # Copied from tokenization_gpt_neox_japanese.SubWordJapaneseTokenizer.__init__ def __init__(self, vocab, ids_to_tokens, emoji): self.vocab = vocab # same as swe self.ids_to_tokens = ids_to_tokens # same as bpe @@ -434,11 +427,9 @@ def __init__(self, vocab, ids_to_tokens, emoji): blocks = "▀▁▂▃▄▅▆▇█▉▊▋▌▍▎▏▐░▒▓▔▕▖▗▘▙▚▛▜▝▞▟" self.content_trans1 = str.maketrans({k: "" for k in keisen + blocks}) - # Copied from tokenization_gpt_neox_japanese.SubWordJapaneseTokenizer.__len__ def __len__(self): return len(self.ids_to_tokens) - # Copied from tokenization_gpt_neox_japanese.SubWordJapaneseTokenizer.clean_text def clean_text(self, content): content = self.content_repatter1.sub("", content) content = self.content_repatter2.sub("", content) @@ -451,7 +442,6 @@ def clean_text(self, content): content = content.replace("", "") return content - # Copied from tokenization_gpt_neox_japanese.SubWordJapaneseTokenizer.tokenize def tokenize(self, text, clean=False): text = text.replace(" ", "") text = text.replace(" ", "") diff --git a/src/transformers/models/deprecated/nat/modeling_nat.py b/src/transformers/models/deprecated/nat/modeling_nat.py index 58d92ada0b1543..b3827f3787eff9 100644 --- a/src/transformers/models/deprecated/nat/modeling_nat.py +++ b/src/transformers/models/deprecated/nat/modeling_nat.py @@ -256,7 +256,6 @@ def forward(self, input_feature: torch.Tensor) -> torch.Tensor: return input_feature -# Copied from transformers.models.beit.modeling_beit.drop_path def drop_path(input: torch.Tensor, drop_prob: float = 0.0, training: bool = False) -> torch.Tensor: """ Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). @@ -277,7 +276,6 @@ def drop_path(input: torch.Tensor, drop_prob: float = 0.0, training: bool = Fals return output -# Copied from transformers.models.beit.modeling_beit.BeitDropPath with Beit->Nat class NatDropPath(nn.Module): """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).""" diff --git a/src/transformers/models/deprecated/nezha/modeling_nezha.py b/src/transformers/models/deprecated/nezha/modeling_nezha.py index ef20396c00810f..3346a4f835a329 100644 --- a/src/transformers/models/deprecated/nezha/modeling_nezha.py +++ b/src/transformers/models/deprecated/nezha/modeling_nezha.py @@ -346,7 +346,6 @@ def forward( return outputs -# Copied from transformers.models.bert.modeling_bert.BertSelfOutput with Bert->Nezha class NezhaSelfOutput(nn.Module): def __init__(self, config): super().__init__() @@ -410,7 +409,6 @@ def forward( return outputs -# Copied from transformers.models.bert.modeling_bert.BertIntermediate with Bert->Nezha class NezhaIntermediate(nn.Module): def __init__(self, config): super().__init__() @@ -426,7 +424,6 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: return hidden_states -# Copied from transformers.models.bert.modeling_bert.BertOutput with Bert->Nezha class NezhaOutput(nn.Module): def __init__(self, config): super().__init__() @@ -527,7 +524,6 @@ def feed_forward_chunk(self, attention_output): return layer_output -# Copied from transformers.models.bert.modeling_bert.BertEncoder with Bert->Nezha class NezhaEncoder(nn.Module): def __init__(self, config): super().__init__() @@ -621,7 +617,6 @@ def forward( ) -# Copied from transformers.models.bert.modeling_bert.BertPooler with Bert->Nezha class NezhaPooler(nn.Module): def __init__(self, config): super().__init__() @@ -637,7 +632,6 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: return pooled_output -# 
Copied from transformers.models.bert.modeling_bert.BertPredictionHeadTransform with Bert->Nezha class NezhaPredictionHeadTransform(nn.Module): def __init__(self, config): super().__init__() @@ -655,7 +649,6 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: return hidden_states -# Copied from transformers.models.bert.modeling_bert.BertLMPredictionHead with Bert->Nezha class NezhaLMPredictionHead(nn.Module): def __init__(self, config): super().__init__() @@ -679,7 +672,6 @@ def forward(self, hidden_states): return hidden_states -# Copied from transformers.models.bert.modeling_bert.BertOnlyMLMHead with Bert->Nezha class NezhaOnlyMLMHead(nn.Module): def __init__(self, config): super().__init__() @@ -690,7 +682,6 @@ def forward(self, sequence_output: torch.Tensor) -> torch.Tensor: return prediction_scores -# Copied from transformers.models.bert.modeling_bert.BertOnlyNSPHead with Bert->Nezha class NezhaOnlyNSPHead(nn.Module): def __init__(self, config): super().__init__() @@ -701,7 +692,6 @@ def forward(self, pooled_output): return seq_relationship_score -# Copied from transformers.models.bert.modeling_bert.BertPreTrainingHeads with Bert->Nezha class NezhaPreTrainingHeads(nn.Module): def __init__(self, config): super().__init__() diff --git a/src/transformers/models/deprecated/open_llama/configuration_open_llama.py b/src/transformers/models/deprecated/open_llama/configuration_open_llama.py index 259fd193679dcf..e20c33f24a322a 100644 --- a/src/transformers/models/deprecated/open_llama/configuration_open_llama.py +++ b/src/transformers/models/deprecated/open_llama/configuration_open_llama.py @@ -145,7 +145,6 @@ def __init__( **kwargs, ) - # Copied from transformers.models.llama.configuration_llama.LlamaConfig._rope_scaling_validation def _rope_scaling_validation(self): """ Validate the `rope_scaling` configuration. diff --git a/src/transformers/models/deprecated/open_llama/modeling_open_llama.py b/src/transformers/models/deprecated/open_llama/modeling_open_llama.py index e748529c9e01d5..7d2098f2f63fff 100644 --- a/src/transformers/models/deprecated/open_llama/modeling_open_llama.py +++ b/src/transformers/models/deprecated/open_llama/modeling_open_llama.py @@ -46,7 +46,6 @@ _CONFIG_FOR_DOC = "OpenLlamaConfig" -# Copied from transformers.models.llama.modeling_llama.LlamaRMSNorm with Llama->OpenLlama class OpenLlamaRMSNorm(nn.Module): def __init__(self, hidden_size, eps=1e-6): """ @@ -64,7 +63,6 @@ def forward(self, hidden_states): return self.weight * hidden_states.to(input_dtype) -# Copied from transformers.models.mixtral.modeling_mixtral.MixtralRotaryEmbedding with Mixtral->OpenLlama class OpenLlamaRotaryEmbedding(nn.Module): def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None): super().__init__() @@ -101,7 +99,6 @@ def forward(self, x, seq_len=None): ) -# Copied from transformers.models.falcon.modeling_falcon.FalconLinearScalingRotaryEmbedding with Falcon->OpenLlama class OpenLlamaLinearScalingRotaryEmbedding(OpenLlamaRotaryEmbedding): """OpenLlamaRotaryEmbedding extended with linear scaling. Credits to the Reddit user /u/kaiokendev""" @@ -121,7 +118,6 @@ def _set_cos_sin_cache(self, seq_len, device, dtype): self.register_buffer("sin_cached", emb.sin().to(dtype), persistent=False) -# Copied from transformers.models.falcon.modeling_falcon.FalconDynamicNTKScalingRotaryEmbedding with Falcon->OpenLlama class OpenLlamaDynamicNTKScalingRotaryEmbedding(OpenLlamaRotaryEmbedding): """OpenLlamaRotaryEmbedding extended with Dynamic NTK scaling. 
Credits to the Reddit users /u/bloc97 and /u/emozilla""" @@ -155,7 +151,6 @@ def rotate_half(x): return torch.cat((-x2, x1), dim=-1) -# Copied from transformers.models.mixtral.modeling_mixtral.apply_rotary_pos_emb def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): """Applies Rotary Position Embedding to the query and key tensors. @@ -228,7 +223,6 @@ def __init__(self, config: OpenLlamaConfig): self.o_proj = nn.Linear(self.num_heads * self.head_dim, self.hidden_size, bias=False) self._init_rope() - # Copied from transformers.models.llama.modeling_llama.LlamaAttention._init_rope with Llama->OpenLlama def _init_rope(self): if self.config.rope_scaling is None: self.rotary_emb = OpenLlamaRotaryEmbedding( diff --git a/src/transformers/models/deprecated/qdqbert/modeling_qdqbert.py b/src/transformers/models/deprecated/qdqbert/modeling_qdqbert.py index f58c9b7fd65946..036ca99c73b502 100755 --- a/src/transformers/models/deprecated/qdqbert/modeling_qdqbert.py +++ b/src/transformers/models/deprecated/qdqbert/modeling_qdqbert.py @@ -142,7 +142,6 @@ def load_tf_weights_in_qdqbert(model, tf_checkpoint_path): return model -# Copied from transformers.models.bert.modeling_bert.BertEmbeddings with Bert -> QDQBert class QDQBertEmbeddings(nn.Module): """Construct the embeddings from word, position and token_type embeddings.""" @@ -628,7 +627,6 @@ def forward( ) -# Copied from transformers.models.bert.modeling_bert.BertPooler with Bert -> QDQBert class QDQBertPooler(nn.Module): def __init__(self, config): super().__init__() @@ -644,7 +642,6 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: return pooled_output -# Copied from transformers.models.bert.modeling_bert.BertPredictionHeadTransform with Bert -> QDQBert class QDQBertPredictionHeadTransform(nn.Module): def __init__(self, config): super().__init__() @@ -697,7 +694,6 @@ def forward(self, sequence_output): return prediction_scores -# Copied from transformers.models.bert.modeling_bert.BertOnlyNSPHead with Bert -> QDQBert class QDQBertOnlyNSPHead(nn.Module): def __init__(self, config): super().__init__() diff --git a/src/transformers/models/deprecated/realm/modeling_realm.py b/src/transformers/models/deprecated/realm/modeling_realm.py index f41eafe1840585..605f204fb578d9 100644 --- a/src/transformers/models/deprecated/realm/modeling_realm.py +++ b/src/transformers/models/deprecated/realm/modeling_realm.py @@ -150,7 +150,6 @@ def load_tf_weights_in_realm(model, config, tf_checkpoint_path): return model -# Copied from transformers.models.bert.modeling_bert.BertEmbeddings with Bert->Realm class RealmEmbeddings(nn.Module): """Construct the embeddings from word, position and token_type embeddings.""" @@ -215,7 +214,6 @@ def forward( return embeddings -# Copied from transformers.models.bert.modeling_bert.BertSelfAttention with Bert->Realm class RealmSelfAttention(nn.Module): def __init__(self, config, position_embedding_type=None): super().__init__() @@ -350,7 +348,6 @@ def forward( return outputs -# Copied from transformers.models.bert.modeling_bert.BertSelfOutput with Bert->Realm class RealmSelfOutput(nn.Module): def __init__(self, config): super().__init__() @@ -370,7 +367,6 @@ def forward(self, hidden_states: torch.Tensor, input_tensor: torch.Tensor) -> to } -# Copied from transformers.models.bert.modeling_bert.BertAttention with Bert->Realm,BERT->REALM class RealmAttention(nn.Module): def __init__(self, config, position_embedding_type=None): super().__init__() @@ -422,7 +418,6 @@ def forward( return outputs -# Copied from 
transformers.models.bert.modeling_bert.BertIntermediate with Bert->Realm class RealmIntermediate(nn.Module): def __init__(self, config): super().__init__() @@ -438,7 +433,6 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: return hidden_states -# Copied from transformers.models.bert.modeling_bert.BertOutput with Bert->Realm class RealmOutput(nn.Module): def __init__(self, config): super().__init__() @@ -453,7 +447,6 @@ def forward(self, hidden_states: torch.Tensor, input_tensor: torch.Tensor) -> to return hidden_states -# Copied from transformers.models.bert.modeling_bert.BertLayer with Bert->Realm class RealmLayer(nn.Module): def __init__(self, config): super().__init__() @@ -540,7 +533,6 @@ def feed_forward_chunk(self, attention_output): return layer_output -# Copied from transformers.models.bert.modeling_bert.BertEncoder with Bert->Realm class RealmEncoder(nn.Module): def __init__(self, config): super().__init__() @@ -634,7 +626,6 @@ def forward( ) -# Copied from transformers.models.bert.modeling_bert.BertPooler with Bert->Realm class RealmPooler(nn.Module): def __init__(self, config): super().__init__() diff --git a/src/transformers/models/deprecated/retribert/tokenization_retribert.py b/src/transformers/models/deprecated/retribert/tokenization_retribert.py index c991f3972230bd..2f66fcc1edd129 100644 --- a/src/transformers/models/deprecated/retribert/tokenization_retribert.py +++ b/src/transformers/models/deprecated/retribert/tokenization_retribert.py @@ -28,7 +28,6 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} -# Copied from transformers.models.bert.tokenization_bert.load_vocab def load_vocab(vocab_file): """Loads a vocabulary file into a dictionary.""" vocab = collections.OrderedDict() @@ -40,7 +39,6 @@ def load_vocab(vocab_file): return vocab -# Copied from transformers.models.bert.tokenization_bert.whitespace_tokenize def whitespace_tokenize(text): """Runs basic whitespace cleaning and splitting on a piece of text.""" text = text.strip() @@ -96,7 +94,6 @@ class RetriBertTokenizer(PreTrainedTokenizer): vocab_files_names = VOCAB_FILES_NAMES model_input_names = ["input_ids", "attention_mask"] - # Copied from transformers.models.bert.tokenization_bert.BertTokenizer.__init__ def __init__( self, vocab_file, @@ -145,20 +142,16 @@ def __init__( ) @property - # Copied from transformers.models.bert.tokenization_bert.BertTokenizer.do_lower_case def do_lower_case(self): return self.basic_tokenizer.do_lower_case @property - # Copied from transformers.models.bert.tokenization_bert.BertTokenizer.vocab_size def vocab_size(self): return len(self.vocab) - # Copied from transformers.models.bert.tokenization_bert.BertTokenizer.get_vocab def get_vocab(self): return dict(self.vocab, **self.added_tokens_encoder) - # Copied from transformers.models.bert.tokenization_bert.BertTokenizer._tokenize def _tokenize(self, text, split_special_tokens=False): split_tokens = [] if self.do_basic_tokenize: @@ -174,23 +167,19 @@ def _tokenize(self, text, split_special_tokens=False): split_tokens = self.wordpiece_tokenizer.tokenize(text) return split_tokens - # Copied from transformers.models.bert.tokenization_bert.BertTokenizer._convert_token_to_id def _convert_token_to_id(self, token): """Converts a token (str) in an id using the vocab.""" return self.vocab.get(token, self.vocab.get(self.unk_token)) - # Copied from transformers.models.bert.tokenization_bert.BertTokenizer._convert_id_to_token def _convert_id_to_token(self, index): """Converts an index (integer) in a token (str) using the vocab.""" 
return self.ids_to_tokens.get(index, self.unk_token) - # Copied from transformers.models.bert.tokenization_bert.BertTokenizer.convert_tokens_to_string def convert_tokens_to_string(self, tokens): """Converts a sequence of tokens (string) in a single string.""" out_string = " ".join(tokens).replace(" ##", "").strip() return out_string - # Copied from transformers.models.bert.tokenization_bert.BertTokenizer.build_inputs_with_special_tokens def build_inputs_with_special_tokens( self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None ) -> List[int]: @@ -216,7 +205,6 @@ def build_inputs_with_special_tokens( sep = [self.sep_token_id] return cls + token_ids_0 + sep + token_ids_1 + sep - # Copied from transformers.models.bert.tokenization_bert.BertTokenizer.get_special_tokens_mask def get_special_tokens_mask( self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False ) -> List[int]: @@ -245,7 +233,6 @@ def get_special_tokens_mask( return [1] + ([0] * len(token_ids_0)) + [1] + ([0] * len(token_ids_1)) + [1] return [1] + ([0] * len(token_ids_0)) + [1] - # Copied from transformers.models.bert.tokenization_bert.BertTokenizer.create_token_type_ids_from_sequences def create_token_type_ids_from_sequences( self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None ) -> List[int]: @@ -275,7 +262,6 @@ def create_token_type_ids_from_sequences( return len(cls + token_ids_0 + sep) * [0] return len(cls + token_ids_0 + sep) * [0] + len(token_ids_1 + sep) * [1] - # Copied from transformers.models.bert.tokenization_bert.BertTokenizer.save_vocabulary def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: index = 0 if os.path.isdir(save_directory): @@ -297,7 +283,6 @@ def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = return (vocab_file,) -# Copied from transformers.models.bert.tokenization_bert.BasicTokenizer class BasicTokenizer(object): """ Constructs a BasicTokenizer that will run basic tokenization (punctuation splitting, lower casing, etc.). 
@@ -459,7 +444,6 @@ def _clean_text(self, text): return "".join(output) -# Copied from transformers.models.bert.tokenization_bert.WordpieceTokenizer class WordpieceTokenizer(object): """Runs WordPiece tokenization.""" diff --git a/src/transformers/models/deprecated/retribert/tokenization_retribert_fast.py b/src/transformers/models/deprecated/retribert/tokenization_retribert_fast.py index 97fbfc07d30ca6..9a915d1597956e 100644 --- a/src/transformers/models/deprecated/retribert/tokenization_retribert_fast.py +++ b/src/transformers/models/deprecated/retribert/tokenization_retribert_fast.py @@ -76,7 +76,6 @@ class RetriBertTokenizerFast(PreTrainedTokenizerFast): slow_tokenizer_class = RetriBertTokenizer model_input_names = ["input_ids", "attention_mask"] - # Copied from transformers.models.bert.tokenization_bert_fast.BertTokenizerFast.__init__ def __init__( self, vocab_file=None, @@ -119,7 +118,6 @@ def __init__( self.do_lower_case = do_lower_case - # Copied from transformers.models.bert.tokenization_bert_fast.BertTokenizerFast.build_inputs_with_special_tokens def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None): """ Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and @@ -144,7 +142,6 @@ def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None): return output - # Copied from transformers.models.bert.tokenization_bert_fast.BertTokenizerFast.create_token_type_ids_from_sequences def create_token_type_ids_from_sequences( self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None ) -> List[int]: @@ -174,7 +171,6 @@ def create_token_type_ids_from_sequences( return len(cls + token_ids_0 + sep) * [0] return len(cls + token_ids_0 + sep) * [0] + len(token_ids_1 + sep) * [1] - # Copied from transformers.models.bert.tokenization_bert_fast.BertTokenizerFast.save_vocabulary def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: files = self._tokenizer.model.save(save_directory, name=filename_prefix) return tuple(files) diff --git a/src/transformers/models/deprecated/speech_to_text_2/modeling_speech_to_text_2.py b/src/transformers/models/deprecated/speech_to_text_2/modeling_speech_to_text_2.py index 6953821648e9d4..8f1a8370933c91 100755 --- a/src/transformers/models/deprecated/speech_to_text_2/modeling_speech_to_text_2.py +++ b/src/transformers/models/deprecated/speech_to_text_2/modeling_speech_to_text_2.py @@ -36,7 +36,6 @@ _CHECKPOINT_FOR_DOC = "facebook/s2t-wav2vec2-large-en-de" -# Copied from transformers.models.speech_to_text.modeling_speech_to_text.Speech2TextSinusoidalPositionalEmbedding with Speech2Text->Speech2Text2 class Speech2Text2SinusoidalPositionalEmbedding(nn.Module): """This module produces sinusoidal positional embeddings of any length.""" @@ -107,7 +106,6 @@ def create_position_ids_from_input_ids( return incremental_indices.long() + padding_idx -# Copied from transformers.models.bart.modeling_bart.BartAttention with Bart->Speech2Text2 class Speech2Text2Attention(nn.Module): """Multi-headed attention from 'Attention Is All You Need' paper""" diff --git a/src/transformers/models/deprecated/tvlt/modeling_tvlt.py b/src/transformers/models/deprecated/tvlt/modeling_tvlt.py index ae84a7df195e07..7f82aacf6e8b5e 100644 --- a/src/transformers/models/deprecated/tvlt/modeling_tvlt.py +++ b/src/transformers/models/deprecated/tvlt/modeling_tvlt.py @@ -340,7 +340,6 @@ def forward(self, audio_values: torch.Tensor) -> torch.Tensor: return embeddings -# 
Copied from transformers.models.vilt.modeling_vilt.ViltSelfAttention with Vilt->Tvlt class TvltSelfAttention(nn.Module): def __init__(self, config): super().__init__() @@ -401,7 +400,6 @@ def forward(self, hidden_states, attention_mask=None, head_mask=None, output_att return outputs -# Copied from transformers.models.vilt.modeling_vilt.ViltSelfOutput with Vilt->Tvlt class TvltSelfOutput(nn.Module): """ The residual connection is defined in TvltLayer instead of here (as is the case with other models), due to the @@ -420,7 +418,6 @@ def forward(self, hidden_states: torch.Tensor, input_tensor: torch.Tensor) -> to return hidden_states -# Copied from transformers.models.vilt.modeling_vilt.ViltAttention with Vilt->Tvlt class TvltAttention(nn.Module): def __init__(self, config): super().__init__() @@ -455,7 +452,6 @@ def forward(self, hidden_states, attention_mask=None, head_mask=None, output_att return outputs -# Copied from transformers.models.vilt.modeling_vilt.ViltIntermediate with Vilt->Tvlt class TvltIntermediate(nn.Module): def __init__(self, config: TvltConfig) -> None: super().__init__() @@ -472,7 +468,6 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: return hidden_states -# Copied from transformers.models.vilt.modeling_vilt.ViltOutput with Vilt->Tvlt class TvltOutput(nn.Module): def __init__(self, config: TvltConfig) -> None: super().__init__() @@ -488,7 +483,6 @@ def forward(self, hidden_states: torch.Tensor, input_tensor: torch.Tensor) -> to return hidden_states -# Copied from transformers.models.vilt.modeling_vilt.ViltLayer with Vilt->Tvlt class TvltLayer(nn.Module): """This corresponds to the Block class in the timm implementation.""" @@ -527,7 +521,6 @@ def forward(self, hidden_states, attention_mask=None, head_mask=None, output_att return outputs -# Copied from transformers.models.vilt.modeling_vilt.ViltEncoder with Vilt->Tvlt class TvltEncoder(nn.Module): def __init__(self, config): super().__init__() diff --git a/src/transformers/models/deprecated/van/modeling_van.py b/src/transformers/models/deprecated/van/modeling_van.py index 1b26d8892bb265..440881c7510b52 100644 --- a/src/transformers/models/deprecated/van/modeling_van.py +++ b/src/transformers/models/deprecated/van/modeling_van.py @@ -48,7 +48,6 @@ _IMAGE_CLASS_EXPECTED_OUTPUT = "tabby, tabby cat" -# Copied from transformers.models.convnext.modeling_convnext.drop_path def drop_path(input: torch.Tensor, drop_prob: float = 0.0, training: bool = False) -> torch.Tensor: """ Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). 
@@ -69,7 +68,6 @@ def drop_path(input: torch.Tensor, drop_prob: float = 0.0, training: bool = Fals return output -# Copied from transformers.models.convnext.modeling_convnext.ConvNextDropPath with ConvNext->Van class VanDropPath(nn.Module): """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).""" diff --git a/src/transformers/models/deprecated/vit_hybrid/image_processing_vit_hybrid.py b/src/transformers/models/deprecated/vit_hybrid/image_processing_vit_hybrid.py index b8db4a7faee144..89a8f9e676e8a8 100644 --- a/src/transformers/models/deprecated/vit_hybrid/image_processing_vit_hybrid.py +++ b/src/transformers/models/deprecated/vit_hybrid/image_processing_vit_hybrid.py @@ -140,7 +140,6 @@ def __init__( "input_data_format", ] - # Copied from transformers.models.clip.image_processing_clip.CLIPImageProcessor.resize def resize( self, image: np.ndarray, diff --git a/src/transformers/models/deprecated/vit_hybrid/modeling_vit_hybrid.py b/src/transformers/models/deprecated/vit_hybrid/modeling_vit_hybrid.py index 9c025d36153982..a1f34bfe40d6c4 100644 --- a/src/transformers/models/deprecated/vit_hybrid/modeling_vit_hybrid.py +++ b/src/transformers/models/deprecated/vit_hybrid/modeling_vit_hybrid.py @@ -51,7 +51,6 @@ class ViTHybridEmbeddings(nn.Module): Construct the CLS token, position and patch embeddings. Optionally, also the mask token. """ - # Copied from transformers.models.vit.modeling_vit.ViTEmbeddings.__init__ with ViT->ViTHybrid def __init__(self, config: ViTHybridConfig, use_mask_token: bool = False) -> None: super().__init__() @@ -186,7 +185,6 @@ def forward(self, pixel_values: torch.Tensor, interpolate_pos_encoding: bool = F return embeddings -# Copied from transformers.models.vit.modeling_vit.ViTSelfAttention with ViT->ViTHybrid class ViTHybridSelfAttention(nn.Module): def __init__(self, config: ViTHybridConfig) -> None: super().__init__() @@ -247,7 +245,6 @@ def forward( return outputs -# Copied from transformers.models.vit.modeling_vit.ViTSdpaSelfAttention with ViT->ViTHybrid class ViTHybridSdpaSelfAttention(ViTHybridSelfAttention): def __init__(self, config: ViTHybridConfig) -> None: super().__init__(config) @@ -279,7 +276,6 @@ def forward( return context_layer, None -# Copied from transformers.models.vit.modeling_vit.ViTSelfOutput with ViT->ViTHybrid class ViTHybridSelfOutput(nn.Module): """ The residual connection is defined in ViTHybridLayer instead of here (as is the case with other models), due to the @@ -298,7 +294,6 @@ def forward(self, hidden_states: torch.Tensor, input_tensor: torch.Tensor) -> to return hidden_states -# Copied from transformers.models.vit.modeling_vit.ViTAttention with ViT->ViTHybrid class ViTHybridAttention(nn.Module): def __init__(self, config: ViTHybridConfig) -> None: super().__init__() @@ -338,14 +333,12 @@ def forward( return outputs -# Copied from transformers.models.vit.modeling_vit.ViTSdpaAttention with ViT->ViTHybrid class ViTHybridSdpaAttention(ViTHybridAttention): def __init__(self, config: ViTHybridConfig) -> None: super().__init__(config) self.attention = ViTHybridSdpaSelfAttention(config) -# Copied from transformers.models.vit.modeling_vit.ViTIntermediate with ViT->ViTHybrid class ViTHybridIntermediate(nn.Module): def __init__(self, config: ViTHybridConfig) -> None: super().__init__() @@ -362,7 +355,6 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: return hidden_states -# Copied from transformers.models.vit.modeling_vit.ViTOutput with ViT->ViTHybrid class ViTHybridOutput(nn.Module): def 
__init__(self, config: ViTHybridConfig) -> None: super().__init__() @@ -427,7 +419,6 @@ def forward( return outputs -# Copied from transformers.models.vit.modeling_vit.ViTEncoder with ViT->ViTHybrid class ViTHybridEncoder(nn.Module): def __init__(self, config: ViTHybridConfig) -> None: super().__init__() @@ -479,7 +470,6 @@ def forward( ) -# Copied from transformers.models.vit.modeling_vit.ViTPreTrainedModel with ViT->ViTHybrid class ViTHybridPreTrainedModel(PreTrainedModel): """ An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained @@ -558,7 +548,6 @@ def _init_weights(self, module: Union[nn.Linear, nn.Conv2d, nn.LayerNorm]) -> No "The bare ViT Hybrid Model transformer outputting raw hidden-states without any specific head on top.", VIT_START_DOCSTRING, ) -# Copied from transformers.models.vit.modeling_vit.ViTModel with ViT->ViTHybrid class ViTHybridModel(ViTHybridPreTrainedModel): def __init__(self, config: ViTHybridConfig, add_pooling_layer: bool = True, use_mask_token: bool = False): super().__init__(config) @@ -654,7 +643,6 @@ def forward( ) -# Copied from transformers.models.vit.modeling_vit.ViTPooler with ViT->ViTHybrid class ViTHybridPooler(nn.Module): def __init__(self, config: ViTHybridConfig): super().__init__() @@ -677,7 +665,6 @@ def forward(self, hidden_states): """, VIT_START_DOCSTRING, ) -# Copied from transformers.models.vit.modeling_vit.ViTForImageClassification with ViT->ViTHybrid class ViTHybridForImageClassification(ViTHybridPreTrainedModel): def __init__(self, config: ViTHybridConfig) -> None: super().__init__(config) diff --git a/src/transformers/models/deprecated/xlm_prophetnet/modeling_xlm_prophetnet.py b/src/transformers/models/deprecated/xlm_prophetnet/modeling_xlm_prophetnet.py index 68fb70d4f1a640..e9e709af993dea 100644 --- a/src/transformers/models/deprecated/xlm_prophetnet/modeling_xlm_prophetnet.py +++ b/src/transformers/models/deprecated/xlm_prophetnet/modeling_xlm_prophetnet.py @@ -44,7 +44,6 @@ _CONFIG_FOR_DOC = "XLMProphetNetConfig" -# Copied from src.transformers.models.prophetnet.modeling_prophetnet.PROPHETNET_START_DOCSTRING with ProphetNetConfig->XLMProphetNetConfig XLM_PROPHETNET_START_DOCSTRING = r""" This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads @@ -64,7 +63,6 @@ configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights. 
""" -# Copied from src.transformers.models.prophetnet.modeling_prophetnet.PROPHETNET_INPUTS_DOCSTRING with ProphetNet->XLMProphetNet XLM_PROPHETNET_INPUTS_DOCSTRING = r""" Args: input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`): @@ -139,7 +137,6 @@ """ -# Copied from src.transformers.models.prophetnet.modeling_prophetnet.PROPHETNET_STANDALONE_INPUTS_DOCSTRING with ProphetNet->XLMProphetNet XLM_PROPHETNET_STANDALONE_INPUTS_DOCSTRING = r""" Args: input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`): @@ -174,7 +171,6 @@ """ -# Copied from transformers.models.prophetnet.modeling_prophetnet.softmax def softmax(hidden_state, dim, onnx_trace=False): if onnx_trace: return nn.functional.softmax(hidden_state.float(), dim=dim) @@ -182,7 +178,6 @@ def softmax(hidden_state, dim, onnx_trace=False): return nn.functional.softmax(hidden_state, dim=dim, dtype=torch.float32) -# Copied from transformers.models.prophetnet.modeling_prophetnet.ngram_attention_bias def ngram_attention_bias(sequence_length, ngram, device, dtype): """ This function computes the bias for the predict stream @@ -200,7 +195,6 @@ def ngram_attention_bias(sequence_length, ngram, device, dtype): return torch.cat([left_block, right_block], dim=2) -# Copied from transformers.models.prophetnet.modeling_prophetnet.compute_relative_buckets def compute_relative_buckets(num_buckets, max_distance, relative_positions, is_bidirectional=False): """ This function computes individual parts of the relative position buckets. For more detail, see paper. @@ -228,7 +222,6 @@ def compute_relative_buckets(num_buckets, max_distance, relative_positions, is_b return rel_positions_bucket -# Copied from transformers.models.prophetnet.modeling_prophetnet.compute_all_stream_relative_buckets def compute_all_stream_relative_buckets(num_buckets, max_distance, position_ids): """ This function computes both main and predict relative position buckets. For more detail, see paper. @@ -253,7 +246,6 @@ def compute_all_stream_relative_buckets(num_buckets, max_distance, position_ids) @dataclass -# Copied from transformers.models.prophetnet.modeling_prophetnet.ProphetNetSeq2SeqLMOutput with ProphetNet->XLMProphetNet all-casing class XLMProphetNetSeq2SeqLMOutput(ModelOutput): """ Base class for sequence-to-sequence language models outputs. @@ -339,7 +331,6 @@ def decoder_cross_attentions(self): @dataclass -# Copied from transformers.models.prophetnet.modeling_prophetnet.ProphetNetSeq2SeqModelOutput with ProphetNet->XLMProphetNet all-casing class XLMProphetNetSeq2SeqModelOutput(ModelOutput): """ Base class for model encoder's outputs that also contains : pre-computed hidden states that can speed up sequential @@ -426,7 +417,6 @@ def decoder_cross_attentions(self): @dataclass -# Copied from transformers.models.prophetnet.modeling_prophetnet.ProphetNetDecoderModelOutput with ProphetNet->XLMProphetNet all-casing class XLMProphetNetDecoderModelOutput(ModelOutput): """ Base class for model's outputs that may also contain a past key/values (to speed up sequential decoding). @@ -487,7 +477,6 @@ class XLMProphetNetDecoderModelOutput(ModelOutput): @dataclass -# Copied from transformers.models.prophetnet.modeling_prophetnet.ProphetNetDecoderLMOutput with ProphetNet->XLMProphetNet all-casing class XLMProphetNetDecoderLMOutput(ModelOutput): """ Base class for model's outputs that may also contain a past key/values (to speed up sequential decoding). 
@@ -549,7 +538,6 @@ class XLMProphetNetDecoderLMOutput(ModelOutput):
     cross_attentions: Optional[Tuple[torch.FloatTensor]] = None
 
 
-# Copied from transformers.models.prophetnet.modeling_prophetnet.ProphetNetPreTrainedModel with ProphetNet->XLMProphetNet
 class XLMProphetNetPreTrainedModel(PreTrainedModel):
     config_class = XLMProphetNetConfig
     base_model_prefix = "prophetnet"
@@ -588,7 +576,6 @@ def _shift_right(self, input_ids):
         return shifted_input_ids
 
 
-# Copied from transformers.models.prophetnet.modeling_prophetnet.ProphetNetPositionalEmbeddings with ProphetNet->XLMProphetNet
 class XLMProphetNetPositionalEmbeddings(nn.Embedding):
     """
     This module learns positional embeddings up to a fixed maximum size. Padding ids are ignored by either offsetting
@@ -632,7 +619,6 @@ def _forward(self, position_ids):
         return super().forward(position_ids)
 
 
-# Copied from transformers.models.prophetnet.modeling_prophetnet.ProphetNetAttention with ProphetNet->XLMProphetNet
 class XLMProphetNetAttention(nn.Module):
     """Multi-headed attention from 'Attention Is All You Need' paper"""
 
@@ -762,7 +748,6 @@ def forward(
         return attn_output, attn_weights_reshaped, past_key_value
 
 
-# Copied from transformers.models.prophetnet.modeling_prophetnet.ProphetNetFeedForward with ProphetNet->XLMProphetNet
 class XLMProphetNetFeedForward(nn.Module):
     """
     This is the residual two feed-forward layer block based on the original Transformer implementation.
@@ -786,7 +771,6 @@ def forward(self, hidden_states):
         return hidden_states
 
 
-# Copied from transformers.models.prophetnet.modeling_prophetnet.ProphetNetNgramSelfAttention with ProphetNet->XLMProphetNet
 class XLMProphetNetNgramSelfAttention(nn.Module):
     def __init__(self, config: XLMProphetNetConfig):
         super().__init__()
@@ -1106,7 +1090,6 @@ def get_predict_relative_pos_embeddings(
         return predict_relative_pos_embeddings
 
 
-# Copied from transformers.models.prophetnet.modeling_prophetnet.ProphetNetEncoderLayer with ProphetNet->XLMProphetNet, Prophetnet->XLMProphetnet
 class XLMProphetNetEncoderLayer(nn.Module):
     """
     Encoder block for XLMProphetnet
@@ -1150,7 +1133,6 @@ def forward(
         return outputs
 
 
-# Copied from transformers.models.prophetnet.modeling_prophetnet.ProphetNetDecoderLayer with Prophetnet->XLMProphetnet, ProphetNet->XLMProphetNet
 class XLMProphetNetDecoderLayer(nn.Module):
     """
     Decoder block for XLMProphetnet
@@ -1239,7 +1221,6 @@ def forward(
     "The standalone encoder part of the XLMProphetNetModel.",
     XLM_PROPHETNET_START_DOCSTRING,
 )
-# Copied from transformers.models.prophetnet.modeling_prophetnet.ProphetNetEncoder with microsoft/prophetnet-large-uncased->patrickvonplaten/xprophetnet-large-uncased-standalone, ProphetNet->XLMProphetNet, PROPHETNET->XLM_PROPHETNET
 class XLMProphetNetEncoder(XLMProphetNetPreTrainedModel):
     r"""
     word_embeddings (`torch.nn.Embeddings` of shape `(config.vocab_size, config.hidden_size)`, *optional*):
@@ -1374,7 +1355,6 @@ def forward(
     "The standalone decoder part of the XLMProphetNetModel.",
     XLM_PROPHETNET_START_DOCSTRING,
 )
-# Copied from transformers.models.prophetnet.modeling_prophetnet.ProphetNetDecoder with microsoft/prophetnet-large-uncased->patrickvonplaten/xprophetnet-large-uncased-standalone, ProphetNet->XLMProphetNet, PROPHETNET->XLM_PROPHETNET,
 class XLMProphetNetDecoder(XLMProphetNetPreTrainedModel):
     r"""
     word_embeddings (`torch.nn.Embeddings` of shape `(config.vocab_size, config.hidden_size)`, *optional*):
@@ -1743,7 +1723,6 @@ def prepare_predict_attention_mask(self, hidden_states, attention_mask):
     "The bare XLMProphetNet Model outputting raw hidden-states without any specific head on top.",
     XLM_PROPHETNET_START_DOCSTRING,
 )
-# Copied from transformers.models.prophetnet.modeling_prophetnet.ProphetNetModel with microsoft/prophetnet-large-uncased->patrickvonplaten/xprophetnet-large-uncased-standalone, ProphetNet->XLMProphetNet, PROPHETNET->XLM_PROPHETNET
 class XLMProphetNetModel(XLMProphetNetPreTrainedModel):
     _tied_weights_keys = ["encoder.word_embeddings.weight", "decoder.word_embeddings.weight"]
 
@@ -1878,7 +1857,6 @@ def forward(
     "The XLMProphetNet Model with a language modeling head. Can be used for sequence generation tasks.",
     XLM_PROPHETNET_START_DOCSTRING,
 )
-# Copied from transformers.models.prophetnet.modeling_prophetnet.ProphetNetForConditionalGeneration with microsoft/prophetnet-large-uncased->patrickvonplaten/xprophetnet-large-uncased-standalone, ProphetNet->XLMProphetNet, PROPHETNET->XLM_PROPHETNET
 class XLMProphetNetForConditionalGeneration(XLMProphetNetPreTrainedModel):
     _tied_weights_keys = ["encoder.word_embeddings.weight", "decoder.word_embeddings.weight", "lm_head.weight"]
 
@@ -2073,7 +2051,6 @@ def prepare_decoder_input_ids_from_labels(self, labels: torch.Tensor):
         return self._shift_right(labels)
 
     @staticmethod
-    # Copied from transformers.models.bart.modeling_bart.BartForConditionalGeneration._reorder_cache
     def _reorder_cache(past_key_values, beam_idx):
         reordered_past = ()
         for layer_past in past_key_values:
@@ -2096,7 +2073,6 @@ def get_decoder(self):
     " language modeling.",
     XLM_PROPHETNET_START_DOCSTRING,
 )
-# Copied from transformers.models.prophetnet.modeling_prophetnet.ProphetNetForCausalLM with microsoft/prophetnet-large-uncased->patrickvonplaten/xprophetnet-large-uncased-standalone, ProphetNet->XLMProphetNet, PROPHETNET->XLM_PROPHETNET
 class XLMProphetNetForCausalLM(XLMProphetNetPreTrainedModel):
     _tied_weights_keys = [
         "prophetnet.word_embeddings.weight",
@@ -2329,7 +2305,6 @@ def prepare_inputs_for_generation(
         }
 
     @staticmethod
-    # Copied from transformers.models.bart.modeling_bart.BartForCausalLM._reorder_cache
     def _reorder_cache(past_key_values, beam_idx):
         reordered_past = ()
         for layer_past in past_key_values:
@@ -2339,7 +2314,6 @@ def _reorder_cache(past_key_values, beam_idx):
         return reordered_past
 
 
-# Copied from transformers.models.prophetnet.modeling_prophetnet.ProphetNetDecoderWrapper with ProphetNet->XLMProphetNet, prophetnet->XLMProphetNet
 class XLMProphetNetDecoderWrapper(XLMProphetNetPreTrainedModel):
     """
     This is a wrapper class, so that [`XLMProphetNetForCausalLM`] can correctly be loaded from pretrained XLMProphetNet
diff --git a/utils/deprecate_models.py b/utils/deprecate_models.py
index 23308e91a767fc..add8da74d9308b 100644
--- a/utils/deprecate_models.py
+++ b/utils/deprecate_models.py
@@ -124,6 +124,25 @@ def update_relative_imports(filename, model):
         f.write("\n".join(new_file_lines))
 
 
+def remove_copied_from_statements(model):
+    model_path = REPO_PATH / f"src/transformers/models/{model}"
+    for file in os.listdir(model_path):
+        if file == "__pycache__":
+            continue
+        file_path = model_path / file
+        with open(file_path, "r") as f:
+            file_lines = f.read()
+
+        new_file_lines = []
+        for line in file_lines.split("\n"):
+            if "# Copied from" in line:
+                continue
+            new_file_lines.append(line)
+
+        with open(file_path, "w") as f:
+            f.write("\n".join(new_file_lines))
+
+
 def move_model_files_to_deprecated(model):
     model_path = REPO_PATH / f"src/transformers/models/{model}"
     deprecated_model_path = REPO_PATH / f"src/transformers/models/deprecated/{model}"
@@ -321,6 +340,10 @@ def deprecate_models(models):
print("Adding tip message to model doc page") insert_tip_to_model_doc(model_info["model_doc_path"], tip_message) + # Remove #Copied from statements from model's files + print("Removing #Copied from statements from model's files") + remove_copied_from_statements(model) + # Move the model file to deprecated: src/transfomers/models/model -> src/transformers/models/deprecated/model print("Moving model files to deprecated for model") move_model_files_to_deprecated(model)