Convert directory fbcode/torchmultimodal to use the Ruff Formatter (#531)

Summary:
Pull Request resolved: #531

Converts the specified directory to use the Ruff formatter in pyfmt.


If this diff causes merge conflicts when rebasing, please run
`hg status -n -0 --change . -I '**/*.{py,pyi}' | xargs -0 arc pyfmt`
on your diff, and amend any changes before rebasing onto latest.
That should help reduce or eliminate any merge conflicts.
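
For a plain git checkout without Meta's pyfmt wrapper, a rough equivalent (a sketch, assuming Ruff is installed) is `git diff-tree --no-commit-id --name-only -r HEAD -- '*.py' '*.pyi' | xargs -r ruff format`, which reformats only the Python files touched by the latest commit.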

allow-large-files

Reviewed By: amyreese

Differential Revision: D66105544

fbshipit-source-id: 0994dff620e2b718bdfbf4aafd472d468a6f6389
Thomas Polasek authored and facebook-github-bot committed Nov 18, 2024
1 parent e4d288b commit 6569fcc
Showing 38 changed files with 19 additions and 57 deletions.
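
The hunks below are almost entirely mechanical and fall into a few recurring Ruff rules: a trailing comma is added when a parameter list is split across lines, the blank line directly after a function signature is removed, redundant double parentheses are collapsed, and needlessly wrapped one-line assignments are unwrapped. A minimal sketch of the dominant pattern (the collate function below is hypothetical, not taken from the repo):

```python
from typing import List, Tuple

from torch import Tensor


def example_collate_fn(
    batch: List[Tuple[Tensor, Tensor, int]],  # Ruff keeps the magic trailing comma
) -> Tuple[List[Tensor], List[Tensor], List[int]]:
    # Ruff also removes the blank line that previously followed the signature
    # and collapses doubled parentheses, e.g. ((a + b)).sum() -> (a + b).sum().
    images, texts, labels = [], [], []
    for image, text, label in batch:
        images.append(image)
        texts.append(text)
        labels.append(label)
    return images, texts, labels
```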
2 changes: 1 addition & 1 deletion examples/albef/data/retrieval_datamodule.py
@@ -161,7 +161,7 @@ def text_dataloader(


def retrieval_train_collate_fn(
-    batch: List[Tuple[Tensor, Tensor, int]]
+    batch: List[Tuple[Tensor, Tensor, int]],
) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
image_list = []
text_list = []
4 changes: 2 additions & 2 deletions examples/albef/data/vqa_datamodules.py
@@ -157,7 +157,7 @@ def test_dataloader(


def vqa_train_collate_fn(
-    batch: List[Tuple[Tensor, Tensor, List[Tensor], List[float]]]
+    batch: List[Tuple[Tensor, Tensor, List[Tensor], List[float]]],
) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, List[int]]:
image_list = []
question_list = []
@@ -188,7 +188,7 @@ def vqa_train_collate_fn(


def vqa_test_collate_fn(
-    batch: List[Tuple[Tensor, Tensor, int]]
+    batch: List[Tuple[Tensor, Tensor, int]],
) -> Tuple[Tensor, Tensor, Tensor, List[int]]:
image_list, question_list, question_ids = [], [], []
for image, question, question_id in batch:
1 change: 0 additions & 1 deletion examples/mdetr/data/dataset.py
@@ -240,7 +240,6 @@ def collate_fn(tokenizer, batch):


def build_flickr(image_set, tokenizer, transform, args):
-
img_dir = Path(args.flickr_img_path) / f"{image_set}"

if args.GT_type == "merged":
4 changes: 2 additions & 2 deletions examples/mdetr/data/flickr_eval.py
@@ -4,7 +4,8 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

""" Evaluator for Flickr30k """
"""Evaluator for Flickr30k"""

import xml.etree.ElementTree as Et
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence, Union
@@ -184,7 +185,6 @@ def __init__(
iou_thresh: float = 0.5,
merge_boxes: bool = False,
):
-
assert subset in ["train", "test", "val"], f"Wrong flickr subset {subset}"

self.topk = topk
1 change: 0 additions & 1 deletion examples/mdetr/data/postprocessors.py
@@ -49,7 +49,6 @@ def __call__(
positive_map: Tensor,
phrases_per_sample: List[int],
) -> List[List[List[float]]]:
-
assert output_logits.size(0) == target_sizes.size(
0
), "Logits and target sizes should both have first dim = batch_size"
6 changes: 2 additions & 4 deletions examples/mdetr/loss.py
@@ -67,7 +67,7 @@ def contrastive_alignment_loss(
neg_term = negative_logits.logsumexp(2)
nb_pos = positive_map.sum(2) + 1e-6
box_to_token_loss = (
-        ((pos_term / nb_pos + neg_term)).masked_fill(~boxes_with_pos, 0).sum()
+        (pos_term / nb_pos + neg_term).masked_fill(~boxes_with_pos, 0).sum()
)

# Calculate the contrastive loss for all tokens
@@ -76,7 +76,7 @@ def contrastive_alignment_loss(
neg_term = negative_logits.logsumexp(1)
nb_pos = positive_map.sum(1) + 1e-6
tokens_to_boxes_loss = (
-        ((pos_term / nb_pos + neg_term)).masked_fill(~tokens_with_pos, 0).sum()
+        (pos_term / nb_pos + neg_term).masked_fill(~tokens_with_pos, 0).sum()
)

tot_loss = (box_to_token_loss + tokens_to_boxes_loss) / 2
@@ -227,7 +227,6 @@ def forward(
vqa_masks: Optional[Dict[str, Tensor]] = None,
weight_dict: Optional[Dict[str, float]] = None,
) -> Dict[str, Tensor]:
-
target_boxes = [t["boxes"] for t in targets]
target_tokens = [t["tokens_positive"] for t in targets]
n_target_boxes = [len(t) for t in target_boxes]
@@ -292,7 +291,6 @@ def build_mdetr_loss(
no_object_weight: float = 0.1,
temperature: Optional[float] = None,
) -> MDETRLoss:
-
soft_token_loss = partial(
soft_token_prediction_loss, no_object_weight=no_object_weight
)
1 change: 0 additions & 1 deletion examples/mdetr/matcher.py
@@ -73,7 +73,6 @@ def forward(
target_boxes_per_sample: List[Tensor],
positive_map: Tensor,
) -> List[Tuple[Tensor, Tensor]]:
-
bs, num_queries = pred_logits.shape[:2]
target_boxes = torch.cat(target_boxes_per_sample)
# We flatten to compute the cost matrices in a batch
1 change: 1 addition & 0 deletions examples/mdetr/optimizer.py
@@ -5,6 +5,7 @@
# LICENSE file in the root directory of this source tree.

"""Collections of utilities related to optimization."""
+
from bisect import bisect_right

import torch
1 change: 0 additions & 1 deletion examples/mdetr/phrase_grounding.py
@@ -31,7 +31,6 @@ def evaluate(
evaluator,
device,
):
-
model.eval()

metric_logger = MetricLogger(delimiter=" ")
1 change: 0 additions & 1 deletion examples/mdetr/utils/args_parse.py
@@ -8,7 +8,6 @@


def get_args_parser():
-
parser = argparse.ArgumentParser("MDETR", add_help=False)
parser.add_argument("--dataset_config", default=None, required=True)
# Transformer
1 change: 1 addition & 0 deletions examples/mdetr/utils/metrics.py
@@ -7,6 +7,7 @@
"""
Various utilities related to track and report metrics
"""
+
import datetime
import time
from collections import defaultdict, deque
1 change: 0 additions & 1 deletion examples/mdetr/vqa_eval.py
@@ -32,7 +32,6 @@ def evaluate(
device,
weight_dict,
):
-
model.eval()
metric_logger = MetricLogger(delimiter=" ")
header = "Test:"
4 changes: 1 addition & 3 deletions examples/mugen/data/mugen_dataset.py
@@ -38,9 +38,7 @@ class MUGENDatasetArgs:
True # render smap for mugen (and shield) as bounding boxes
)
bbox_smap_for_monsters: bool = True # render smap for monsters as bounding boxes
-    use_manual_annotation: bool = (
-        False  # if True will only use videos with manual annotation and skip those without
-    )
+    use_manual_annotation: bool = False  # if True will only use videos with manual annotation and skip those without
use_auto_annotation: bool = (
True # if True will only use videos with auto annotation and skip those without
)
4 changes: 1 addition & 3 deletions examples/mugen/retrieval/definitions.py
@@ -81,9 +81,7 @@ class EvaluationArgs:
datamodule_args: DataModuleArgs = DataModuleArgs()
lightningmodule_args: LightningModuleArgs = LightningModuleArgs()
videoclip_args: VideoCLIPArgs = VideoCLIPArgs()
-    checkpoint_path: str = (
-        "https://pytorch.s3.amazonaws.com/models/multimodal/mugen/videoclip_lightning_mugen.pt"
-    )
+    checkpoint_path: str = "https://pytorch.s3.amazonaws.com/models/multimodal/mugen/videoclip_lightning_mugen.pt"
accelerator: str = "auto"


4 changes: 1 addition & 3 deletions examples/omnivore/data/data_builder.py
@@ -76,9 +76,7 @@ def construct_data_loader(dataset, sampler, num_workers, mode, args, drop_last=F
mixupcutmix = torchvision.transforms.RandomChoice(mixup_transforms)
# Since not all dataset return tuple of same length, we take the
# first two elements for mixupcutmix during training
-    collate_fn = lambda batch: mixupcutmix(
-        *(default_collate(batch)[:2])
-    )  # noqa: E731
+    collate_fn = lambda batch: mixupcutmix(*(default_collate(batch)[:2]))  # noqa: E731

data_loader = torch.utils.data.DataLoader(
dataset,
2 changes: 0 additions & 2 deletions examples/omnivore/data/presets.py
@@ -58,7 +58,6 @@ def __init__(
std=(0.229, 0.224, 0.225),
interpolation=InterpolationMode.BICUBIC,
):
-
self.transforms = T.Compose(
[
T.Resize(resize_size, interpolation=interpolation),
@@ -187,7 +186,6 @@ def __init__(
std=(0.229, 0.224, 0.225, 0.0295),
interpolation=InterpolationMode.BILINEAR,
):
-
self.transforms = T.Compose(
[
CT.DepthNorm(max_depth=max_depth, clamp_max_before_scale=True),
8 changes: 4 additions & 4 deletions examples/omnivore/train.py
@@ -38,10 +38,10 @@ def _chunk_forward_backward(
args,
scaler=None,
):
-
-    chunk_image, chunk_target = image[chunk_start:chunk_end, ...].to(device), target[
-        chunk_start:chunk_end, ...
-    ].to(device)
+    chunk_image, chunk_target = (
+        image[chunk_start:chunk_end, ...].to(device),
+        target[chunk_start:chunk_end, ...].to(device),
+    )

with torch.cuda.amp.autocast(enabled=scaler is not None):
chunk_output = model(chunk_image, input_type)
2 changes: 0 additions & 2 deletions tests/models/test_late_fusion.py
@@ -81,7 +81,6 @@ def modalities_3(self):
}

def test_forward(self, late_fusion, modalities_1):
-
actual = late_fusion(modalities_1)
expected = torch.Tensor(
[[1, 0, 0.25, 0.75, 3, 1, 0.8, 0.9], [0, 1, 0.6, 0.4, 0.7, 2, 0.6, 0]]
@@ -90,7 +89,6 @@ def test_forward(self, late_fusion, modalities_1):
assert_expected(actual, expected)

def test_script(self, late_fusion, modalities_2):
-
scripted_late_fusion = torch.jit.script(late_fusion)
actual = scripted_late_fusion(modalities_2)
expected = torch.Tensor([[7, 0, 0.65, 8, 9, 0.8], [88, 5, 0.3, 0.74, 2, 0]])
1 change: 0 additions & 1 deletion tests/models/test_video_gpt.py
@@ -47,7 +47,6 @@ def set_seed():


class TestVideoGPT:
-
_model_name = "video_gpt"

@pytest.fixture
1 change: 0 additions & 1 deletion torchmultimodal/models/clip/image_encoder.py
@@ -80,7 +80,6 @@ def __init__(
self.projection = nn.Parameter(scale * torch.randn(width, embedding_dim))

def forward(self, x: Tensor) -> Tensor:
-
if x.size(2) != self.image_size or x.size(3) != self.image_size:
raise ValueError(
f"Expected input with width and height as {self.image_size}, found {x.size(2)} by {x.size(3)} "
1 change: 0 additions & 1 deletion torchmultimodal/models/clip/model.py
@@ -68,7 +68,6 @@ def forward(
features_a: torch.Tensor,
features_b: torch.Tensor,
) -> CLIPOutput:
-
embeddings_a = self.encoder_a(features_a)
embeddings_b = self.encoder_b(features_b)
embeddings_a = F.normalize(embeddings_a)
1 change: 0 additions & 1 deletion torchmultimodal/models/coca/coca_model.py
@@ -493,7 +493,6 @@ def __init__(
def forward(
self, images: Tensor, texts: Tensor, text_padding_mask: Optional[Tensor] = None
) -> Dict[str, Tensor]:
-
model_out = self.model(images, texts, text_padding_mask)
mm_out = model_out.multimodal_embeddings

1 change: 0 additions & 1 deletion torchmultimodal/models/flava/image_encoder.py
@@ -247,7 +247,6 @@ def flava_image_encoder(
patch_size: int = 16,
num_channels: int = 3,
) -> ImageTransformer:
-
embeddings = ImageEmbeddings(
image_size=image_size,
patch_size=patch_size,
1 change: 0 additions & 1 deletion torchmultimodal/models/flava/model.py
@@ -555,7 +555,6 @@ def flava_model_for_classification(
pretrained: bool = True,
**flava_model_kwargs: Any,
) -> FLAVAForClassification:
-
classifier = MLP(
in_dim=classifier_in_dim,
out_dim=num_classes,
1 change: 0 additions & 1 deletion torchmultimodal/models/flava/text_encoder.py
@@ -35,7 +35,6 @@ def flava_text_encoder(
# TextEncoder params
initializer_range: float = 0.02,
) -> BERTTextEncoder:
-
embeddings = BERTTextEmbeddings(
hidden_size=hidden_size,
vocab_size=vocab_size,
1 change: 0 additions & 1 deletion torchmultimodal/models/flava/transformer.py
@@ -261,7 +261,6 @@ def forward(
return_attn_weights: bool = False,
return_hidden_states: bool = False,
) -> TransformerOutput:
-
all_hidden_states = [] if return_hidden_states else None
all_self_attentions = [] if return_attn_weights else None

2 changes: 0 additions & 2 deletions torchmultimodal/models/mdetr/model.py
@@ -123,7 +123,6 @@ def _pad_text(
return padded_text, mask

def forward(self, images: List[Tensor], text: List[Tensor]) -> MDETRModelOutput:
-
images, image_mask = self._pad_images(images)
text, text_attention_mask = self._pad_text(text)
encoded_text = self.text_encoder(text, text_attention_mask)
@@ -392,7 +391,6 @@ def forward(
images: List[Tensor],
text: List[Tensor],
) -> MDETRPhraseGroundingOutput:
-
model_output = self.model(images, text)
final_hidden_state = model_output.transformer_output.decoder_hidden_states[-1]

1 change: 0 additions & 1 deletion torchmultimodal/models/mdetr/transformer.py
@@ -166,7 +166,6 @@ def forward(
src_key_padding_mask: Optional[Tensor] = None,
pos: Optional[Tensor] = None,
) -> Tensor:
-
output = src

for layer in self.layers:
1 change: 0 additions & 1 deletion torchmultimodal/modules/encoders/vision_transformer.py
@@ -60,7 +60,6 @@ def forward(
image_patches_mask: Optional[Tensor] = None,
attention_mask: Optional[Tensor] = None,
) -> TransformerOutput:
-
embedding_output = self.embeddings(
images, image_patches_mask=image_patches_mask
).embeddings
1 change: 0 additions & 1 deletion torchmultimodal/modules/fusions/deepset_fusion.py
@@ -84,7 +84,6 @@ def __init__(
self.mlp = mlp

def forward(self, embeddings: Dict[str, Tensor]) -> Tensor:
-
projections = {}
for channel, projection in self.projections.items():
projections[channel] = projection(embeddings[channel])
6 changes: 1 addition & 5 deletions torchmultimodal/modules/layers/transformer.py
@@ -366,9 +366,7 @@ def _cross_attention_block(
encoder_hidden_states: Tensor,
cross_attention_mask: Optional[Tensor] = None,
) -> Tensor:
-        assert (
-            self.cross_attention is not None
-        ), """
+        assert self.cross_attention is not None, """
Cannot use cross-attention unless self.cross_attention and
self.cross_attention_dropout are defined.
"""
@@ -399,7 +397,6 @@ def _forward_prenorm(
past_key_value: Optional[Tuple[Tensor, Tensor]] = None,
use_cache: bool = False,
) -> Tuple[Tensor, Optional[Tuple[Tensor, Tensor]]]:
-
# Self-attention
self_attn_input = self.attention_layernorm(hidden_states)
attn_output, present_key_value = self._self_attention_block(
@@ -440,7 +437,6 @@ def _forward_postnorm(
past_key_value: Optional[Tuple[Tensor, Tensor]] = None,
use_cache: bool = False,
) -> Tuple[Tensor, Optional[Tuple[Tensor, Tensor]]]:
-
# Self-attention
attn_output, present_key_value = self._self_attention_block(
hidden_states,
1 change: 1 addition & 0 deletions torchmultimodal/modules/losses/blip2_losses.py
@@ -21,6 +21,7 @@
@dataclass
class Blip2Stage1Losses(OrderedDict):
"Blip-2 stage 1 losses"
+
image_text_contrastive_loss: torch.Tensor
image_text_matching_loss: torch.Tensor
image_captioning_loss: torch.Tensor
@@ -190,7 +190,6 @@ def forward(
cross_entropy_kwargs: Optional[Dict[str, Any]] = None,
mask: Optional[Tensor] = None,
) -> Tensor:
-
self.logit_scale.data.clamp_(self.logit_scale_min, self.logit_scale_max)
return contrastive_loss_with_temperature(
embeddings_a=embeddings_a,
1 change: 0 additions & 1 deletion torchmultimodal/modules/losses/flava.py
@@ -264,7 +264,6 @@ def forward(
text_sequence: Tensor,
mask: Tensor,
) -> FLAVAGlobalContrastiveLossOutput:
-
text_embedding = nn.functional.normalize(text_sequence, dim=-1)
image_embedding = nn.functional.normalize(
image_sequence,
2 changes: 1 addition & 1 deletion torchmultimodal/modules/losses/mdetr.py
@@ -13,7 +13,7 @@


def _get_src_permutation_idx(
-    indices: List[Tuple[Tensor, Tensor]]
+    indices: List[Tuple[Tensor, Tensor]],
) -> Tuple[Tensor, Tensor]:
"""
Given a list of matched (src, tgt) indices, concatenate the src indices and