Merge pull request #110 from mwalmsley/dev

Finetuning improvements
mwalmsley · Nov 13, 2023 · 3214941 · 3214941
2 parents 15a8216 + ffb99fd
commit 3214941
Show file tree

Hide file tree

Showing 10 changed files with 172 additions and 108 deletions.
diff --git a/.gitignore b/.gitignore
@@ -165,4 +165,6 @@ results
 
 hparams.yaml
 
-data/pretrained_models
+data/pretrained_models
+
+*.tar
diff --git a/benchmarks/pytorch/run_benchmarks.sh b/benchmarks/pytorch/run_benchmarks.sh
@@ -16,7 +16,7 @@ SEED=$RANDOM
 # effnet, greyscale and color
 # sbatch --job-name=evo_py_gr_eff_224_$SEED --export=ARCHITECTURE=efficientnet_b0,BATCH_SIZE=256,RESIZE_AFTER_CROP=224,DATASET=gz_evo,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB
 # sbatch --job-name=evo_py_gr_eff_300_$SEED --export=ARCHITECTURE=efficientnet_b0,BATCH_SIZE=256,RESIZE_AFTER_CROP=300,DATASET=gz_evo,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB
-sbatch --job-name=evo_py_co_eff_224_$SEED --export=ARCHITECTURE=efficientnet_b0,BATCH_SIZE=256,RESIZE_AFTER_CROP=224,DATASET=gz_evo,COLOR_STRING=--color,GPUS=2,SEED=$SEED $TRAIN_JOB
+# sbatch --job-name=evo_py_co_eff_224_$SEED --export=ARCHITECTURE=efficientnet_b0,BATCH_SIZE=256,RESIZE_AFTER_CROP=224,DATASET=gz_evo,COLOR_STRING=--color,GPUS=2,SEED=$SEED $TRAIN_JOB
 # sbatch --job-name=evo_py_co_eff_300_$SEED --export=ARCHITECTURE=efficientnet_b0,BATCH_SIZE=128,RESIZE_AFTER_CROP=300,DATASET=gz_evo,COLOR_STRING=--color,GPUS=2,SEED=$SEED $TRAIN_JOB
 
 # and resnet18
@@ -25,11 +25,13 @@ sbatch --job-name=evo_py_co_eff_224_$SEED --export=ARCHITECTURE=efficientnet_b0,
 # and resnet50
 # sbatch --job-name=evo_py_gr_res50_224_$SEED --export=ARCHITECTURE=resnet50,BATCH_SIZE=256,RESIZE_AFTER_CROP=224,DATASET=gz_evo,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB
 # sbatch --job-name=evo_py_gr_res50_300_$SEED --export=ARCHITECTURE=resnet50,BATCH_SIZE=256,RESIZE_AFTER_CROP=300,DATASET=gz_evo,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB
-# and with max-vit tiny because hey transformers are cool
+# color 224 version
+sbatch --job-name=evo_py_co_res50_224_$SEED --export=ARCHITECTURE=resnet50,BATCH_SIZE=256,RESIZE_AFTER_CROP=224,DATASET=gz_evo,COLOR_STRING=--color,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB
 
+# and with max-vit tiny because hey transformers are cool
 # smaller batch size due to memory
-sbatch --job-name=evo_py_gr_vittiny_224_$SEED --export=ARCHITECTURE=maxvit_tiny_224,BATCH_SIZE=128,RESIZE_AFTER_CROP=224,DATASET=gz_evo,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB
-sbatch --job-name=evo_py_co_vittiny_224_$SEED --export=ARCHITECTURE=maxvit_tiny_224,BATCH_SIZE=128,RESIZE_AFTER_CROP=224,DATASET=gz_evo,COLOR_STRING=--color,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB
+# sbatch --job-name=evo_py_gr_vittiny_224_$SEED --export=ARCHITECTURE=maxvit_tiny_224,BATCH_SIZE=128,RESIZE_AFTER_CROP=224,DATASET=gz_evo,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB
+# sbatch --job-name=evo_py_co_vittiny_224_$SEED --export=ARCHITECTURE=maxvit_tiny_224,BATCH_SIZE=128,RESIZE_AFTER_CROP=224,DATASET=gz_evo,COLOR_STRING=--color,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB
 
 # and max-vit small (works badly)
 # sbatch --job-name=evo_py_gr_vitsmall_224_$SEED --export=ARCHITECTURE=maxvit_small_224,BATCH_SIZE=64,RESIZE_AFTER_CROP=224,DATASET=gz_evo,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB

diff --git a/docs/data_notes.rst b/docs/data_notes.rst
@@ -24,11 +24,6 @@ Zoobot includes weights for the following pretrained models.
      - 1 
      - Yes
      - `Link <https://www.dropbox.com/s/izvqagd6rkhi4lq/effnetb0_greyscale_300px.ckpt?dl=0>`__
-   * - EfficientNetB0 
-     - 300px
-     - 3
-     - Yes
-     - WIP
    * - EfficientNetB0 
      - 224px
      - 3
@@ -57,12 +52,12 @@ Zoobot includes weights for the following pretrained models.
    * - Max-ViT Tiny
      - 224px
      - 1
-     - Not yet
+     - Yes
      - `Link <https://www.dropbox.com/s/pndcgi6wxh9wuqb/maxvittiny_greyscale_224px.ckpt?dl=0>`__
    * - Max-ViT Tiny
      - 224px
      - 3
-     - Not yet
+     - Yes
      - `Link <https://www.dropbox.com/s/ibuo5n1tcaphvn3/maxvittiny_color_224px.ckpt?dl=0>`__
 
 
@@ -108,19 +103,19 @@ We also include a few additional ad-hoc models `on Dropbox <https://www.dropbox.
 Which model should I use?
 --------------------------
 
-We suggest the PyTorch EfficientNetB0 single-channel 300-pixel model for most users.
+We suggest the PyTorch EfficientNetB0 224-pixel model for most users.
 
 Zoobot will prioritise PyTorch going forward. For more, see here.
 The TensorFlow models currently perform just as well as the PyTorch equivalents but will not benefit from any future updates.
 
 EfficientNetB0 is a small yet capable modern architecture. 
 The ResNet50 models perform slightly worse than EfficientNet, but are a very common architecture and may be useful as benchmarks or as part of other frameworks (like detectron2, for segmentation).
 
-Color information does not improve overall performance at predicting GZ votes.
-This is a little surprising, but we're confident it's true for our datasets (see the benchmarks folder for our tests).
-However, it might be useful to include for other tasks where color is critical, such as hunting certain anomalous galaxies.
+It's unclear if color information improves overall performance at predicting GZ votes.
+For CNNs, the change in performance is not significant. For ViT, it is measurable but small.
+We suggest including color if it is expected to be important to your specific task, such as hunting green peas.
 
-Larger input images (300px vs 224px) provide a small boost in performance at predicting GZ votes.
+Larger input images (300px vs 224px) may provide a small boost in performance at predicting GZ votes.
 However, the models require more memory and train/finetune slightly more slowly.
 You may want to start with a 224px model and experiment with "upgrading" once you're happy everything works.
 

diff --git a/setup.py b/setup.py
@@ -5,7 +5,7 @@
 
 setuptools.setup(
     name="zoobot",
-    version="1.0.4",
+    version="1.0.5",
     author="Mike Walmsley",
     author_email="[email protected]",
     description="Galaxy morphology classifiers",
@@ -61,12 +61,14 @@
             'timm == 0.6.12'
         ],
         'pytorch_colab': [
+            # colab includes pytorch already
             'pytorch-lightning >= 2.0.0',
             'albumentations',
             'pyro-ppl>=1.8.0',
             'torchmetrics==0.11.0',
             'timm == 0.6.12'
         ],
+        # TODO may add narval/Digital Research Canada config
         'tensorflow': [
             'tensorflow == 2.10.0',  # 2.11.0 turns on XLA somewhere which then fails on multi-GPU...TODO
             'keras_applications',
@@ -95,13 +97,12 @@
         'pandas',
         'scipy',
         'astropy',  # for reading fits
-        'scikit-image >= 0.19.2',
         'scikit-learn >= 1.0.2',
         'matplotlib',
         'pyarrow',  # to read parquet, which is very handy for big datasets
         # for saving metrics to weights&biases (cloud service, free within limits)
         'wandb',
-        'setuptools==59.5.0',  # wandb logger incompatibility
-        'galaxy-datasets==0.0.14'  # for dataset loading in both TF and Torch (renamed from pytorch-galaxy-datasets)
+        'setuptools',  # no longer pinned
+        'galaxy-datasets>=0.0.15'  # for dataset loading in both TF and Torch (see github/mwalmsley/galaxy-datasets)
     ]
 )
diff --git a/zoobot/pytorch/estimators/define_model.py b/zoobot/pytorch/estimators/define_model.py
@@ -85,7 +85,7 @@ def configure_optimizers(self):
     def training_step(self, batch, batch_idx):
         return self.make_step(batch, batch_idx, step_name='train')
 
-    def on_training_batch_end(self, outputs, *args):
+    def on_train_batch_end(self, outputs, *args):
         self.log_outputs(outputs, step_name='train')
 
     def validation_step(self, batch, batch_idx):
@@ -94,6 +94,9 @@ def validation_step(self, batch, batch_idx):
     def on_validation_batch_end(self, outputs, *args):
         self.log_outputs(outputs, step_name='validation')
 
+    def log_outputs(self, outputs, step_name):
+        raise NotImplementedError('Must be subclassed')
+
     def test_step(self, batch, batch_idx):
         return self.make_step(batch, batch_idx, step_name='test')
 

diff --git a/zoobot/pytorch/examples/finetuning/finetune_binary_classification.py b/zoobot/pytorch/examples/finetuning/finetune_binary_classification.py
@@ -27,9 +27,10 @@
     # To support more complicated labels, Zoobot expects a list of columns. A list with one element works fine.
 
     # load a pretrained checkpoint saved here
-    # checkpoint_loc = os.path.join(zoobot_dir, 'data/pretrained_models/temp/dr5_py_gr_2270/checkpoints/epoch=360-step=231762.ckpt')
-    checkpoint_loc = '/Users/user/repos/gz-decals-classifiers/results/benchmarks/pytorch/dr5/dr5_py_gr_15366/checkpoints/epoch=58-step=18939.ckpt'
-
+    # https://www.dropbox.com/s/7ixwo59imjfz4ay/effnetb0_greyscale_224px.ckpt?dl=0
+    # see https://zoobot.readthedocs.io/en/latest/data_notes.html for more options
+    checkpoint_loc = os.path.join(zoobot_dir, 'data/pretrained_models/pytorch/effnetb0_greyscale_224px.ckpt')
+
     # save the finetuning results here
     save_dir = os.path.join(zoobot_dir, 'results/pytorch/finetune/finetune_binary_classification')
 
@@ -70,8 +71,9 @@
       finetuned_model,
       n_samples=1,
       label_cols=label_cols,
-      save_loc=os.path.join(save_dir, 'finetuned_predictions.csv')
-      # trainer_kwargs={'accelerator': 'gpu'}
+      save_loc=os.path.join(save_dir, 'finetuned_predictions.csv'),
+      datamodule_kwargs={'batch_size': 32},  # we also need to set batch size here, or you may run out of memory
+      trainer_kwargs={'accelerator': 'gpu'}  
     )
     """
     Under the hood, this is essentially doing:

diff --git a/zoobot/pytorch/examples/finetuning/finetune_multiclass_classification.py b/zoobot/pytorch/examples/finetuning/finetune_multiclass_classification.py
@@ -1,50 +1,52 @@
 import logging
 import os
 
+import pandas as pd
+
 from zoobot.pytorch.training import finetune
-from galaxy_datasets import demo_rings
+from galaxy_datasets import galaxy_mnist
 from galaxy_datasets.pytorch.galaxy_datamodule import GalaxyDataModule
 
 
 if __name__ == '__main__':
 
     logging.basicConfig(level=logging.INFO)
 
-    zoobot_dir = '/Users/user/repos/zoobot'  # TODO set to directory where you cloned Zoobot
+    zoobot_dir = '/home/walml/repos/zoobot'  # TODO set to directory where you cloned Zoobot
+    data_dir = '/home/walml/repos/galaxy-datasets/roots/galaxy_mnist'  # TODO set to any directory. galaxy_mnist dataset will be downloaded here
+    batch_size = 32
+    num_workers= 8
+    n_blocks = 1  # EffnetB0 is divided into 7 blocks. set 0 to only fit the head weights. Set 1, 2, etc to finetune deeper. 
+    max_epochs = 1  #  6 epochs should get you ~93% accuracy. Set much higher (e.g. 1000) for harder problems, to use Zoobot's default early stopping. 
+    # the remaining key parameters for high accuracy are weight_decay, learning_rate, and lr_decay. You might like to tinker with these.
 
     # load in catalogs of images and labels to finetune on
     # each catalog should be a dataframe with columns of "id_str", "file_loc", and any labels
     # here I'm using galaxy-datasets to download some premade data - check it out for examples
-    data_dir = '/Users/user/repos/galaxy-datasets/roots/demo_rings'  # TODO set to any directory. rings dataset will be downloaded here
-    train_catalog, _ = demo_rings(root=data_dir, download=True, train=True)
-    test_catalog, _ = demo_rings(root=data_dir, download=True, train=False)
+
+    train_catalog, _ = galaxy_mnist(root=data_dir, download=True, train=True)
+    test_catalog, _ = galaxy_mnist(root=data_dir, download=True, train=False)
 
     # wondering about "label_cols"? 
     # This is a list of catalog columns which should be used as labels
-    # Here:
-    # TODO should use Galaxy MNIST as my example here
-    label_cols = ['ring']
-    # For binary classification, the label column should have binary (0 or 1) labels for your classes
-    import numpy as np
-    # 0, 1, 2
-    train_catalog['ring'] = np.random.randint(low=0, high=3, size=len(train_catalog))
-
-    # TODO
-    # To support more complicated labels, Zoobot expects a list of columns. A list with one element works fine.
-
+    # Here, it's a single column, 'label', with values 0-3 (for each of the 4 classes)
+    label_cols = ['label']
+    num_classes = 4
+
     # load a pretrained checkpoint saved here
     checkpoint_loc = os.path.join(zoobot_dir, 'data/pretrained_models/pytorch/effnetb0_greyscale_224px.ckpt')
-    # checkpoint_loc = '/Users/user/repos/gz-decals-classifiers/results/benchmarks/pytorch/dr5/dr5_py_gr_15366/checkpoints/epoch=58-step=18939.ckpt'
 
     # save the finetuning results here
     save_dir = os.path.join(zoobot_dir, 'results/pytorch/finetune/finetune_multiclass_classification')
 
     datamodule = GalaxyDataModule(
       label_cols=label_cols,
       catalog=train_catalog,  # very small, as a demo
-      batch_size=32
+      batch_size=batch_size,  # increase for faster training, decrease to avoid out-of-memory errors
+      num_workers=num_workers  # TODO set to a little less than num. CPUs
     )
-    # datamodule.setup()
+    datamodule.setup()
+    # optionally, check the data loads and looks okay
     # for images, labels in datamodule.train_dataloader():
     #   print(images.shape)
     #   print(labels.shape)
@@ -53,31 +55,38 @@
 
     model = finetune.FinetuneableZoobotClassifier(
       checkpoint_loc=checkpoint_loc,
-      num_classes=3,
-      n_layers=0  # only updating the head weights. Set e.g. 1, 2 to finetune deeper. 
+      num_classes=num_classes,
+      n_blocks=n_blocks
     )
     # under the hood, this does:
     # encoder = finetune.load_pretrained_encoder(checkpoint_loc)
     # model = finetune.FinetuneableZoobotClassifier(encoder=encoder, ...)
 
     # retrain to find rings
-    trainer = finetune.get_trainer(save_dir, accelerator='cpu', max_epochs=1)
+    trainer = finetune.get_trainer(save_dir, accelerator='auto', max_epochs=max_epochs)
     trainer.fit(model, datamodule)
     # can now use this model or saved checkpoint to make predictions on new data. Well done!
 
+    # see how well the model performs
+    # (don't do this all the time)
+    trainer.test(model, datamodule)
+
+    # we can load the model later any time
     # pretending we want to load from scratch:
     best_checkpoint = trainer.checkpoint_callback.best_model_path
     finetuned_model = finetune.FinetuneableZoobotClassifier.load_from_checkpoint(best_checkpoint)
 
     from zoobot.pytorch.predictions import predict_on_catalog
 
+    predictions_save_loc = os.path.join(save_dir, 'finetuned_predictions.csv')
     predict_on_catalog.predict(
       test_catalog,
       finetuned_model,
       n_samples=1,
-      label_cols=label_cols,
-      save_loc=os.path.join(save_dir, 'finetuned_predictions.csv')
-      # trainer_kwargs={'accelerator': 'gpu'}
+      label_cols=['class_{}'.format(n) for n in range(num_classes)],  # TODO feel free to rename, it's just for the csv header
+      save_loc=predictions_save_loc,
+      trainer_kwargs={'accelerator': 'auto'},
+      datamodule_kwargs={'batch_size': batch_size, 'num_workers': num_workers}
     )
     """
     Under the hood, this is essentially doing:
@@ -91,4 +100,9 @@
     )
     preds = predict_trainer.predict(finetuned_model, predict_datamodule)
     print(preds)
-    """
+    """
+
+    predictions = pd.read_csv(predictions_save_loc)
+    print(predictions)
+
+    exit()  # now over to you!
diff --git a/zoobot/pytorch/predictions/predict_on_catalog.py b/zoobot/pytorch/predictions/predict_on_catalog.py
@@ -11,7 +11,7 @@
 from galaxy_datasets.pytorch.galaxy_datamodule import GalaxyDataModule
 
 
-def predict(catalog: pd.DataFrame, model: pl.LightningModule, n_samples: int, label_cols: List, save_loc: str, datamodule_kwargs={}, trainer_kwargs={}):
+def predict(catalog: pd.DataFrame, model: pl.LightningModule, n_samples: int, label_cols: List, save_loc: str, datamodule_kwargs={}, trainer_kwargs={}) -> None:
     """
     Use trained model to make predictions on a catalog of galaxies.
 
@@ -55,12 +55,19 @@ def predict(catalog: pd.DataFrame, model: pl.LightningModule, n_samples: int, la
     start = datetime.datetime.fromtimestamp(time.time())
     logging.info('Starting at: {}'.format(start.strftime('%Y-%m-%d %H:%M:%S')))
 
-    logging.info(len(trainer.predict(model, predict_datamodule)))
+    # logging.info(len(trainer.predict(model, predict_datamodule)))
 
     # trainer.predict gives list of tensors, each tensor being predictions for a batch. Concat on axis 0.
     # range(n_samples) list comprehension repeats this, for dropout-permuted predictions. Stack to create new last axis.
     # final shape (n_galaxies, n_answers, n_samples)
-    predictions = torch.stack([torch.concat(trainer.predict(model, predict_datamodule), dim=0) for n in range(n_samples)], dim=-1).numpy()
+    predictions = torch.stack(
+        [   
+            # trainer.predict gives [(galaxy, answer), ...] list, batchwise
+            # concat batches
+            torch.concat(trainer.predict(model, predict_datamodule), dim=0)
+            for n in range(n_samples)
+        ], 
+        dim=-1).numpy()  # now stack on final dim for (galaxy, answer, dropout) shape
     logging.info('Predictions complete - {}'.format(predictions.shape))
 
     logging.info(f'Saving predictions to {save_loc}')