From 99ffea5af8aab2deface26cdff4c18eb8fac48b5 Mon Sep 17 00:00:00 2001 From: wenkelf Date: Fri, 18 Aug 2023 20:27:11 +0000 Subject: [PATCH 01/30] Adding largemix configs --- .../hydra-configs/architecture/largemix.yaml | 118 ++++++++++++++ expts/hydra-configs/tasks/largemix.yaml | 7 + .../loss_metrics_datamodule/largemix.yaml | 150 ++++++++++++++++++ .../tasks/task_heads/largemix.yaml | 59 +++++++ .../training/accelerator/largemix_cpu.yaml | 19 +++ .../training/accelerator/largemix_gpu.yaml | 22 +++ expts/hydra-configs/training/largemix.yaml | 29 ++++ .../training/model/largemix_gcn.yaml | 18 +++ 8 files changed, 422 insertions(+) create mode 100644 expts/hydra-configs/architecture/largemix.yaml create mode 100644 expts/hydra-configs/tasks/largemix.yaml create mode 100644 expts/hydra-configs/tasks/loss_metrics_datamodule/largemix.yaml create mode 100644 expts/hydra-configs/tasks/task_heads/largemix.yaml create mode 100644 expts/hydra-configs/training/accelerator/largemix_cpu.yaml create mode 100644 expts/hydra-configs/training/accelerator/largemix_gpu.yaml create mode 100644 expts/hydra-configs/training/largemix.yaml create mode 100644 expts/hydra-configs/training/model/largemix_gcn.yaml diff --git a/expts/hydra-configs/architecture/largemix.yaml b/expts/hydra-configs/architecture/largemix.yaml new file mode 100644 index 000000000..903d59ee0 --- /dev/null +++ b/expts/hydra-configs/architecture/largemix.yaml @@ -0,0 +1,118 @@ +# @package _global_ + +architecture: + model_type: FullGraphMultiTaskNetwork + mup_base_path: null + pre_nn: # Set as null to avoid a pre-nn network + out_dim: 64 + hidden_dims: 256 + depth: 2 + activation: relu + last_activation: none + dropout: &dropout 0.1 + normalization: &normalization layer_norm + last_normalization: *normalization + residual_type: none + + pre_nn_edges: null + + pe_encoders: + out_dim: 32 + pool: "sum" #"mean" "max" + last_norm: None #"batch_norm", "layer_norm" + encoders: #la_pos | rw_pos + la_pos: # Set as null to avoid a pre-nn network + encoder_type: "laplacian_pe" + input_keys: ["laplacian_eigvec", "laplacian_eigval"] + output_keys: ["feat"] + hidden_dim: 64 + out_dim: 32 + model_type: 'DeepSet' #'Transformer' or 'DeepSet' + num_layers: 2 + num_layers_post: 1 # Num. 
layers to apply after pooling + dropout: 0.1 + first_normalization: "none" #"batch_norm" or "layer_norm" + rw_pos: + encoder_type: "mlp" + input_keys: ["rw_return_probs"] + output_keys: ["feat"] + hidden_dim: 64 + out_dim: 32 + num_layers: 2 + dropout: 0.1 + normalization: "layer_norm" #"batch_norm" or "layer_norm" + first_normalization: "layer_norm" #"batch_norm" or "layer_norm" + + gnn: # Set as null to avoid a post-nn network + in_dim: 64 # or otherwise the correct value + out_dim: &gnn_dim 768 + hidden_dims: *gnn_dim + depth: 4 + activation: gelu + last_activation: none + dropout: 0.1 + normalization: "layer_norm" + last_normalization: *normalization + residual_type: simple + virtual_node: 'none' + + graph_output_nn: + graph: + pooling: [sum] + out_dim: *gnn_dim + hidden_dims: *gnn_dim + depth: 1 + activation: relu + last_activation: none + dropout: *dropout + normalization: *normalization + last_normalization: "none" + residual_type: none + node: + pooling: [sum] + out_dim: *gnn_dim + hidden_dims: *gnn_dim + depth: 1 + activation: relu + last_activation: none + dropout: *dropout + normalization: *normalization + last_normalization: "none" + residual_type: none + +datamodule: + module_type: "MultitaskFromSmilesDataModule" + args: + prepare_dict_or_graph: pyg:graph + featurization_n_jobs: 4 + featurization_progress: True + featurization_backend: "loky" + processed_graph_data_path: "../datacache/large-dataset/" + dataloading_from: "ram" + num_workers: 4 # -1 to use all + persistent_workers: True + featurization: + atom_property_list_onehot: [atomic-number, group, period, total-valence] + atom_property_list_float: [degree, formal-charge, radical-electron, aromatic, in-ring] + edge_property_list: [bond-type-onehot, stereo, in-ring] + add_self_loop: False + explicit_H: False # if H is included + use_bonds_weights: False + pos_encoding_as_features: # encoder dropout 0.18 + pos_types: + lap_eigvec: + pos_level: node + pos_type: laplacian_eigvec + num_pos: 8 + normalization: "none" # nomrlization already applied on the eigen vectors + disconnected_comp: True # if eigen values/vector for disconnected graph are included + lap_eigval: + pos_level: node + pos_type: laplacian_eigval + num_pos: 8 + normalization: "none" # nomrlization already applied on the eigen vectors + disconnected_comp: True # if eigen values/vector for disconnected graph are included + rw_pos: # use same name as pe_encoder + pos_level: node + pos_type: rw_return_probs + ksteps: 16 \ No newline at end of file diff --git a/expts/hydra-configs/tasks/largemix.yaml b/expts/hydra-configs/tasks/largemix.yaml new file mode 100644 index 000000000..07417829c --- /dev/null +++ b/expts/hydra-configs/tasks/largemix.yaml @@ -0,0 +1,7 @@ +# NOTE: We cannot have a single config, since for fine-tuning we will +# only want to override the loss_metrics_datamodule, whereas for training we will +# want to override both. 
+ +defaults: + - task_heads: largemix + - loss_metrics_datamodule: largemix \ No newline at end of file diff --git a/expts/hydra-configs/tasks/loss_metrics_datamodule/largemix.yaml b/expts/hydra-configs/tasks/loss_metrics_datamodule/largemix.yaml new file mode 100644 index 000000000..d8b0e7b26 --- /dev/null +++ b/expts/hydra-configs/tasks/loss_metrics_datamodule/largemix.yaml @@ -0,0 +1,150 @@ +# @package _global_ + +predictor: + metrics_on_progress_bar: + l1000_vcap: [] + l1000_mcf7: [] + pcba_1328: [] + pcqm4m_g25: [] + pcqm4m_n4: [] + metrics_on_training_set: + l1000_vcap: [] + l1000_mcf7: [] + pcba_1328: [] + pcqm4m_g25: [] + pcqm4m_n4: [] + loss_fun: + l1000_vcap: + name: hybrid_ce_ipu + n_brackets: 5 + alpha: 0.5 + l1000_mcf7: + name: hybrid_ce_ipu + n_brackets: 5 + alpha: 0.5 + pcba_1328: bce_logits_ipu + pcqm4m_g25: mae_ipu + pcqm4m_n4: mae_ipu + +metrics: + l1000_vcap: &classif_metrics + - name: auroc + metric: auroc + num_classes: 5 + task: multiclass + target_to_int: True + target_nan_mask: -1000 + ignore_index: -1000 + multitask_handling: mean-per-label + threshold_kwargs: null + - name: avpr + metric: averageprecision + num_classes: 5 + task: multiclass + target_to_int: True + target_nan_mask: -1000 + ignore_index: -1000 + multitask_handling: mean-per-label + threshold_kwargs: null + l1000_mcf7: *classif_metrics + pcba_1328: + # use auroc and averageprecision (non_ipu version) so tha nans are handled correctly + - name: auroc + metric: auroc + task: binary + multitask_handling: mean-per-label + target_nan_mask: ignore + threshold_kwargs: null + - name: avpr + metric: averageprecision + task: binary + multitask_handling: mean-per-label + target_nan_mask: ignore + threshold_kwargs: null + pcqm4m_g25: &pcqm_metrics + - name: mae + metric: mae_ipu + target_nan_mask: null + multitask_handling: mean-per-label + threshold_kwargs: null + - name: pearsonr + metric: pearsonr_ipu + threshold_kwargs: null + target_nan_mask: null + multitask_handling: mean-per-label + - name: r2 + metric: r2_score_ipu + threshold_kwargs: null + target_nan_mask: null + multitask_handling: mean-per-label + pcqm4m_n4: *pcqm_metrics + +datamodule: + args: # Matches that in the test_multitask_datamodule.py case. 
+ task_specific_args: # To be replaced by a new class "DatasetParams" + l1000_vcap: + df: null + df_path: expts/data/large-dataset/LINCS_L1000_VCAP_0-4.csv.gz + # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_VCAP_0-4.csv.gz + # or set path as the URL directly + smiles_col: "SMILES" + label_cols: geneID-* # geneID-* means all columns starting with "geneID-" + # sample_size: 2000 # use sample_size for test + task_level: graph + splits_path: expts/data/large-dataset/l1000_vcap_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_vcap_random_splits.pt` + epoch_sampling_fraction: 1.0 + + l1000_mcf7: + df: null + df_path: expts/data/large-dataset/LINCS_L1000_MCF7_0-4.csv.gz + # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_MCF7_0-4.csv.gz + # or set path as the URL directly + smiles_col: "SMILES" + label_cols: geneID-* # geneID-* means all columns starting with "geneID-" + # sample_size: 2000 # use sample_size for test + task_level: graph + splits_path: expts/data/large-dataset/l1000_mcf7_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_mcf7_random_splits.pt` + epoch_sampling_fraction: 1.0 + + pcba_1328: + df: null + df_path: expts/data/large-dataset/PCBA_1328_1564k.parquet + # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCBA_1328_1564k.parquet + # or set path as the URL directly + smiles_col: "SMILES" + label_cols: assayID-* # assayID-* means all columns starting with "assayID-" + # sample_size: 2000 # use sample_size for test + task_level: graph + splits_path: expts/data/large-dataset/pcba_1328_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcba_1328_random_splits.pt` + epoch_sampling_fraction: 1.0 + + pcqm4m_g25: + df: null + df_path: expts/data/large-dataset/PCQM4M_G25_N4.parquet + # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet + # or set path as the URL directly + smiles_col: "ordered_smiles" + label_cols: graph_* # graph_* means all columns starting with "graph_" + # sample_size: 2000 # use sample_size for test + task_level: graph + splits_path: expts/data/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` + label_normalization: + normalize_val_test: True + method: "normal" + epoch_sampling_fraction: 1.0 + + pcqm4m_n4: + df: null + df_path: expts/data/large-dataset/PCQM4M_G25_N4.parquet + # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet + # or set path as the URL directly + smiles_col: "ordered_smiles" + label_cols: node_* # node_* means all columns starting with "node_" + # sample_size: 2000 # use sample_size for test + task_level: node + splits_path: expts/data/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` + seed: ${constants.seed} + label_normalization: + normalize_val_test: True + method: "normal" + epoch_sampling_fraction: 1.0 \ No newline at end of file diff --git a/expts/hydra-configs/tasks/task_heads/largemix.yaml 
b/expts/hydra-configs/tasks/task_heads/largemix.yaml new file mode 100644 index 000000000..92aab2827 --- /dev/null +++ b/expts/hydra-configs/tasks/task_heads/largemix.yaml @@ -0,0 +1,59 @@ +# @package _global_ + +architecture: + task_heads: + l1000_vcap: + task_level: graph + out_dim: 4890 + hidden_dims: 128 + depth: 2 + activation: none + last_activation: none + dropout: ${architecture.pre_nn.dropout} + normalization: ${architecture.pre_nn.normalization} + last_normalization: "none" + residual_type: none + l1000_mcf7: + task_level: graph + out_dim: 4890 + hidden_dims: 128 + depth: 2 + activation: none + last_activation: none + dropout: ${architecture.pre_nn.dropout} + normalization: ${architecture.pre_nn.normalization} + last_normalization: "none" + residual_type: none + pcba_1328: + task_level: graph + out_dim: 1328 + hidden_dims: 64 + depth: 2 + activation: relu + last_activation: none + dropout: ${architecture.pre_nn.dropout} + normalization: ${architecture.pre_nn.normalization} + last_normalization: "none" + residual_type: none + pcqm4m_g25: + task_level: graph + out_dim: 25 + hidden_dims: 32 + depth: 2 + activation: relu + last_activation: none + dropout: ${architecture.pre_nn.dropout} + normalization: ${architecture.pre_nn.normalization} + last_normalization: "none" + residual_type: none + pcqm4m_n4: + task_level: node + out_dim: 4 + hidden_dims: 32 + depth: 2 + activation: relu + last_activation: none + dropout: ${architecture.pre_nn.dropout} + normalization: ${architecture.pre_nn.normalization} + last_normalization: "none" + residual_type: none \ No newline at end of file diff --git a/expts/hydra-configs/training/accelerator/largemix_cpu.yaml b/expts/hydra-configs/training/accelerator/largemix_cpu.yaml new file mode 100644 index 000000000..aedc6ff60 --- /dev/null +++ b/expts/hydra-configs/training/accelerator/largemix_cpu.yaml @@ -0,0 +1,19 @@ +# @package _global_ + +datamodule: + args: + batch_size_training: 1000 + batch_size_inference: 1000 + featurization_n_jobs: 0 + num_workers: 0 + +predictor: + metrics_every_n_train_steps: 300 + torch_scheduler_kwargs: + max_num_epochs: ${constants.max_epochs} + +trainer: + trainer: + precision: 32 + accumulate_grad_batches: 2 + max_epochs: ${constants.max_epochs} \ No newline at end of file diff --git a/expts/hydra-configs/training/accelerator/largemix_gpu.yaml b/expts/hydra-configs/training/accelerator/largemix_gpu.yaml new file mode 100644 index 000000000..3025644de --- /dev/null +++ b/expts/hydra-configs/training/accelerator/largemix_gpu.yaml @@ -0,0 +1,22 @@ +# @package _global_ + +accelerator: + float32_matmul_precision: medium + +datamodule: + args: + batch_size_training: 1000 + batch_size_inference: 1000 + featurization_n_jobs: 0 + num_workers: 10 + +predictor: + metrics_every_n_train_steps: 300 + torch_scheduler_kwargs: + max_num_epochs: ${constants.max_epochs} + +trainer: + trainer: + precision: 16-mixed + accumulate_grad_batches: 2 + max_epochs: ${constants.max_epochs} \ No newline at end of file diff --git a/expts/hydra-configs/training/largemix.yaml b/expts/hydra-configs/training/largemix.yaml new file mode 100644 index 000000000..6426e7b57 --- /dev/null +++ b/expts/hydra-configs/training/largemix.yaml @@ -0,0 +1,29 @@ +# @package _global_ + +predictor: + random_seed: ${constants.seed} + optim_kwargs: + lr: 1.e-4 # warmup can be scheduled using torch_scheduler_kwargs + # weight_decay: 1.e-7 + torch_scheduler_kwargs: + module_type: WarmUpLinearLR + max_num_epochs: ${constants.max_epochs} + warmup_epochs: 10 + verbose: False + 
scheduler_kwargs: null + target_nan_mask: null # null: no mask, 0: 0 mask, ignore-flatten, ignore-mean-per-label + multitask_handling: flatten # flatten, mean-per-label + +trainer: + seed: ${constants.seed} + model_checkpoint: + dirpath: model_checkpoints/large-dataset/ + filename: ${constants.name} + save_last: True # saving last model + save_top_k: 1 # and best model + monitor: loss/val # wrt validation loss + trainer: + precision: 16-mixed + max_epochs: ${constants.max_epochs} + min_epochs: 1 + check_val_every_n_epoch: 1 \ No newline at end of file diff --git a/expts/hydra-configs/training/model/largemix_gcn.yaml b/expts/hydra-configs/training/model/largemix_gcn.yaml new file mode 100644 index 000000000..e2fb5d444 --- /dev/null +++ b/expts/hydra-configs/training/model/largemix_gcn.yaml @@ -0,0 +1,18 @@ +# @package _global_ + +constants: + name: large_data_gcn + wandb: + name: ${constants.name} + project: neurips2023-expts + entity: multitask-gnn + save_dir: logs/${constants.name} + entity: multitask-gnn + seed: 42 + max_epochs: 100 + data_dir: expts/data/large-dataset + raise_train_error: true + +trainer: + model_checkpoint: + dirpath: model_checkpoints/large-dataset/gcn/ \ No newline at end of file From c59bd6386e6d049f5e93a9ef2a7c51d12c15ecb0 Mon Sep 17 00:00:00 2001 From: WenkelF Date: Fri, 18 Aug 2023 20:08:39 -0400 Subject: [PATCH 02/30] Further improving caching --- graphium/data/datamodule.py | 33 ++++++++++++++++++++------------- graphium/data/dataset.py | 2 +- tests/test_datamodule.py | 6 ++---- 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/graphium/data/datamodule.py b/graphium/data/datamodule.py index e8cab271d..873238b47 100644 --- a/graphium/data/datamodule.py +++ b/graphium/data/datamodule.py @@ -1194,14 +1194,15 @@ def setup( labels_size = {} labels_dtype = {} if stage == "fit" or stage is None: - # if self.train_ds is None: - self.train_ds = self._make_multitask_dataset( - self.dataloading_from, "train", save_smiles_and_ids=save_smiles_and_ids - ) - # if self.val_ds is None: - self.val_ds = self._make_multitask_dataset( - self.dataloading_from, "val", save_smiles_and_ids=save_smiles_and_ids - ) + if self.train_ds is None: + self.train_ds = self._make_multitask_dataset( + self.dataloading_from, "train", save_smiles_and_ids=save_smiles_and_ids + ) + + if self.val_ds is None: + self.val_ds = self._make_multitask_dataset( + self.dataloading_from, "val", save_smiles_and_ids=save_smiles_and_ids + ) logger.info(self.train_ds) logger.info(self.val_ds) @@ -1213,10 +1214,10 @@ def setup( labels_dtype.update(self.val_ds.labels_dtype) if stage == "test" or stage is None: - # if self.test_ds is None: - self.test_ds = self._make_multitask_dataset( - self.dataloading_from, "test", save_smiles_and_ids=save_smiles_and_ids - ) + if self.test_ds is None: + self.test_ds = self._make_multitask_dataset( + self.dataloading_from, "test", save_smiles_and_ids=save_smiles_and_ids + ) logger.info(self.test_ds) @@ -1335,7 +1336,13 @@ def _save_data_to_files(self, save_smiles_and_ids: bool = False) -> None: self.save_featurized_data(temp_datasets[stage], self._path_to_load_from_file(stage)) temp_datasets[stage].save_metadata(self._path_to_load_from_file(stage)) # self.train_ds, self.val_ds, self.test_ds will be created during `setup()` - del temp_datasets + + if self.dataloading_from == "disk": + del temp_datasets + else: + self.train_ds = temp_datasets["train"] + self.val_ds = temp_datasets["val"] + self.test_ds = temp_datasets["test"] def get_folder_size(self, path): # check if 
the data items are actually saved into the folders diff --git a/graphium/data/dataset.py b/graphium/data/dataset.py index 039d1b35a..964c97908 100644 --- a/graphium/data/dataset.py +++ b/graphium/data/dataset.py @@ -194,7 +194,7 @@ def __init__( self.features = None self.labels = None elif dataloading_from == "ram": - logger.info("Transferring data from DISK to RAM...") + logger.info(f"Transferring {about} from DISK to RAM...") self.transfer_from_disk_to_ram() else: diff --git a/tests/test_datamodule.py b/tests/test_datamodule.py index 2bc89200c..b00c042e2 100644 --- a/tests/test_datamodule.py +++ b/tests/test_datamodule.py @@ -55,8 +55,7 @@ def test_ogb_datamodule(self): rm(TEMP_CACHE_DATA_PATH, recursive=True) # Reset the datamodule - ds._data_is_prepared = False - ds._data_is_cached = False + ds = GraphOGBDataModule(task_specific_args, **dm_args) ds.prepare_data(save_smiles_and_ids=True) @@ -299,8 +298,7 @@ def test_caching(self): rm(TEMP_CACHE_DATA_PATH, recursive=True) # Reset the datamodule - ds._data_is_prepared = False - ds._data_is_cached = False + ds = GraphOGBDataModule(task_specific_args, processed_graph_data_path=TEMP_CACHE_DATA_PATH, **dm_args) ds.prepare_data(save_smiles_and_ids=True) From 290e3da08258d3feabf01490c4b8ef50106430c1 Mon Sep 17 00:00:00 2001 From: WenkelF Date: Sun, 20 Aug 2023 13:06:48 -0400 Subject: [PATCH 03/30] Computing val/test metrics on cpu to save gpu memory --- graphium/trainer/predictor.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/graphium/trainer/predictor.py b/graphium/trainer/predictor.py index 3f2d2e676..cbc7efe07 100644 --- a/graphium/trainer/predictor.py +++ b/graphium/trainer/predictor.py @@ -539,18 +539,16 @@ def validation_step(self, batch: Dict[str, Tensor], to_cpu: bool = True) -> Dict def test_step(self, batch: Dict[str, Tensor], to_cpu: bool = True) -> Dict[str, Any]: return self._general_step(batch=batch, step_name="test", to_cpu=to_cpu) - def _general_epoch_end(self, outputs: Dict[str, Any], step_name: str) -> None: + def _general_epoch_end(self, outputs: Dict[str, Any], step_name: str, device: str) -> None: r"""Common code for training_epoch_end, validation_epoch_end and testing_epoch_end""" # Transform the list of dict of dict, into a dict of list of dict preds = {} targets = {} - device = device = outputs[0]["preds"][self.tasks[0]].device # should be better way to do this - # device = 0 for task in self.tasks: - preds[task] = torch.cat([out["preds"][task].to(device=device) for out in outputs], dim=0) - targets[task] = torch.cat([out["targets"][task].to(device=device) for out in outputs], dim=0) + preds[task] = torch.cat([out["preds"][task].to(device) for out in outputs], dim=0) + targets[task] = torch.cat([out["targets"][task].to(device) for out in outputs], dim=0) if ("weights" in outputs[0].keys()) and (outputs[0]["weights"] is not None): - weights = torch.cat([out["weights"] for out in outputs], dim=0) + weights = torch.cat([out["weights"].to(device) for out in outputs], dim=0) else: weights = None @@ -607,7 +605,7 @@ def on_validation_batch_end(self, outputs: Any, batch: Any, batch_idx: int) -> N return super().on_validation_batch_end(outputs, batch, batch_idx) def on_validation_epoch_end(self) -> None: - metrics_logs = self._general_epoch_end(outputs=self.validation_step_outputs, step_name="val") + metrics_logs = self._general_epoch_end(outputs=self.validation_step_outputs, step_name="val", device="cpu") self.validation_step_outputs.clear() concatenated_metrics_logs = 
self.task_epoch_summary.concatenate_metrics_logs(metrics_logs) concatenated_metrics_logs["val/mean_time"] = torch.tensor(self.mean_val_time_tracker.mean_value) @@ -627,7 +625,7 @@ def on_test_batch_end(self, outputs: Any, batch: Any, batch_idx: int) -> None: self.test_step_outputs.append(outputs) def on_test_epoch_end(self) -> None: - metrics_logs = self._general_epoch_end(outputs=self.test_step_outputs, step_name="test") + metrics_logs = self._general_epoch_end(outputs=self.test_step_outputs, step_name="test", device="cpu") self.test_step_outputs.clear() concatenated_metrics_logs = self.task_epoch_summary.concatenate_metrics_logs(metrics_logs) From ea9ff5d41286160aaf087c5b5f617b98174c161d Mon Sep 17 00:00:00 2001 From: WenkelF Date: Sun, 20 Aug 2023 17:17:29 -0400 Subject: [PATCH 04/30] Implementing testing for a model checkpoint --- README.md | 3 + .../hydra-configs/architecture/largemix.yaml | 2 +- expts/hydra-configs/architecture/toymix.yaml | 2 +- expts/hydra-configs/model/gine.yaml | 26 ++++++ .../loss_metrics_datamodule/largemix.yaml | 28 +++---- .../tasks/loss_metrics_datamodule/toymix.yaml | 2 +- .../tasks/task_heads/largemix.yaml | 4 +- .../training/accelerator/largemix_cpu.yaml | 5 +- .../training/accelerator/largemix_gpu.yaml | 3 +- .../training/accelerator/toymix_cpu.yaml | 8 +- .../training/accelerator/toymix_gpu.yaml | 8 +- expts/hydra-configs/training/largemix.yaml | 2 +- .../training/model/largemix_gine.yaml | 18 ++++ .../training/model/toymix_gcn.yaml | 12 ++- expts/hydra-configs/training/toymix.yaml | 15 ++-- graphium/cli/test.py | 82 +++++++++++++++++++ graphium/config/_loader.py | 21 +++++ graphium/trainer/predictor.py | 4 +- pyproject.toml | 3 +- 19 files changed, 206 insertions(+), 42 deletions(-) create mode 100644 expts/hydra-configs/model/gine.yaml create mode 100644 expts/hydra-configs/training/model/largemix_gine.yaml create mode 100644 graphium/cli/test.py diff --git a/README.md b/README.md index 1299e5f5b..6c4e567ec 100644 --- a/README.md +++ b/README.md @@ -111,6 +111,9 @@ graphium-train --config-path [PATH] --config-name [CONFIG] ``` Thanks to the modular nature of `hydra` you can reuse many of our config settings for your own experiments with Graphium. +## Testing a model from a checkpoint +A model trained via `graphium-train [...]` can be tested using `graphium-test [...]` if the checkpoint section `trainer.model_checkpoint` has been specified in the config used for training. By default, the last model checkpoint is used for testing, but a specific model checkpoint can be selected by passing an additional argument `graphium-test [...] +ckpt_name_for_testing=[filename]`, which will look for a model checkpoint at `"[trainer.model_checkpoint.dirpath]/[filename].ckpt"`. + ## Preparing the data in advance The data preparation including the featurization (e.g., of molecules from smiles to pyg-compatible format) is embedded in the pipeline and will be performed when executing `graphium-train [...]`. 
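[Editor's note] A minimal usage sketch for the new `graphium-test` entry point introduced in this patch series (registered in `pyproject.toml` alongside `graphium-train`). It assumes `graphium-test` accepts the same Hydra flags as `graphium-train`; the checkpoint name `best` matches the `filename: best` set in `expts/hydra-configs/training/largemix.yaml`, and `last` is what is used when `+ckpt_name_for_testing` is not passed.

```bash
# Train; checkpoints are written under trainer.model_checkpoint.dirpath
graphium-train --config-path [PATH] --config-name [CONFIG]

# Test using the last checkpoint (default behaviour of graphium-test)
graphium-test --config-path [PATH] --config-name [CONFIG]

# Test using the top-1 checkpoint saved as "best.ckpt" (name depends on the config)
graphium-test --config-path [PATH] --config-name [CONFIG] +ckpt_name_for_testing=best
```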
diff --git a/expts/hydra-configs/architecture/largemix.yaml b/expts/hydra-configs/architecture/largemix.yaml index 903d59ee0..ae38ea2ac 100644 --- a/expts/hydra-configs/architecture/largemix.yaml +++ b/expts/hydra-configs/architecture/largemix.yaml @@ -88,7 +88,7 @@ datamodule: featurization_progress: True featurization_backend: "loky" processed_graph_data_path: "../datacache/large-dataset/" - dataloading_from: "ram" + dataloading_from: "disk" num_workers: 4 # -1 to use all persistent_workers: True featurization: diff --git a/expts/hydra-configs/architecture/toymix.yaml b/expts/hydra-configs/architecture/toymix.yaml index c79325919..677d6c7f9 100644 --- a/expts/hydra-configs/architecture/toymix.yaml +++ b/expts/hydra-configs/architecture/toymix.yaml @@ -78,7 +78,7 @@ datamodule: featurization_n_jobs: 30 featurization_progress: True featurization_backend: "loky" - processed_graph_data_path: "../datacache/neurips2023-small/" + processed_graph_data_path: "../datacache/small-dataset" dataloading_from: ram num_workers: 30 # -1 to use all persistent_workers: False diff --git a/expts/hydra-configs/model/gine.yaml b/expts/hydra-configs/model/gine.yaml new file mode 100644 index 000000000..50a4638f9 --- /dev/null +++ b/expts/hydra-configs/model/gine.yaml @@ -0,0 +1,26 @@ +# @package _global_ + +architecture: + pre_nn_edges: # Set as null to avoid a pre-nn network + out_dim: 32 + hidden_dims: 128 + depth: 2 + activation: relu + last_activation: none + dropout: ${architecture.pre_nn.dropout} + normalization: ${architecture.pre_nn.normalization} + last_normalization: ${architecture.pre_nn.normalization} + residual_type: none + + gnn: + out_dim: &gnn_dim 704 + hidden_dims: *gnn_dim + layer_type: 'pyg:gine' + + graph_output_nn: + graph: + out_dim: *gnn_dim + hidden_dims: *gnn_dim + node: + out_dim: *gnn_dim + hidden_dims: *gnn_dim diff --git a/expts/hydra-configs/tasks/loss_metrics_datamodule/largemix.yaml b/expts/hydra-configs/tasks/loss_metrics_datamodule/largemix.yaml index d8b0e7b26..5a3f18f87 100644 --- a/expts/hydra-configs/tasks/loss_metrics_datamodule/largemix.yaml +++ b/expts/hydra-configs/tasks/loss_metrics_datamodule/largemix.yaml @@ -16,11 +16,11 @@ predictor: loss_fun: l1000_vcap: name: hybrid_ce_ipu - n_brackets: 5 + n_brackets: 3 alpha: 0.5 l1000_mcf7: name: hybrid_ce_ipu - n_brackets: 5 + n_brackets: 3 alpha: 0.5 pcba_1328: bce_logits_ipu pcqm4m_g25: mae_ipu @@ -30,7 +30,7 @@ metrics: l1000_vcap: &classif_metrics - name: auroc metric: auroc - num_classes: 5 + num_classes: 3 task: multiclass target_to_int: True target_nan_mask: -1000 @@ -39,7 +39,7 @@ metrics: threshold_kwargs: null - name: avpr metric: averageprecision - num_classes: 5 + num_classes: 3 task: multiclass target_to_int: True target_nan_mask: -1000 @@ -84,50 +84,50 @@ datamodule: task_specific_args: # To be replaced by a new class "DatasetParams" l1000_vcap: df: null - df_path: expts/data/large-dataset/LINCS_L1000_VCAP_0-4.csv.gz + df_path: /home/mila/f/frederik.wenkel/scratch/data/graphium/large-dataset/LINCS_L1000_VCAP_0-4.csv.gz # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_VCAP_0-4.csv.gz # or set path as the URL directly smiles_col: "SMILES" label_cols: geneID-* # geneID-* means all columns starting with "geneID-" # sample_size: 2000 # use sample_size for test task_level: graph - splits_path: expts/data/large-dataset/l1000_vcap_random_splits.pt # Download with `wget 
https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_vcap_random_splits.pt` + splits_path: /home/mila/f/frederik.wenkel/scratch/data/graphium/large-dataset/l1000_vcap_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_vcap_random_splits.pt` epoch_sampling_fraction: 1.0 l1000_mcf7: df: null - df_path: expts/data/large-dataset/LINCS_L1000_MCF7_0-4.csv.gz + df_path: /home/mila/f/frederik.wenkel/scratch/data/graphium/large-dataset/LINCS_L1000_MCF7_0-4.csv.gz # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_MCF7_0-4.csv.gz # or set path as the URL directly smiles_col: "SMILES" label_cols: geneID-* # geneID-* means all columns starting with "geneID-" # sample_size: 2000 # use sample_size for test task_level: graph - splits_path: expts/data/large-dataset/l1000_mcf7_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_mcf7_random_splits.pt` + splits_path: /home/mila/f/frederik.wenkel/scratch/data/graphium/large-dataset/l1000_mcf7_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_mcf7_random_splits.pt` epoch_sampling_fraction: 1.0 pcba_1328: df: null - df_path: expts/data/large-dataset/PCBA_1328_1564k.parquet + df_path: /home/mila/f/frederik.wenkel/scratch/data/graphium/large-dataset/PCBA_1328_1564k.parquet # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCBA_1328_1564k.parquet # or set path as the URL directly smiles_col: "SMILES" label_cols: assayID-* # assayID-* means all columns starting with "assayID-" # sample_size: 2000 # use sample_size for test task_level: graph - splits_path: expts/data/large-dataset/pcba_1328_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcba_1328_random_splits.pt` + splits_path: /home/mila/f/frederik.wenkel/scratch/data/graphium/large-dataset/pcba_1328_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcba_1328_random_splits.pt` epoch_sampling_fraction: 1.0 pcqm4m_g25: df: null - df_path: expts/data/large-dataset/PCQM4M_G25_N4.parquet + df_path: /home/mila/f/frederik.wenkel/scratch/data/graphium/large-dataset/PCQM4M_G25_N4.parquet # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet # or set path as the URL directly smiles_col: "ordered_smiles" label_cols: graph_* # graph_* means all columns starting with "graph_" # sample_size: 2000 # use sample_size for test task_level: graph - splits_path: expts/data/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` + splits_path: /home/mila/f/frederik.wenkel/scratch/data/graphium/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` label_normalization: normalize_val_test: True method: "normal" @@ -135,14 +135,14 @@ datamodule: pcqm4m_n4: df: null - df_path: expts/data/large-dataset/PCQM4M_G25_N4.parquet + df_path: /home/mila/f/frederik.wenkel/scratch/data/graphium/large-dataset/PCQM4M_G25_N4.parquet # wget 
https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet # or set path as the URL directly smiles_col: "ordered_smiles" label_cols: node_* # node_* means all columns starting with "node_" # sample_size: 2000 # use sample_size for test task_level: node - splits_path: expts/data/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` + splits_path: /home/mila/f/frederik.wenkel/scratch/data/graphium/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` seed: ${constants.seed} label_normalization: normalize_val_test: True diff --git a/expts/hydra-configs/tasks/loss_metrics_datamodule/toymix.yaml b/expts/hydra-configs/tasks/loss_metrics_datamodule/toymix.yaml index 9ac744a52..8cbf0e0de 100644 --- a/expts/hydra-configs/tasks/loss_metrics_datamodule/toymix.yaml +++ b/expts/hydra-configs/tasks/loss_metrics_datamodule/toymix.yaml @@ -42,7 +42,7 @@ metrics: metric: f1 multitask_handling: mean-per-label target_to_int: True - num_classes: 2 + num_classes: 3 average: micro threshold_kwargs: &threshold_05 operator: greater diff --git a/expts/hydra-configs/tasks/task_heads/largemix.yaml b/expts/hydra-configs/tasks/task_heads/largemix.yaml index 92aab2827..e69c38d1d 100644 --- a/expts/hydra-configs/tasks/task_heads/largemix.yaml +++ b/expts/hydra-configs/tasks/task_heads/largemix.yaml @@ -4,7 +4,7 @@ architecture: task_heads: l1000_vcap: task_level: graph - out_dim: 4890 + out_dim: 2934 hidden_dims: 128 depth: 2 activation: none @@ -15,7 +15,7 @@ architecture: residual_type: none l1000_mcf7: task_level: graph - out_dim: 4890 + out_dim: 2934 hidden_dims: 128 depth: 2 activation: none diff --git a/expts/hydra-configs/training/accelerator/largemix_cpu.yaml b/expts/hydra-configs/training/accelerator/largemix_cpu.yaml index aedc6ff60..0958e43ed 100644 --- a/expts/hydra-configs/training/accelerator/largemix_cpu.yaml +++ b/expts/hydra-configs/training/accelerator/largemix_cpu.yaml @@ -2,10 +2,11 @@ datamodule: args: - batch_size_training: 1000 - batch_size_inference: 1000 + batch_size_training: 200 + batch_size_inference: 200 featurization_n_jobs: 0 num_workers: 0 + persistent_workers: false predictor: metrics_every_n_train_steps: 300 diff --git a/expts/hydra-configs/training/accelerator/largemix_gpu.yaml b/expts/hydra-configs/training/accelerator/largemix_gpu.yaml index 3025644de..281dfc26d 100644 --- a/expts/hydra-configs/training/accelerator/largemix_gpu.yaml +++ b/expts/hydra-configs/training/accelerator/largemix_gpu.yaml @@ -8,7 +8,8 @@ datamodule: batch_size_training: 1000 batch_size_inference: 1000 featurization_n_jobs: 0 - num_workers: 10 + num_workers: 0 + persistent_workers: false predictor: metrics_every_n_train_steps: 300 diff --git a/expts/hydra-configs/training/accelerator/toymix_cpu.yaml b/expts/hydra-configs/training/accelerator/toymix_cpu.yaml index 9022eeb84..9375aaf46 100644 --- a/expts/hydra-configs/training/accelerator/toymix_cpu.yaml +++ b/expts/hydra-configs/training/accelerator/toymix_cpu.yaml @@ -2,10 +2,10 @@ datamodule: args: - batch_size_training: 200 - batch_size_inference: 200 - featurization_n_jobs: 4 - num_workers: 4 + batch_size_training: 1000 + batch_size_inference: 1000 + featurization_n_jobs: 0 + num_workers: 0 predictor: optim_kwargs: {} diff --git 
a/expts/hydra-configs/training/accelerator/toymix_gpu.yaml b/expts/hydra-configs/training/accelerator/toymix_gpu.yaml index c2c8e4066..c88145804 100644 --- a/expts/hydra-configs/training/accelerator/toymix_gpu.yaml +++ b/expts/hydra-configs/training/accelerator/toymix_gpu.yaml @@ -5,10 +5,10 @@ accelerator: datamodule: args: - batch_size_training: 200 - batch_size_inference: 200 - featurization_n_jobs: 4 - num_workers: 4 + batch_size_training: 1000 + batch_size_inference: 1000 + featurization_n_jobs: 0 + num_workers: 0 predictor: optim_kwargs: {} diff --git a/expts/hydra-configs/training/largemix.yaml b/expts/hydra-configs/training/largemix.yaml index 6426e7b57..fb0e486b6 100644 --- a/expts/hydra-configs/training/largemix.yaml +++ b/expts/hydra-configs/training/largemix.yaml @@ -18,7 +18,7 @@ trainer: seed: ${constants.seed} model_checkpoint: dirpath: model_checkpoints/large-dataset/ - filename: ${constants.name} + filename: best save_last: True # saving last model save_top_k: 1 # and best model monitor: loss/val # wrt validation loss diff --git a/expts/hydra-configs/training/model/largemix_gine.yaml b/expts/hydra-configs/training/model/largemix_gine.yaml new file mode 100644 index 000000000..cce70c305 --- /dev/null +++ b/expts/hydra-configs/training/model/largemix_gine.yaml @@ -0,0 +1,18 @@ +# @package _global_ + +constants: + name: large_data_gine + wandb: + name: ${constants.name} + project: neurips2023-expts + entity: multitask-gnn + save_dir: logs/${constants.name} + entity: multitask-gnn + seed: 42 + max_epochs: 100 + data_dir: expts/data/large-dataset + raise_train_error: true + +trainer: + model_checkpoint: + dirpath: model_checkpoints/large-dataset/gine/ \ No newline at end of file diff --git a/expts/hydra-configs/training/model/toymix_gcn.yaml b/expts/hydra-configs/training/model/toymix_gcn.yaml index 48eabe003..f1df5618c 100644 --- a/expts/hydra-configs/training/model/toymix_gcn.yaml +++ b/expts/hydra-configs/training/model/toymix_gcn.yaml @@ -1,12 +1,18 @@ # @package _global_ constants: - name: neurips2023_small_data_gcn + name: small_data_gcn + wandb: + name: ${constants.name} + project: neurips2023-expts + entity: multitask-gnn + save_dir: logs/${constants.name} + entity: multitask-gnn seed: 42 max_epochs: 100 - data_dir: expts/data/neurips2023/small-dataset + data_dir: expts/data/small-dataset raise_train_error: true trainer: model_checkpoint: - dirpath: models_checkpoints/neurips2023-small-gcn/ \ No newline at end of file + dirpath: model_checkpoints/small-dataset/gcn/ \ No newline at end of file diff --git a/expts/hydra-configs/training/toymix.yaml b/expts/hydra-configs/training/toymix.yaml index 4afcbd56a..8be3fea04 100644 --- a/expts/hydra-configs/training/toymix.yaml +++ b/expts/hydra-configs/training/toymix.yaml @@ -3,7 +3,7 @@ predictor: random_seed: ${constants.seed} optim_kwargs: - lr: 4.e-5 # warmup can be scheduled using torch_scheduler_kwargs + lr: 1.e-4 # warmup can be scheduled using torch_scheduler_kwargs # weight_decay: 1.e-7 torch_scheduler_kwargs: module_type: WarmUpLinearLR @@ -11,16 +11,19 @@ predictor: warmup_epochs: 10 verbose: False scheduler_kwargs: null - target_nan_mask: null + target_nan_mask: null # null: no mask, 0: 0 mask, ignore-flatten, ignore-mean-per-label multitask_handling: flatten # flatten, mean-per-label trainer: seed: ${constants.seed} model_checkpoint: - filename: ${constants.name} - save_last: True + dirpath: model_checkpoints/large-dataset/ + filename: ${constants.name}-{epoch} + save_last: True # saving last model + save_top_k: 1 # 
and best model + monitor: loss/val # wrt validation loss trainer: - precision: 16 + precision: 16-mixed max_epochs: ${constants.max_epochs} min_epochs: 1 - check_val_every_n_epoch: 20 \ No newline at end of file + check_val_every_n_epoch: 1 \ No newline at end of file diff --git a/graphium/cli/test.py b/graphium/cli/test.py new file mode 100644 index 000000000..ac9bcf6c3 --- /dev/null +++ b/graphium/cli/test.py @@ -0,0 +1,82 @@ +import hydra +import wandb +import timeit + +from omegaconf import DictConfig, OmegaConf +from loguru import logger +from datetime import datetime +from lightning.pytorch.utilities.model_summary import ModelSummary +from graphium.trainer.predictor import PredictorModule + +from graphium.config._loader import ( + load_datamodule, + get_checkpoint_path, + load_trainer, + load_accelerator, +) +from graphium.utils.safe_run import SafeRun + + +@hydra.main(version_base=None, config_path="../../expts/hydra-configs", config_name="main") +def cli(cfg: DictConfig) -> None: + """ + CLI endpoint for running test step on model checkpoints. + """ + run_testing(cfg) + + +def run_testing(cfg: DictConfig) -> None: + """ + The main (pre-)training and fine-tuning loop. + """ + + cfg = OmegaConf.to_container(cfg, resolve=True) + + st = timeit.default_timer() + + wandb_cfg = cfg["constants"].get("wandb") + if wandb_cfg is not None: + wandb.init( + entity=wandb_cfg["entity"], + project=wandb_cfg["project"], + config=cfg, + ) + + ## == Instantiate all required objects from their respective configs == + # Accelerator + cfg, accelerator_type = load_accelerator(cfg) + + ## Data-module + datamodule = load_datamodule(cfg, accelerator_type) + + ## Load Predictor + predictor = PredictorModule.load_from_checkpoint(checkpoint_path=get_checkpoint_path(cfg)) + + ## Load Trainer + date_time_suffix = datetime.now().strftime("%d.%m.%Y_%H.%M.%S") + trainer = load_trainer(cfg, accelerator_type, date_time_suffix) + + # Determine the max num nodes and edges in testing + datamodule.setup(stage="test") + + max_nodes = datamodule.get_max_num_nodes_datamodule(stages=["test"]) + max_edges = datamodule.get_max_num_edges_datamodule(stages=["test"]) + + predictor.model.set_max_num_nodes_edges_per_graph(max_nodes, max_edges) + + # Run the model testing + with SafeRun(name="TESTING", raise_error=cfg["constants"]["raise_train_error"], verbose=True): + trainer.test(model=predictor, datamodule=datamodule) + + logger.info("-" * 50) + logger.info("Total compute time:", timeit.default_timer() - st) + logger.info("-" * 50) + + if wandb_cfg is not None: + wandb.finish() + + return trainer.callback_metrics + + +if __name__ == "__main__": + cli() diff --git a/graphium/config/_loader.py b/graphium/config/_loader.py index 3c7a654e9..27fec134f 100644 --- a/graphium/config/_loader.py +++ b/graphium/config/_loader.py @@ -576,3 +576,24 @@ def merge_dicts( elif on_exist == "ignore": pass return dict_a + + +def get_checkpoint_path(config: Union[omegaconf.DictConfig, Dict[str, Any]]) -> str: + """ + Get the checkpoint path from a config file. 
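+    By default, ``last.ckpt`` is loaded from the directory given by
+    ``trainer.model_checkpoint.dirpath`` suffixed with the trainer seed; a
+    different checkpoint can be selected by setting the optional config key
+    ``ckpt_name_for_testing`` to the checkpoint's filename (without ``.ckpt``).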
+ """ + + cfg_trainer = config["trainer"] + + if "model_checkpoint" in cfg_trainer.keys(): + dirpath = cfg_trainer["model_checkpoint"]["dirpath"] + str(cfg_trainer["seed"]) + "/" + filename = config.get("ckpt_name_for_testing", "last") + ".ckpt" + else: + raise ValueError("Empty checkpoint section in config file") + + checkpoint_path = os.path.join(dirpath, filename) + + if not os.path.exists(checkpoint_path): + raise ValueError(f"Checkpoint path `{checkpoint_path}` does not exist") + + return checkpoint_path diff --git a/graphium/trainer/predictor.py b/graphium/trainer/predictor.py index cbc7efe07..9a76c6b77 100644 --- a/graphium/trainer/predictor.py +++ b/graphium/trainer/predictor.py @@ -605,7 +605,9 @@ def on_validation_batch_end(self, outputs: Any, batch: Any, batch_idx: int) -> N return super().on_validation_batch_end(outputs, batch, batch_idx) def on_validation_epoch_end(self) -> None: - metrics_logs = self._general_epoch_end(outputs=self.validation_step_outputs, step_name="val", device="cpu") + metrics_logs = self._general_epoch_end( + outputs=self.validation_step_outputs, step_name="val", device="cpu" + ) self.validation_step_outputs.clear() concatenated_metrics_logs = self.task_epoch_summary.concatenate_metrics_logs(metrics_logs) concatenated_metrics_logs["val/mean_time"] = torch.tensor(self.mean_val_time_tracker.mean_value) diff --git a/pyproject.toml b/pyproject.toml index 20cfa9792..5b8bde087 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,8 +64,9 @@ dependencies = [ [project.scripts] graphium = "graphium.cli.main:main_cli" - graphium-train = "graphium.cli.train_finetune:cli" graphium-prepare-data = "graphium.cli.prepare_data:cli" + graphium-train = "graphium.cli.train_finetune:cli" + graphium-test = "graphium.cli.test:cli" [project.urls] Website = "https://graphium.datamol.io/" From fd6e9325f4f42336e635e46e57729cf5cff1aedf Mon Sep 17 00:00:00 2001 From: wenkelf Date: Wed, 23 Aug 2023 18:11:30 +0000 Subject: [PATCH 05/30] Adding single dataset configs for LargeMix to hydra --- .../hydra-configs/architecture/largemix.yaml | 4 +- expts/hydra-configs/architecture/toymix.yaml | 2 +- expts/hydra-configs/tasks/l1000_mcf7.yaml | 7 +++ expts/hydra-configs/tasks/l1000_vcap.yaml | 7 +++ .../loss_metrics_datamodule/l1000_mcf7.yaml | 48 +++++++++++++++++++ .../loss_metrics_datamodule/l1000_vcap.yaml | 48 +++++++++++++++++++ .../loss_metrics_datamodule/largemix.yaml | 24 +++++----- .../loss_metrics_datamodule/pcba_1328.yaml | 40 ++++++++++++++++ .../tasks/loss_metrics_datamodule/toymix.yaml | 2 +- expts/hydra-configs/tasks/pcba_1328.yaml | 7 +++ .../tasks/task_heads/l1000_mcf7.yaml | 15 ++++++ .../tasks/task_heads/l1000_vcap.yaml | 15 ++++++ .../tasks/task_heads/pcba_1328.yaml | 15 ++++++ .../training/accelerator/largemix_cpu.yaml | 7 ++- .../training/accelerator/largemix_gpu.yaml | 13 +++-- .../training/accelerator/toymix_cpu.yaml | 8 ++-- .../training/accelerator/toymix_gpu.yaml | 8 ++-- expts/hydra-configs/training/largemix.yaml | 18 ++++--- .../training/model/largemix_gcn.yaml | 2 +- .../training/model/largemix_gin.yaml | 18 +++++++ .../training/model/largemix_gine.yaml | 2 +- .../training/model/toymix_gcn.yaml | 12 ++--- expts/hydra-configs/training/toymix.yaml | 17 +++---- graphium/cli/test.py | 2 +- scripts/featurize/featurize.sh | 22 +++++++++ scripts/test/gcn/test-best-th2_gcn_v100.sh | 25 ++++++++++ scripts/test/gcn/test-last-th2_gcn_a100.sh | 24 ++++++++++ scripts/test/gin/test-best-th2_gin_v100.sh | 25 ++++++++++ scripts/test/gin/test-last-th2_gin_a100.sh | 24 
++++++++++ scripts/test/gine/test-best-th2_gine_v100.sh | 25 ++++++++++ scripts/test/gine/test-last-th2_gine_a100.sh | 24 ++++++++++ scripts/test/gine/test-last-th2_gine_a112.sh | 24 ++++++++++ scripts/test/test-on-cpu.sh | 25 ++++++++++ .../train/gcn/gpu/large_th2_gcn_a100_disk.sh | 24 ++++++++++ .../train/gcn/gpu/large_th3_gcn_a100_disk.sh | 24 ++++++++++ .../train/gcn/gpu/large_th4_gcn_a100_disk.sh | 24 ++++++++++ .../train/gin/gpu/large_th2_gin_a100_disk.sh | 24 ++++++++++ .../train/gin/gpu/large_th3_gin_a100_disk.sh | 24 ++++++++++ .../train/gin/gpu/large_th4_gin_a100_disk.sh | 24 ++++++++++ .../gine/cpu/large_th2_gine_c112_disk.sh | 24 ++++++++++ .../gine/cpu/large_th3_gine_c112_disk.sh | 24 ++++++++++ .../gine/cpu/large_th4_gine_c112_disk.sh | 24 ++++++++++ .../gine/gpu/large_th2_gine_a100_disk.sh | 24 ++++++++++ .../gine/gpu/large_th3_gine_a100_disk.sh | 24 ++++++++++ .../gine/gpu/large_th4_gine_a100_disk.sh | 24 ++++++++++ scripts/train/sweep.sh | 21 ++++++++ scripts/train/test.sh | 6 +++ scripts/train/train.sh | 27 +++++++++++ 48 files changed, 837 insertions(+), 64 deletions(-) create mode 100644 expts/hydra-configs/tasks/l1000_mcf7.yaml create mode 100644 expts/hydra-configs/tasks/l1000_vcap.yaml create mode 100644 expts/hydra-configs/tasks/loss_metrics_datamodule/l1000_mcf7.yaml create mode 100644 expts/hydra-configs/tasks/loss_metrics_datamodule/l1000_vcap.yaml create mode 100644 expts/hydra-configs/tasks/loss_metrics_datamodule/pcba_1328.yaml create mode 100644 expts/hydra-configs/tasks/pcba_1328.yaml create mode 100644 expts/hydra-configs/tasks/task_heads/l1000_mcf7.yaml create mode 100644 expts/hydra-configs/tasks/task_heads/l1000_vcap.yaml create mode 100644 expts/hydra-configs/tasks/task_heads/pcba_1328.yaml create mode 100644 expts/hydra-configs/training/model/largemix_gin.yaml create mode 100644 scripts/featurize/featurize.sh create mode 100644 scripts/test/gcn/test-best-th2_gcn_v100.sh create mode 100644 scripts/test/gcn/test-last-th2_gcn_a100.sh create mode 100644 scripts/test/gin/test-best-th2_gin_v100.sh create mode 100644 scripts/test/gin/test-last-th2_gin_a100.sh create mode 100644 scripts/test/gine/test-best-th2_gine_v100.sh create mode 100644 scripts/test/gine/test-last-th2_gine_a100.sh create mode 100644 scripts/test/gine/test-last-th2_gine_a112.sh create mode 100644 scripts/test/test-on-cpu.sh create mode 100644 scripts/train/gcn/gpu/large_th2_gcn_a100_disk.sh create mode 100644 scripts/train/gcn/gpu/large_th3_gcn_a100_disk.sh create mode 100644 scripts/train/gcn/gpu/large_th4_gcn_a100_disk.sh create mode 100644 scripts/train/gin/gpu/large_th2_gin_a100_disk.sh create mode 100644 scripts/train/gin/gpu/large_th3_gin_a100_disk.sh create mode 100644 scripts/train/gin/gpu/large_th4_gin_a100_disk.sh create mode 100644 scripts/train/gine/cpu/large_th2_gine_c112_disk.sh create mode 100644 scripts/train/gine/cpu/large_th3_gine_c112_disk.sh create mode 100644 scripts/train/gine/cpu/large_th4_gine_c112_disk.sh create mode 100644 scripts/train/gine/gpu/large_th2_gine_a100_disk.sh create mode 100644 scripts/train/gine/gpu/large_th3_gine_a100_disk.sh create mode 100644 scripts/train/gine/gpu/large_th4_gine_a100_disk.sh create mode 100644 scripts/train/sweep.sh create mode 100644 scripts/train/test.sh create mode 100644 scripts/train/train.sh diff --git a/expts/hydra-configs/architecture/largemix.yaml b/expts/hydra-configs/architecture/largemix.yaml index ae38ea2ac..e56108572 100644 --- a/expts/hydra-configs/architecture/largemix.yaml +++ 
b/expts/hydra-configs/architecture/largemix.yaml @@ -84,12 +84,12 @@ datamodule: module_type: "MultitaskFromSmilesDataModule" args: prepare_dict_or_graph: pyg:graph - featurization_n_jobs: 4 + featurization_n_jobs: 20 featurization_progress: True featurization_backend: "loky" processed_graph_data_path: "../datacache/large-dataset/" dataloading_from: "disk" - num_workers: 4 # -1 to use all + num_workers: 20 # -1 to use all persistent_workers: True featurization: atom_property_list_onehot: [atomic-number, group, period, total-valence] diff --git a/expts/hydra-configs/architecture/toymix.yaml b/expts/hydra-configs/architecture/toymix.yaml index 677d6c7f9..c79325919 100644 --- a/expts/hydra-configs/architecture/toymix.yaml +++ b/expts/hydra-configs/architecture/toymix.yaml @@ -78,7 +78,7 @@ datamodule: featurization_n_jobs: 30 featurization_progress: True featurization_backend: "loky" - processed_graph_data_path: "../datacache/small-dataset" + processed_graph_data_path: "../datacache/neurips2023-small/" dataloading_from: ram num_workers: 30 # -1 to use all persistent_workers: False diff --git a/expts/hydra-configs/tasks/l1000_mcf7.yaml b/expts/hydra-configs/tasks/l1000_mcf7.yaml new file mode 100644 index 000000000..b6ffdfde7 --- /dev/null +++ b/expts/hydra-configs/tasks/l1000_mcf7.yaml @@ -0,0 +1,7 @@ +# NOTE: We cannot have a single config, since for fine-tuning we will +# only want to override the loss_metrics_datamodule, whereas for training we will +# want to override both. + +defaults: + - task_heads: l1000_mcf7 + - loss_metrics_datamodule: l1000_mcf7 \ No newline at end of file diff --git a/expts/hydra-configs/tasks/l1000_vcap.yaml b/expts/hydra-configs/tasks/l1000_vcap.yaml new file mode 100644 index 000000000..e212a4594 --- /dev/null +++ b/expts/hydra-configs/tasks/l1000_vcap.yaml @@ -0,0 +1,7 @@ +# NOTE: We cannot have a single config, since for fine-tuning we will +# only want to override the loss_metrics_datamodule, whereas for training we will +# want to override both. + +defaults: + - task_heads: l1000_vcap + - loss_metrics_datamodule: l1000_vcap \ No newline at end of file diff --git a/expts/hydra-configs/tasks/loss_metrics_datamodule/l1000_mcf7.yaml b/expts/hydra-configs/tasks/loss_metrics_datamodule/l1000_mcf7.yaml new file mode 100644 index 000000000..ec48f2e22 --- /dev/null +++ b/expts/hydra-configs/tasks/loss_metrics_datamodule/l1000_mcf7.yaml @@ -0,0 +1,48 @@ +# @package _global_ + +predictor: + metrics_on_progress_bar: + l1000_mcf7: [] + metrics_on_training_set: + l1000_mcf7: [] + loss_fun: + l1000_mcf7: + name: hybrid_ce_ipu + n_brackets: 3 + alpha: 0.5 + +metrics: + l1000_mcf7: + - name: auroc + metric: auroc + num_classes: 3 + task: multiclass + target_to_int: True + target_nan_mask: -1000 + ignore_index: -1000 + multitask_handling: mean-per-label + threshold_kwargs: null + - name: avpr + metric: averageprecision + num_classes: 3 + task: multiclass + target_to_int: True + target_nan_mask: -1000 + ignore_index: -1000 + multitask_handling: mean-per-label + threshold_kwargs: null + +datamodule: + args: # Matches that in the test_multitask_datamodule.py case. 
+ task_specific_args: # To be replaced by a new class "DatasetParams" + l1000_mcf7: + df: null + df_path: ../data/graphium/large-dataset/LINCS_L1000_MCF7_0-2_th2.csv.gz + # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_MCF7_0-4.csv.gz + # or set path as the URL directly + smiles_col: "SMILES" + label_cols: geneID-* # geneID-* means all columns starting with "geneID-" + # sample_size: 2000 # use sample_size for test + task_level: graph + splits_path: ../data/graphium/large-dataset/l1000_mcf7_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_mcf7_random_splits.pt` + epoch_sampling_fraction: 1.0 \ No newline at end of file diff --git a/expts/hydra-configs/tasks/loss_metrics_datamodule/l1000_vcap.yaml b/expts/hydra-configs/tasks/loss_metrics_datamodule/l1000_vcap.yaml new file mode 100644 index 000000000..9470ec903 --- /dev/null +++ b/expts/hydra-configs/tasks/loss_metrics_datamodule/l1000_vcap.yaml @@ -0,0 +1,48 @@ +# @package _global_ + +predictor: + metrics_on_progress_bar: + l1000_vcap: [] + metrics_on_training_set: + l1000_vcap: [] + loss_fun: + l1000_vcap: + name: hybrid_ce_ipu + n_brackets: 3 + alpha: 0.5 + +metrics: + l1000_vcap: + - name: auroc + metric: auroc + num_classes: 3 + task: multiclass + target_to_int: True + target_nan_mask: -1000 + ignore_index: -1000 + multitask_handling: mean-per-label + threshold_kwargs: null + - name: avpr + metric: averageprecision + num_classes: 3 + task: multiclass + target_to_int: True + target_nan_mask: -1000 + ignore_index: -1000 + multitask_handling: mean-per-label + threshold_kwargs: null + +datamodule: + args: # Matches that in the test_multitask_datamodule.py case. + task_specific_args: # To be replaced by a new class "DatasetParams" + l1000_vcap: + df: null + df_path: ../data/graphium/large-dataset/LINCS_L1000_VCAP_0-2_th2.csv.gz + # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_VCAP_0-4.csv.gz + # or set path as the URL directly + smiles_col: "SMILES" + label_cols: geneID-* # geneID-* means all columns starting with "geneID-" + # sample_size: 2000 # use sample_size for test + task_level: graph + splits_path: ../data/graphium/large-dataset/l1000_vcap_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_vcap_random_splits.pt` + epoch_sampling_fraction: 1.0 \ No newline at end of file diff --git a/expts/hydra-configs/tasks/loss_metrics_datamodule/largemix.yaml b/expts/hydra-configs/tasks/loss_metrics_datamodule/largemix.yaml index 5a3f18f87..96d550cef 100644 --- a/expts/hydra-configs/tasks/loss_metrics_datamodule/largemix.yaml +++ b/expts/hydra-configs/tasks/loss_metrics_datamodule/largemix.yaml @@ -21,7 +21,7 @@ predictor: l1000_mcf7: name: hybrid_ce_ipu n_brackets: 3 - alpha: 0.5 + alpha: ${predictor.loss_fun.l1000_vcap.alpha} pcba_1328: bce_logits_ipu pcqm4m_g25: mae_ipu pcqm4m_n4: mae_ipu @@ -84,50 +84,50 @@ datamodule: task_specific_args: # To be replaced by a new class "DatasetParams" l1000_vcap: df: null - df_path: /home/mila/f/frederik.wenkel/scratch/data/graphium/large-dataset/LINCS_L1000_VCAP_0-4.csv.gz + df_path: ../data/graphium/large-dataset/LINCS_L1000_VCAP_0-2_th2.csv.gz # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_VCAP_0-4.csv.gz # or set path as the URL directly smiles_col: "SMILES" label_cols: geneID-* # geneID-* 
means all columns starting with "geneID-" # sample_size: 2000 # use sample_size for test task_level: graph - splits_path: /home/mila/f/frederik.wenkel/scratch/data/graphium/large-dataset/l1000_vcap_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_vcap_random_splits.pt` + splits_path: ../data/graphium/large-dataset/l1000_vcap_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_vcap_random_splits.pt` epoch_sampling_fraction: 1.0 l1000_mcf7: df: null - df_path: /home/mila/f/frederik.wenkel/scratch/data/graphium/large-dataset/LINCS_L1000_MCF7_0-4.csv.gz + df_path: ../data/graphium/large-dataset/LINCS_L1000_MCF7_0-2_th2.csv.gz # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_MCF7_0-4.csv.gz # or set path as the URL directly smiles_col: "SMILES" label_cols: geneID-* # geneID-* means all columns starting with "geneID-" # sample_size: 2000 # use sample_size for test task_level: graph - splits_path: /home/mila/f/frederik.wenkel/scratch/data/graphium/large-dataset/l1000_mcf7_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_mcf7_random_splits.pt` + splits_path: ../data/graphium/large-dataset/l1000_mcf7_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_mcf7_random_splits.pt` epoch_sampling_fraction: 1.0 pcba_1328: df: null - df_path: /home/mila/f/frederik.wenkel/scratch/data/graphium/large-dataset/PCBA_1328_1564k.parquet + df_path: ../data/graphium/large-dataset/PCBA_1328_1564k.parquet # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCBA_1328_1564k.parquet # or set path as the URL directly smiles_col: "SMILES" label_cols: assayID-* # assayID-* means all columns starting with "assayID-" # sample_size: 2000 # use sample_size for test task_level: graph - splits_path: /home/mila/f/frederik.wenkel/scratch/data/graphium/large-dataset/pcba_1328_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcba_1328_random_splits.pt` + splits_path: ../data/graphium/large-dataset/pcba_1328_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcba_1328_random_splits.pt` epoch_sampling_fraction: 1.0 pcqm4m_g25: df: null - df_path: /home/mila/f/frederik.wenkel/scratch/data/graphium/large-dataset/PCQM4M_G25_N4.parquet + df_path: ../data/graphium/large-dataset/PCQM4M_G25_N4.parquet # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet # or set path as the URL directly smiles_col: "ordered_smiles" label_cols: graph_* # graph_* means all columns starting with "graph_" # sample_size: 2000 # use sample_size for test task_level: graph - splits_path: /home/mila/f/frederik.wenkel/scratch/data/graphium/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` + splits_path: ../data/graphium/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` label_normalization: normalize_val_test: True 
method: "normal" @@ -135,15 +135,15 @@ datamodule: pcqm4m_n4: df: null - df_path: /home/mila/f/frederik.wenkel/scratch/data/graphium/large-dataset/PCQM4M_G25_N4.parquet + df_path: ../data/graphium/large-dataset/PCQM4M_G25_N4.parquet # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet # or set path as the URL directly smiles_col: "ordered_smiles" label_cols: node_* # node_* means all columns starting with "node_" # sample_size: 2000 # use sample_size for test task_level: node - splits_path: /home/mila/f/frederik.wenkel/scratch/data/graphium/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` - seed: ${constants.seed} + splits_path: ../data/graphium/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` + seed: 42 label_normalization: normalize_val_test: True method: "normal" diff --git a/expts/hydra-configs/tasks/loss_metrics_datamodule/pcba_1328.yaml b/expts/hydra-configs/tasks/loss_metrics_datamodule/pcba_1328.yaml new file mode 100644 index 000000000..367521338 --- /dev/null +++ b/expts/hydra-configs/tasks/loss_metrics_datamodule/pcba_1328.yaml @@ -0,0 +1,40 @@ +# @package _global_ + +predictor: + metrics_on_progress_bar: + pcba_1328: [] + metrics_on_training_set: + pcba_1328: [] + loss_fun: + pcba_1328: bce_logits_ipu + +metrics: + pcba_1328: + # use auroc and averageprecision (non_ipu version) so tha nans are handled correctly + - name: auroc + metric: auroc + task: binary + multitask_handling: mean-per-label + target_nan_mask: ignore + threshold_kwargs: null + - name: avpr + metric: averageprecision + task: binary + multitask_handling: mean-per-label + target_nan_mask: ignore + threshold_kwargs: null + +datamodule: + args: # Matches that in the test_multitask_datamodule.py case. 
+ task_specific_args: # To be replaced by a new class "DatasetParams" + pcba_1328: + df: null + df_path: ../data/graphium/large-dataset/PCBA_1328_1564k.parquet + # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCBA_1328_1564k.parquet + # or set path as the URL directly + smiles_col: "SMILES" + label_cols: assayID-* # assayID-* means all columns starting with "assayID-" + # sample_size: 2000 # use sample_size for test + task_level: graph + splits_path: ../data/graphium/large-dataset/pcba_1328_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcba_1328_random_splits.pt` + epoch_sampling_fraction: 1.0 \ No newline at end of file diff --git a/expts/hydra-configs/tasks/loss_metrics_datamodule/toymix.yaml b/expts/hydra-configs/tasks/loss_metrics_datamodule/toymix.yaml index 8cbf0e0de..9ac744a52 100644 --- a/expts/hydra-configs/tasks/loss_metrics_datamodule/toymix.yaml +++ b/expts/hydra-configs/tasks/loss_metrics_datamodule/toymix.yaml @@ -42,7 +42,7 @@ metrics: metric: f1 multitask_handling: mean-per-label target_to_int: True - num_classes: 3 + num_classes: 2 average: micro threshold_kwargs: &threshold_05 operator: greater diff --git a/expts/hydra-configs/tasks/pcba_1328.yaml b/expts/hydra-configs/tasks/pcba_1328.yaml new file mode 100644 index 000000000..61b5e7b29 --- /dev/null +++ b/expts/hydra-configs/tasks/pcba_1328.yaml @@ -0,0 +1,7 @@ +# NOTE: We cannot have a single config, since for fine-tuning we will +# only want to override the loss_metrics_datamodule, whereas for training we will +# want to override both. + +defaults: + - task_heads: pcba_1328 + - loss_metrics_datamodule: pcba_1328 \ No newline at end of file diff --git a/expts/hydra-configs/tasks/task_heads/l1000_mcf7.yaml b/expts/hydra-configs/tasks/task_heads/l1000_mcf7.yaml new file mode 100644 index 000000000..c449a03f1 --- /dev/null +++ b/expts/hydra-configs/tasks/task_heads/l1000_mcf7.yaml @@ -0,0 +1,15 @@ +# @package _global_ + +architecture: + task_heads: + l1000_mcf7: + task_level: graph + out_dim: 2934 + hidden_dims: 128 + depth: 2 + activation: none + last_activation: none + dropout: ${architecture.pre_nn.dropout} + normalization: ${architecture.pre_nn.normalization} + last_normalization: "none" + residual_type: none \ No newline at end of file diff --git a/expts/hydra-configs/tasks/task_heads/l1000_vcap.yaml b/expts/hydra-configs/tasks/task_heads/l1000_vcap.yaml new file mode 100644 index 000000000..a71e75709 --- /dev/null +++ b/expts/hydra-configs/tasks/task_heads/l1000_vcap.yaml @@ -0,0 +1,15 @@ +# @package _global_ + +architecture: + task_heads: + l1000_vcap: + task_level: graph + out_dim: 2934 + hidden_dims: 128 + depth: 2 + activation: none + last_activation: none + dropout: ${architecture.pre_nn.dropout} + normalization: ${architecture.pre_nn.normalization} + last_normalization: "none" + residual_type: none \ No newline at end of file diff --git a/expts/hydra-configs/tasks/task_heads/pcba_1328.yaml b/expts/hydra-configs/tasks/task_heads/pcba_1328.yaml new file mode 100644 index 000000000..498c89e98 --- /dev/null +++ b/expts/hydra-configs/tasks/task_heads/pcba_1328.yaml @@ -0,0 +1,15 @@ +# @package _global_ + +architecture: + task_heads: + pcba_1328: + task_level: graph + out_dim: 1328 + hidden_dims: 64 + depth: 2 + activation: relu + last_activation: none + dropout: ${architecture.pre_nn.dropout} + normalization: ${architecture.pre_nn.normalization} + last_normalization: "none" + residual_type: none \ 
No newline at end of file diff --git a/expts/hydra-configs/training/accelerator/largemix_cpu.yaml b/expts/hydra-configs/training/accelerator/largemix_cpu.yaml index 0958e43ed..6f5e0606a 100644 --- a/expts/hydra-configs/training/accelerator/largemix_cpu.yaml +++ b/expts/hydra-configs/training/accelerator/largemix_cpu.yaml @@ -4,12 +4,11 @@ datamodule: args: batch_size_training: 200 batch_size_inference: 200 - featurization_n_jobs: 0 - num_workers: 0 - persistent_workers: false + featurization_n_jobs: 20 + num_workers: 20 predictor: - metrics_every_n_train_steps: 300 + metrics_every_n_train_steps: 1000 torch_scheduler_kwargs: max_num_epochs: ${constants.max_epochs} diff --git a/expts/hydra-configs/training/accelerator/largemix_gpu.yaml b/expts/hydra-configs/training/accelerator/largemix_gpu.yaml index 281dfc26d..06f2b9d5e 100644 --- a/expts/hydra-configs/training/accelerator/largemix_gpu.yaml +++ b/expts/hydra-configs/training/accelerator/largemix_gpu.yaml @@ -5,19 +5,18 @@ accelerator: datamodule: args: - batch_size_training: 1000 - batch_size_inference: 1000 - featurization_n_jobs: 0 - num_workers: 0 - persistent_workers: false + batch_size_training: 960 + batch_size_inference: 960 + featurization_n_jobs: 6 + num_workers: 6 predictor: - metrics_every_n_train_steps: 300 + metrics_every_n_train_steps: 1000 torch_scheduler_kwargs: max_num_epochs: ${constants.max_epochs} trainer: trainer: precision: 16-mixed - accumulate_grad_batches: 2 + # accumulate_grad_batches: 2 max_epochs: ${constants.max_epochs} \ No newline at end of file diff --git a/expts/hydra-configs/training/accelerator/toymix_cpu.yaml b/expts/hydra-configs/training/accelerator/toymix_cpu.yaml index 9375aaf46..9022eeb84 100644 --- a/expts/hydra-configs/training/accelerator/toymix_cpu.yaml +++ b/expts/hydra-configs/training/accelerator/toymix_cpu.yaml @@ -2,10 +2,10 @@ datamodule: args: - batch_size_training: 1000 - batch_size_inference: 1000 - featurization_n_jobs: 0 - num_workers: 0 + batch_size_training: 200 + batch_size_inference: 200 + featurization_n_jobs: 4 + num_workers: 4 predictor: optim_kwargs: {} diff --git a/expts/hydra-configs/training/accelerator/toymix_gpu.yaml b/expts/hydra-configs/training/accelerator/toymix_gpu.yaml index c88145804..c2c8e4066 100644 --- a/expts/hydra-configs/training/accelerator/toymix_gpu.yaml +++ b/expts/hydra-configs/training/accelerator/toymix_gpu.yaml @@ -5,10 +5,10 @@ accelerator: datamodule: args: - batch_size_training: 1000 - batch_size_inference: 1000 - featurization_n_jobs: 0 - num_workers: 0 + batch_size_training: 200 + batch_size_inference: 200 + featurization_n_jobs: 4 + num_workers: 4 predictor: optim_kwargs: {} diff --git a/expts/hydra-configs/training/largemix.yaml b/expts/hydra-configs/training/largemix.yaml index fb0e486b6..cb95fb2fe 100644 --- a/expts/hydra-configs/training/largemix.yaml +++ b/expts/hydra-configs/training/largemix.yaml @@ -7,23 +7,27 @@ predictor: # weight_decay: 1.e-7 torch_scheduler_kwargs: module_type: WarmUpLinearLR - max_num_epochs: ${constants.max_epochs} + max_num_epochs: &max_epochs 100 warmup_epochs: 10 verbose: False - scheduler_kwargs: null + scheduler_kwargs: target_nan_mask: null # null: no mask, 0: 0 mask, ignore-flatten, ignore-mean-per-label multitask_handling: flatten # flatten, mean-per-label trainer: seed: ${constants.seed} + logger: + save_dir: logs/neurips2023-large/ + name: ${constants.name} + project: ${constants.name} model_checkpoint: dirpath: model_checkpoints/large-dataset/ - filename: best + filename: ${constants.name} save_last: 
True # saving last model - save_top_k: 1 # and best model - monitor: loss/val # wrt validation loss + # save_top_k: 1 # and best model + # monitor: loss/val # wrt validation loss trainer: precision: 16-mixed - max_epochs: ${constants.max_epochs} + max_epochs: ${predictor.torch_scheduler_kwargs.max_num_epochs} min_epochs: 1 - check_val_every_n_epoch: 1 \ No newline at end of file + check_val_every_n_epoch: 20 \ No newline at end of file diff --git a/expts/hydra-configs/training/model/largemix_gcn.yaml b/expts/hydra-configs/training/model/largemix_gcn.yaml index e2fb5d444..a18ea412e 100644 --- a/expts/hydra-configs/training/model/largemix_gcn.yaml +++ b/expts/hydra-configs/training/model/largemix_gcn.yaml @@ -9,7 +9,7 @@ constants: save_dir: logs/${constants.name} entity: multitask-gnn seed: 42 - max_epochs: 100 + max_epochs: 200 data_dir: expts/data/large-dataset raise_train_error: true diff --git a/expts/hydra-configs/training/model/largemix_gin.yaml b/expts/hydra-configs/training/model/largemix_gin.yaml new file mode 100644 index 000000000..4cfeeec9f --- /dev/null +++ b/expts/hydra-configs/training/model/largemix_gin.yaml @@ -0,0 +1,18 @@ +# @package _global_ + +constants: + name: large_data_gin + wandb: + name: ${constants.name} + project: neurips2023-expts + entity: multitask-gnn + save_dir: logs/${constants.name} + entity: multitask-gnn + seed: 42 + max_epochs: 200 + data_dir: expts/data/large-dataset + raise_train_error: true + +trainer: + model_checkpoint: + dirpath: model_checkpoints/large-dataset/gin/ \ No newline at end of file diff --git a/expts/hydra-configs/training/model/largemix_gine.yaml b/expts/hydra-configs/training/model/largemix_gine.yaml index cce70c305..7fd722b2d 100644 --- a/expts/hydra-configs/training/model/largemix_gine.yaml +++ b/expts/hydra-configs/training/model/largemix_gine.yaml @@ -9,7 +9,7 @@ constants: save_dir: logs/${constants.name} entity: multitask-gnn seed: 42 - max_epochs: 100 + max_epochs: 200 data_dir: expts/data/large-dataset raise_train_error: true diff --git a/expts/hydra-configs/training/model/toymix_gcn.yaml b/expts/hydra-configs/training/model/toymix_gcn.yaml index f1df5618c..0fd27be90 100644 --- a/expts/hydra-configs/training/model/toymix_gcn.yaml +++ b/expts/hydra-configs/training/model/toymix_gcn.yaml @@ -1,18 +1,12 @@ # @package _global_ constants: - name: small_data_gcn - wandb: - name: ${constants.name} - project: neurips2023-expts - entity: multitask-gnn - save_dir: logs/${constants.name} - entity: multitask-gnn + name: neurips2023_small_data_gcn seed: 42 max_epochs: 100 - data_dir: expts/data/small-dataset + data_dir: expts/data/neurips2023/small-dataset raise_train_error: true trainer: model_checkpoint: - dirpath: model_checkpoints/small-dataset/gcn/ \ No newline at end of file + dirpath: models_checkpoints/small-dataset/gcn/ \ No newline at end of file diff --git a/expts/hydra-configs/training/toymix.yaml b/expts/hydra-configs/training/toymix.yaml index 8be3fea04..5e4ece0b1 100644 --- a/expts/hydra-configs/training/toymix.yaml +++ b/expts/hydra-configs/training/toymix.yaml @@ -3,7 +3,7 @@ predictor: random_seed: ${constants.seed} optim_kwargs: - lr: 1.e-4 # warmup can be scheduled using torch_scheduler_kwargs + lr: 4.e-5 # warmup can be scheduled using torch_scheduler_kwargs # weight_decay: 1.e-7 torch_scheduler_kwargs: module_type: WarmUpLinearLR @@ -11,19 +11,16 @@ predictor: warmup_epochs: 10 verbose: False scheduler_kwargs: null - target_nan_mask: null # null: no mask, 0: 0 mask, ignore-flatten, ignore-mean-per-label + 
target_nan_mask: null multitask_handling: flatten # flatten, mean-per-label trainer: seed: ${constants.seed} model_checkpoint: - dirpath: model_checkpoints/large-dataset/ - filename: ${constants.name}-{epoch} - save_last: True # saving last model - save_top_k: 1 # and best model - monitor: loss/val # wrt validation loss + filename: ${constants.name} + save_last: True trainer: - precision: 16-mixed - max_epochs: ${constants.max_epochs} + precision: 16 + max_epochs: ${constants.max_epochs} min_epochs: 1 - check_val_every_n_epoch: 1 \ No newline at end of file + check_val_every_n_epoch: 20 \ No newline at end of file diff --git a/graphium/cli/test.py b/graphium/cli/test.py index ac9bcf6c3..d4070be03 100644 --- a/graphium/cli/test.py +++ b/graphium/cli/test.py @@ -50,7 +50,7 @@ def run_testing(cfg: DictConfig) -> None: datamodule = load_datamodule(cfg, accelerator_type) ## Load Predictor - predictor = PredictorModule.load_from_checkpoint(checkpoint_path=get_checkpoint_path(cfg)) + predictor = PredictorModule.load_from_checkpoint(checkpoint_path=get_checkpoint_path(cfg), map_location=cfg["accelerator"]["type"]) ## Load Trainer date_time_suffix = datetime.now().strftime("%d.%m.%Y_%H.%M.%S") diff --git a/scripts/featurize/featurize.sh b/scripts/featurize/featurize.sh new file mode 100644 index 000000000..4bbdc902b --- /dev/null +++ b/scripts/featurize/featurize.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=featurize + +## Files for logs: here we redirect stdout and stderr to the same file +#SBATCH --output=outputs/featurize.out +#SBATCH --error=outputs/error_featurize.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=c112 + +set -e + +micromamba run -n graphium -c graphium-prepare-data \ + architecture=largemix \ + tasks=pcba_1328 \ + training=largemix \ \ No newline at end of file diff --git a/scripts/test/gcn/test-best-th2_gcn_v100.sh b/scripts/test/gcn/test-best-th2_gcn_v100.sh new file mode 100644 index 000000000..5b454df9c --- /dev/null +++ b/scripts/test/gcn/test-best-th2_gcn_v100.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=test-best-th2_gcn_v100 + +## Files for logs: here we redirect stdout and stderr to the same file +#SBATCH --output=outputs/test-best-th2_gcn_v100.out +#SBATCH --error=outputs/error_test-best-th2_gcn_v100.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=v1001 + +set -e + +micromamba run -n graphium -c graphium-test architecture=largemix tasks=largemix training=largemix \ + model=gcn accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th2/gcn/gpu/" \ + datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th2.csv.gz" \ + datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th2.csv.gz" \ + +ckpt_name_for_testing="best" + \ No newline at end of file diff --git a/scripts/test/gcn/test-last-th2_gcn_a100.sh b/scripts/test/gcn/test-last-th2_gcn_a100.sh new file mode 100644 index 000000000..76db6301d --- /dev/null +++ b/scripts/test/gcn/test-last-th2_gcn_a100.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=test-last-th2_gcn_a100 + +## Files for logs: here we redirect stdout and stderr to the same file +#SBATCH --output=outputs/test-last-th2_gcn_a100.out
+#SBATCH --error=outputs/error_test-last-th2_gcn_a100.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=a1001 + +set -e + +micromamba run -n graphium -c graphium-test architecture=largemix tasks=largemix training=largemix \ + model=gcn accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th2/gcn/gpu/" \ + datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th2.csv.gz" \ + datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th2.csv.gz" + \ No newline at end of file diff --git a/scripts/test/gin/test-best-th2_gin_v100.sh b/scripts/test/gin/test-best-th2_gin_v100.sh new file mode 100644 index 000000000..5b454df9c --- /dev/null +++ b/scripts/test/gin/test-best-th2_gin_v100.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=test-best-th2_gin_v100 + +## Files for logs: here we redirect stdout and stderr to the same file +#SBATCH --output=outputs/test-best-th2_gin_v100.out +#SBATCH --error=outputs/error_test-best-th2_gin_v100.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=v1001 + +set -e + +micromamba run -n graphium -c graphium-test architecture=largemix tasks=largemix training=largemix \ + model=gin accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th2/gin/gpu/" \ + datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th2.csv.gz" \ + datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th2.csv.gz" \ + +ckpt_name_for_testing="best" + \ No newline at end of file diff --git a/scripts/test/gin/test-last-th2_gin_a100.sh b/scripts/test/gin/test-last-th2_gin_a100.sh new file mode 100644 index 000000000..76db6301d --- /dev/null +++ b/scripts/test/gin/test-last-th2_gin_a100.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=test-last-th2_gin_a100 + +## Files for logs: here we redirect stdout and stderr to the same file +#SBATCH --output=outputs/test-last-th2_gin_a100.out +#SBATCH --error=outputs/error_test-last-th2_gin_a100.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=a1001 + +set -e + +micromamba run -n graphium -c graphium-test architecture=largemix tasks=largemix training=largemix \ + model=gin accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th2/gin/gpu/" \ + datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th2.csv.gz" \ + datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th2.csv.gz" + \ No newline at end of file diff --git a/scripts/test/gine/test-best-th2_gine_v100.sh b/scripts/test/gine/test-best-th2_gine_v100.sh new file mode 100644 index 000000000..a88254f4d --- /dev/null +++ b/scripts/test/gine/test-best-th2_gine_v100.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=test-best-th2_gine_v100 + +## Files for logs: here we redirect stdout and stderr to the same file +#SBATCH --output=outputs/test-best-th2_gine_v100.out +#SBATCH --error=outputs/error_test-best-th2_gine_v100.out +#SBATCH --open-mode=append + +## Time limit for the job
+#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=v1001 + +set -e + +micromamba run -n graphium -c graphium-test architecture=largemix tasks=largemix training=largemix \ + model=gine accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th2/gine/gpu/" \ + datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th2.csv.gz" \ + datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th2.csv.gz" \ + +ckpt_name_for_testing="best" + \ No newline at end of file diff --git a/scripts/test/gine/test-last-th2_gine_a100.sh b/scripts/test/gine/test-last-th2_gine_a100.sh new file mode 100644 index 000000000..9a1bab0eb --- /dev/null +++ b/scripts/test/gine/test-last-th2_gine_a100.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=test-last-th2_gine_c112 + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/test-last-th2_gine_c112.out +#SBATCH --error=outputs/error_test-last-th2_gine_c112.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=c112 + +set -e + +micromamba run -n graphium -c graphium-test architecture=largemix tasks=largemix training=largemix \ + model=gine accelerator=cpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th2/gine/cpu/" \ + datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th2.csv.gz" \ + datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th2.csv.gz" + \ No newline at end of file diff --git a/scripts/test/gine/test-last-th2_gine_a112.sh b/scripts/test/gine/test-last-th2_gine_a112.sh new file mode 100644 index 000000000..90d339758 --- /dev/null +++ b/scripts/test/gine/test-last-th2_gine_a112.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=test-last-th2_gine_a100 + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/test-last-th2_gine_a100.out +#SBATCH --error=outputs/error_test-last-th2_gine_a100.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=a1001 + +set -e + +micromamba run -n graphium -c graphium-test architecture=largemix tasks=largemix training=largemix \ + model=gine accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th2/gine/gpu/" \ + datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th2.csv.gz" \ + datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th2.csv.gz" + \ No newline at end of file diff --git a/scripts/test/test-on-cpu.sh b/scripts/test/test-on-cpu.sh new file mode 100644 index 000000000..398739a03 --- /dev/null +++ b/scripts/test/test-on-cpu.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=test-on-cpu + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/name=test-on-cpu.out +#SBATCH --error=outputs/error_name=test-on-cpu.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=c112 + +set -e + +micromamba run -n graphium -c graphium-test architecture=largemix tasks=largemix 
training=largemix \ + accelerator=cpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th4/gcn/gpu/" \ + datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th4.csv.gz" \ + datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th4.csv.gz" \ + model=gcn +ckpt_name_for_testing="best" + \ No newline at end of file diff --git a/scripts/train/gcn/gpu/large_th2_gcn_a100_disk.sh b/scripts/train/gcn/gpu/large_th2_gcn_a100_disk.sh new file mode 100644 index 000000000..20d6f764e --- /dev/null +++ b/scripts/train/gcn/gpu/large_th2_gcn_a100_disk.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=large_th2_gcn_a100_disk + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/large_th2_gcn_a100_disk.out +#SBATCH --error=outputs/error_large_th2_gcn_a100_disk.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=a1001 + +set -e + +micromamba run -n graphium -c graphium-train architecture=largemix tasks=largemix training=largemix \ + model=gcn accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th2/gcn/gpu/" \ + datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th2.csv.gz" \ + datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th2.csv.gz" + \ No newline at end of file diff --git a/scripts/train/gcn/gpu/large_th3_gcn_a100_disk.sh b/scripts/train/gcn/gpu/large_th3_gcn_a100_disk.sh new file mode 100644 index 000000000..e30951829 --- /dev/null +++ b/scripts/train/gcn/gpu/large_th3_gcn_a100_disk.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=large_th3_gcn_a100_disk + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/large_th3_gcn_a100_disk.out +#SBATCH --error=outputs/error_large_th3_gcn_a100_disk.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=a1001 + +set -e + +micromamba run -n graphium -c graphium-train architecture=largemix tasks=largemix training=largemix \ + model=gcn accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th3/gcn/gpu/" \ + datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th3.csv.gz" \ + datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th3.csv.gz" + \ No newline at end of file diff --git a/scripts/train/gcn/gpu/large_th4_gcn_a100_disk.sh b/scripts/train/gcn/gpu/large_th4_gcn_a100_disk.sh new file mode 100644 index 000000000..556cc5b57 --- /dev/null +++ b/scripts/train/gcn/gpu/large_th4_gcn_a100_disk.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=large_th4_gcn_a100_disk + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/large_th4_gcn_a100_disk.out +#SBATCH --error=outputs/error_large_th4_gcn_a100_disk.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=a1001 + +set -e + +micromamba run -n graphium -c graphium-train architecture=largemix tasks=largemix training=largemix \ + model=gcn accelerator=gpu \ + 
trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th4/gcn/gpu/" \ + datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th4.csv.gz" \ + datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th4.csv.gz" + \ No newline at end of file diff --git a/scripts/train/gin/gpu/large_th2_gin_a100_disk.sh b/scripts/train/gin/gpu/large_th2_gin_a100_disk.sh new file mode 100644 index 000000000..6a6d8a701 --- /dev/null +++ b/scripts/train/gin/gpu/large_th2_gin_a100_disk.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=large_th2_gin_a100_disk + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/large_th2_gin_a100_disk.out +#SBATCH --error=outputs/error_large_th2_gin_a100_disk.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=a1001 + +set -e + +micromamba run -n graphium -c graphium-train architecture=largemix tasks=largemix training=largemix \ + model=gin accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th2/gin/gpu/" \ + datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th2.csv.gz" \ + datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th2.csv.gz" + \ No newline at end of file diff --git a/scripts/train/gin/gpu/large_th3_gin_a100_disk.sh b/scripts/train/gin/gpu/large_th3_gin_a100_disk.sh new file mode 100644 index 000000000..94570d301 --- /dev/null +++ b/scripts/train/gin/gpu/large_th3_gin_a100_disk.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=large_th3_gin_a100_disk + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/large_th3_gin_a100_disk.out +#SBATCH --error=outputs/error_large_th3_gin_a100_disk.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=a1001 + +set -e + +micromamba run -n graphium -c graphium-train architecture=largemix tasks=largemix training=largemix \ + model=gin accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th3/gin/gpu/" \ + datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th3.csv.gz" \ + datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th3.csv.gz" + \ No newline at end of file diff --git a/scripts/train/gin/gpu/large_th4_gin_a100_disk.sh b/scripts/train/gin/gpu/large_th4_gin_a100_disk.sh new file mode 100644 index 000000000..c900f1102 --- /dev/null +++ b/scripts/train/gin/gpu/large_th4_gin_a100_disk.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=large_th4_gin_a100_disk + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/large_th4_gin_a100_disk.out +#SBATCH --error=outputs/error_large_th4_gin_a100_disk.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=a1001 + +set -e + +micromamba run -n graphium -c graphium-train architecture=largemix tasks=largemix training=largemix \ + model=gin accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th4/gin/gpu/" \ + 
datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th4.csv.gz" \ + datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th4.csv.gz" + \ No newline at end of file diff --git a/scripts/train/gine/cpu/large_th2_gine_c112_disk.sh b/scripts/train/gine/cpu/large_th2_gine_c112_disk.sh new file mode 100644 index 000000000..86ef64043 --- /dev/null +++ b/scripts/train/gine/cpu/large_th2_gine_c112_disk.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=large_th2_gine_c112_disk + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/large_th2_gine_c112_disk.out +#SBATCH --error=outputs/error_large_th2_gine_c112_disk.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=c112 + +set -e + +micromamba run -n graphium -c graphium-train architecture=largemix tasks=largemix training=largemix \ + model=gine accelerator=cpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th2/gine/cpu/" \ + datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th2.csv.gz" \ + datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th2.csv.gz" + \ No newline at end of file diff --git a/scripts/train/gine/cpu/large_th3_gine_c112_disk.sh b/scripts/train/gine/cpu/large_th3_gine_c112_disk.sh new file mode 100644 index 000000000..4d1d51a94 --- /dev/null +++ b/scripts/train/gine/cpu/large_th3_gine_c112_disk.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=large_th3_gine_c112_disk + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/large_th3_gine_c112_disk.out +#SBATCH --error=outputs/error_large_th3_gine_c112_disk.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=c112 + +set -e + +micromamba run -n graphium -c graphium-train architecture=largemix tasks=largemix training=largemix \ + model=gine accelerator=cpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th3/gine/cpu/" \ + datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th3.csv.gz" \ + datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th3.csv.gz" + \ No newline at end of file diff --git a/scripts/train/gine/cpu/large_th4_gine_c112_disk.sh b/scripts/train/gine/cpu/large_th4_gine_c112_disk.sh new file mode 100644 index 000000000..2262f6e5a --- /dev/null +++ b/scripts/train/gine/cpu/large_th4_gine_c112_disk.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=large_th4_gine_c112_disk + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/large_th4_gine_c112_disk.out +#SBATCH --error=outputs/error_large_th4_gine_c112_disk.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=c112 + +set -e + +micromamba run -n graphium -c graphium-train architecture=largemix tasks=largemix training=largemix \ + model=gine accelerator=cpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th4/gine/cpu/" \ + 
datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th4.csv.gz" \ + datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th4.csv.gz" + \ No newline at end of file diff --git a/scripts/train/gine/gpu/large_th2_gine_a100_disk.sh b/scripts/train/gine/gpu/large_th2_gine_a100_disk.sh new file mode 100644 index 000000000..f0ceda809 --- /dev/null +++ b/scripts/train/gine/gpu/large_th2_gine_a100_disk.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=large_th2_gine_a100_disk + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/large_th2_gine_a100_disk.out +#SBATCH --error=outputs/error_large_th2_gine_a100_disk.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=a1001 + +set -e + +micromamba run -n graphium -c graphium-train architecture=largemix tasks=largemix training=largemix \ + model=gine accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th2/gine/gpu/" \ + datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th2.csv.gz" \ + datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th2.csv.gz" + \ No newline at end of file diff --git a/scripts/train/gine/gpu/large_th3_gine_a100_disk.sh b/scripts/train/gine/gpu/large_th3_gine_a100_disk.sh new file mode 100644 index 000000000..575df9c99 --- /dev/null +++ b/scripts/train/gine/gpu/large_th3_gine_a100_disk.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=large_th3_gine_a100_disk + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/large_th3_gine_a100_disk.out +#SBATCH --error=outputs/error_large_th3_gine_a100_disk.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=a1001 + +set -e + +micromamba run -n graphium -c graphium-train architecture=largemix tasks=largemix training=largemix \ + model=gine accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th3/gine/gpu/" \ + datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th3.csv.gz" \ + datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th3.csv.gz" + \ No newline at end of file diff --git a/scripts/train/gine/gpu/large_th4_gine_a100_disk.sh b/scripts/train/gine/gpu/large_th4_gine_a100_disk.sh new file mode 100644 index 000000000..aec748abe --- /dev/null +++ b/scripts/train/gine/gpu/large_th4_gine_a100_disk.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=large_th4_gine_a100_disk + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/large_th4_gine_a100_disk.out +#SBATCH --error=outputs/error_large_th4_gine_a100_disk.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=a1001 + +set -e + +micromamba run -n graphium -c graphium-train architecture=largemix tasks=largemix training=largemix \ + model=gine accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th4/gine/gpu/" \ + 
datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th4.csv.gz" \ + datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th4.csv.gz" + \ No newline at end of file diff --git a/scripts/train/sweep.sh b/scripts/train/sweep.sh new file mode 100644 index 000000000..c1647349f --- /dev/null +++ b/scripts/train/sweep.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=sweep + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/sweep.out +#SBATCH --error=outputs/error_sweep.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=c112 + +set -e + +source activate graphium_dev + +wandb agent multitask-gnn/neurips2023-large-single-dataset/d2cm706t \ No newline at end of file diff --git a/scripts/train/test.sh b/scripts/train/test.sh new file mode 100644 index 000000000..e24adc744 --- /dev/null +++ b/scripts/train/test.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +source /home/frederik_valencediscovery_com/.bashrc +source activate graphium_dev + +wandb agent multitask-gnn/neurips2023-large-single-dataset/d2cm706t \ No newline at end of file diff --git a/scripts/train/train.sh b/scripts/train/train.sh new file mode 100644 index 000000000..0c3b44fed --- /dev/null +++ b/scripts/train/train.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=train + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/train.out +#SBATCH --error=outputs/error_train.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=a1001 + +set -e + +micromamba run -n graphium -c graphium-train \ + model=gine \ + architecture=largemix \ + tasks=l1000_mcf7 \ + training=largemix \ + accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gine/" \ + predictor.optim_kwargs.lr=0.0002 \ + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file From 77a28a035eed72a5af6bee73ca270a4178ef11b4 Mon Sep 17 00:00:00 2001 From: WenkelF Date: Wed, 23 Aug 2023 14:38:28 -0400 Subject: [PATCH 06/30] Adding script for test sweep --- test_sweep.sh | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 test_sweep.sh diff --git a/test_sweep.sh b/test_sweep.sh new file mode 100644 index 000000000..313859caa --- /dev/null +++ b/test_sweep.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=sweep + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/sweep.out +#SBATCH --error=outputs/error_sweep.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=c112 + +set -e + +source /home/frederik_valencediscovery_com/.bashrc +cd /home/frederik_valencediscovery_com/projects/graphium_expts +source activate graphium_dev + +wandb agent multitask-gnn/neurips2023-large-single-dataset/46xtxuht \ No newline at end of file From af948e173451e17a6e8bafe8a5f5e4a14300c9ea Mon Sep 17 00:00:00 2001 From: WenkelF Date: Wed, 23 Aug 2023 15:17:26 -0400 Subject: [PATCH 07/30] scripts for sbatch --- sweeps/gcn-l1000_mcf7.sh | 23 +++++++++++++++++++++++ sweeps/gcn-l1000_vcap.sh | 23 +++++++++++++++++++++++ sweeps/gcn-pcba_1328.sh | 23 
+++++++++++++++++++++++ sweeps/gin-l1000_mcf7.sh | 23 +++++++++++++++++++++++ sweeps/gin-l1000_vcap.sh | 23 +++++++++++++++++++++++ sweeps/gin-pcba_1328.sh | 23 +++++++++++++++++++++++ sweeps/gine-l1000_mcf7.sh | 23 +++++++++++++++++++++++ sweeps/gine-l1000_vcap.sh | 23 +++++++++++++++++++++++ sweeps/gine-pcba_1328.sh | 23 +++++++++++++++++++++++ 9 files changed, 207 insertions(+) create mode 100644 sweeps/gcn-l1000_mcf7.sh create mode 100644 sweeps/gcn-l1000_vcap.sh create mode 100644 sweeps/gcn-pcba_1328.sh create mode 100644 sweeps/gin-l1000_mcf7.sh create mode 100644 sweeps/gin-l1000_vcap.sh create mode 100644 sweeps/gin-pcba_1328.sh create mode 100644 sweeps/gine-l1000_mcf7.sh create mode 100644 sweeps/gine-l1000_vcap.sh create mode 100644 sweeps/gine-pcba_1328.sh diff --git a/sweeps/gcn-l1000_mcf7.sh b/sweeps/gcn-l1000_mcf7.sh new file mode 100644 index 000000000..6900f3986 --- /dev/null +++ b/sweeps/gcn-l1000_mcf7.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=sweep + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/sweep.out +#SBATCH --error=outputs/error_sweep.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=a1001 + +set -e + +source /home/frederik_valencediscovery_com/.bashrc +cd /home/frederik_valencediscovery_com/projects/graphium_expts +source activate graphium_dev + +wandb agent multitask-gnn/neurips2023-large-single-dataset/ntzo59la \ No newline at end of file diff --git a/sweeps/gcn-l1000_vcap.sh b/sweeps/gcn-l1000_vcap.sh new file mode 100644 index 000000000..4938654de --- /dev/null +++ b/sweeps/gcn-l1000_vcap.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=sweep + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/sweep.out +#SBATCH --error=outputs/error_sweep.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=a1001a1001a1001 + +set -e + +source /home/frederik_valencediscovery_com/.bashrc +cd /home/frederik_valencediscovery_com/projects/graphium_expts +source activate graphium_dev + +wandb agent multitask-gnn/neurips2023-large-single-dataset/0d3n1d5g \ No newline at end of file diff --git a/sweeps/gcn-pcba_1328.sh b/sweeps/gcn-pcba_1328.sh new file mode 100644 index 000000000..fc64ccb3b --- /dev/null +++ b/sweeps/gcn-pcba_1328.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=sweep + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/sweep.out +#SBATCH --error=outputs/error_sweep.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=a1004 + +set -e + +source /home/frederik_valencediscovery_com/.bashrc +cd /home/frederik_valencediscovery_com/projects/graphium_expts +source activate graphium_dev + +wandb agent multitask-gnn/neurips2023-large-single-dataset/c22gvpm5 \ No newline at end of file diff --git a/sweeps/gin-l1000_mcf7.sh b/sweeps/gin-l1000_mcf7.sh new file mode 100644 index 000000000..9b66e0973 --- /dev/null +++ b/sweeps/gin-l1000_mcf7.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=sweep + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/sweep.out +#SBATCH 
--error=outputs/error_sweep.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=a1001 + +set -e + +source /home/frederik_valencediscovery_com/.bashrc +cd /home/frederik_valencediscovery_com/projects/graphium_expts +source activate graphium_dev + +wandb agent multitask-gnn/neurips2023-large-single-dataset/s12dfu36 \ No newline at end of file diff --git a/sweeps/gin-l1000_vcap.sh b/sweeps/gin-l1000_vcap.sh new file mode 100644 index 000000000..2208bd7dc --- /dev/null +++ b/sweeps/gin-l1000_vcap.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=sweep + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/sweep.out +#SBATCH --error=outputs/error_sweep.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=a1001 + +set -e + +source /home/frederik_valencediscovery_com/.bashrc +cd /home/frederik_valencediscovery_com/projects/graphium_expts +source activate graphium_dev + +wandb agent multitask-gnn/neurips2023-large-single-dataset/kmtjw7up \ No newline at end of file diff --git a/sweeps/gin-pcba_1328.sh b/sweeps/gin-pcba_1328.sh new file mode 100644 index 000000000..4a425728d --- /dev/null +++ b/sweeps/gin-pcba_1328.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=sweep + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/sweep.out +#SBATCH --error=outputs/error_sweep.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=a1004 + +set -e + +source /home/frederik_valencediscovery_com/.bashrc +cd /home/frederik_valencediscovery_com/projects/graphium_expts +source activate graphium_dev + +wandb agent multitask-gnn/neurips2023-large-single-dataset/xzyzv04b \ No newline at end of file diff --git a/sweeps/gine-l1000_mcf7.sh b/sweeps/gine-l1000_mcf7.sh new file mode 100644 index 000000000..3da32245d --- /dev/null +++ b/sweeps/gine-l1000_mcf7.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=sweep + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/sweep.out +#SBATCH --error=outputs/error_sweep.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=a1001 + +set -e + +source /home/frederik_valencediscovery_com/.bashrc +cd /home/frederik_valencediscovery_com/projects/graphium_expts +source activate graphium_dev + +wandb agent multitask-gnn/neurips2023-large-single-dataset/4v2rsrvo \ No newline at end of file diff --git a/sweeps/gine-l1000_vcap.sh b/sweeps/gine-l1000_vcap.sh new file mode 100644 index 000000000..39e5197b7 --- /dev/null +++ b/sweeps/gine-l1000_vcap.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=sweep + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/sweep.out +#SBATCH --error=outputs/error_sweep.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=a1001 + +set -e + +source /home/frederik_valencediscovery_com/.bashrc +cd /home/frederik_valencediscovery_com/projects/graphium_expts +source activate graphium_dev + +wandb agent 
multitask-gnn/neurips2023-large-single-dataset/cxmipyd8 \ No newline at end of file diff --git a/sweeps/gine-pcba_1328.sh b/sweeps/gine-pcba_1328.sh new file mode 100644 index 000000000..0c540fbd8 --- /dev/null +++ b/sweeps/gine-pcba_1328.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=sweep + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/sweep.out +#SBATCH --error=outputs/error_sweep.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=a1004 + +set -e + +source /home/frederik_valencediscovery_com/.bashrc +cd /home/frederik_valencediscovery_com/projects/graphium_expts +source activate graphium_dev + +wandb agent multitask-gnn/neurips2023-large-single-dataset/jtj1cb4r \ No newline at end of file From 7b0fdf3df1fe7cfd28fb54378b991b86465da7bb Mon Sep 17 00:00:00 2001 From: WenkelF Date: Wed, 23 Aug 2023 15:32:19 -0400 Subject: [PATCH 08/30] correction --- sweeps/gcn-l1000_vcap.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sweeps/gcn-l1000_vcap.sh b/sweeps/gcn-l1000_vcap.sh index 4938654de..75183f7b3 100644 --- a/sweeps/gcn-l1000_vcap.sh +++ b/sweeps/gcn-l1000_vcap.sh @@ -12,7 +12,7 @@ #SBATCH --time=120:00:00 ## Partition to use, -#SBATCH --partition=a1001a1001a1001 +#SBATCH --partition=a1001 set -e From 17a92fe24d1c9b293873f95b0c139a9ebf724106 Mon Sep 17 00:00:00 2001 From: WenkelF Date: Wed, 23 Aug 2023 15:53:29 -0400 Subject: [PATCH 09/30] Switching to V100 --- sweeps/gcn-l1000_mcf7.sh | 2 +- sweeps/gcn-l1000_vcap.sh | 2 +- sweeps/gin-l1000_mcf7.sh | 2 +- sweeps/gin-l1000_vcap.sh | 2 +- sweeps/gine-l1000_mcf7.sh | 2 +- sweeps/gine-l1000_vcap.sh | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sweeps/gcn-l1000_mcf7.sh b/sweeps/gcn-l1000_mcf7.sh index 6900f3986..4feae870d 100644 --- a/sweeps/gcn-l1000_mcf7.sh +++ b/sweeps/gcn-l1000_mcf7.sh @@ -12,7 +12,7 @@ #SBATCH --time=120:00:00 ## Partition to use, -#SBATCH --partition=a1001 +#SBATCH --partition=v1001 set -e diff --git a/sweeps/gcn-l1000_vcap.sh b/sweeps/gcn-l1000_vcap.sh index 75183f7b3..9777c16c6 100644 --- a/sweeps/gcn-l1000_vcap.sh +++ b/sweeps/gcn-l1000_vcap.sh @@ -12,7 +12,7 @@ #SBATCH --time=120:00:00 ## Partition to use, -#SBATCH --partition=a1001 +#SBATCH --partition=v1001 set -e diff --git a/sweeps/gin-l1000_mcf7.sh b/sweeps/gin-l1000_mcf7.sh index 9b66e0973..cc6c08688 100644 --- a/sweeps/gin-l1000_mcf7.sh +++ b/sweeps/gin-l1000_mcf7.sh @@ -12,7 +12,7 @@ #SBATCH --time=120:00:00 ## Partition to use, -#SBATCH --partition=a1001 +#SBATCH --partition=v1001 set -e diff --git a/sweeps/gin-l1000_vcap.sh b/sweeps/gin-l1000_vcap.sh index 2208bd7dc..4484d988d 100644 --- a/sweeps/gin-l1000_vcap.sh +++ b/sweeps/gin-l1000_vcap.sh @@ -12,7 +12,7 @@ #SBATCH --time=120:00:00 ## Partition to use, -#SBATCH --partition=a1001 +#SBATCH --partition=v1001 set -e diff --git a/sweeps/gine-l1000_mcf7.sh b/sweeps/gine-l1000_mcf7.sh index 3da32245d..f4ad85467 100644 --- a/sweeps/gine-l1000_mcf7.sh +++ b/sweeps/gine-l1000_mcf7.sh @@ -12,7 +12,7 @@ #SBATCH --time=120:00:00 ## Partition to use, -#SBATCH --partition=a1001 +#SBATCH --partition=v1001 set -e diff --git a/sweeps/gine-l1000_vcap.sh b/sweeps/gine-l1000_vcap.sh index 39e5197b7..dc5361aef 100644 --- a/sweeps/gine-l1000_vcap.sh +++ b/sweeps/gine-l1000_vcap.sh @@ -12,7 +12,7 @@ #SBATCH --time=120:00:00 ## Partition to use, -#SBATCH --partition=a1001 +#SBATCH --partition=v1001 set -e From 
00cedf9747d477499279e09a72b3b82cc3a0c67d Mon Sep 17 00:00:00 2001 From: WenkelF Date: Wed, 23 Aug 2023 16:08:59 -0400 Subject: [PATCH 10/30] Train script --- train.sh | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 train.sh diff --git a/train.sh b/train.sh new file mode 100644 index 000000000..9a9d12db7 --- /dev/null +++ b/train.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=train + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/train.out +#SBATCH --error=outputs/error_train.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=v1001 + +set -e + +micromamba run -n graphium -c graphium-train \ + model=gine \ + architecture=largemix \ + tasks=l1000_mcf7 \ + training=largemix \ + accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gine/" \ + predictor.optim_kwargs.lr=0.0004 \ + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file From fe4ead78a92c0c936b2c33d66c03109007055009 Mon Sep 17 00:00:00 2001 From: WenkelF Date: Wed, 23 Aug 2023 16:26:01 -0400 Subject: [PATCH 11/30] Changing back to cudatoolkit in env.yml --- env.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/env.yml b/env.yml index e49d071a4..7fc668692 100644 --- a/env.yml +++ b/env.yml @@ -28,7 +28,7 @@ dependencies: - gcsfs >=2021.6 # ML packages - - cuda-version # works also with CPU-only system. + - cudatoolkit # works also with CPU-only system. - pytorch >=1.12 - lightning >=2.0 - torchmetrics >=0.7.0,<0.11 From d6f4ae0e32741e616440558ad78f768df5d6f4f7 Mon Sep 17 00:00:00 2001 From: WenkelF Date: Wed, 23 Aug 2023 16:52:01 -0400 Subject: [PATCH 12/30] Updating test sweep --- test_sweep.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test_sweep.sh b/test_sweep.sh index 313859caa..d0ba8b74c 100644 --- a/test_sweep.sh +++ b/test_sweep.sh @@ -12,7 +12,7 @@ #SBATCH --time=120:00:00 ## Partition to use, -#SBATCH --partition=c112 +#SBATCH --partition=v1001 set -e @@ -20,4 +20,4 @@ source /home/frederik_valencediscovery_com/.bashrc cd /home/frederik_valencediscovery_com/projects/graphium_expts source activate graphium_dev -wandb agent multitask-gnn/neurips2023-large-single-dataset/46xtxuht \ No newline at end of file +wandb agent multitask-gnn/neurips2023-large-single-dataset/xngpcyrd \ No newline at end of file From 2a4a1296c58b548108fda531726672ad8021d0e9 Mon Sep 17 00:00:00 2001 From: WenkelF Date: Wed, 23 Aug 2023 17:11:01 -0400 Subject: [PATCH 13/30] Updating test sweep --- test_sweep.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_sweep.sh b/test_sweep.sh index d0ba8b74c..2541c1104 100644 --- a/test_sweep.sh +++ b/test_sweep.sh @@ -20,4 +20,4 @@ source /home/frederik_valencediscovery_com/.bashrc cd /home/frederik_valencediscovery_com/projects/graphium_expts source activate graphium_dev -wandb agent multitask-gnn/neurips2023-large-single-dataset/xngpcyrd \ No newline at end of file +wandb agent multitask-gnn/neurips2023-large-single-dataset/911gmdar \ No newline at end of file From 9ba7caeabe03d5080dfe4c9d3471297e6b7b36f2 Mon Sep 17 00:00:00 2001 From: WenkelF Date: Wed, 23 Aug 2023 18:00:13 -0400 Subject: [PATCH 14/30] Adding single run scripts --- single_runs/l1000_mcf7/gcn-100.sh | 28 ++++++++++++++++++++++++++++ single_runs/l1000_mcf7/gcn-200.sh | 28 ++++++++++++++++++++++++++++ 
single_runs/l1000_mcf7/gcn-300.sh | 28 ++++++++++++++++++++++++++++ single_runs/l1000_mcf7/gin-100.sh | 28 ++++++++++++++++++++++++++++ single_runs/l1000_mcf7/gin-200.sh | 28 ++++++++++++++++++++++++++++ single_runs/l1000_mcf7/gin-300.sh | 28 ++++++++++++++++++++++++++++ single_runs/l1000_mcf7/gine-100.sh | 28 ++++++++++++++++++++++++++++ single_runs/l1000_mcf7/gine-200.sh | 28 ++++++++++++++++++++++++++++ single_runs/l1000_mcf7/gine-300.sh | 28 ++++++++++++++++++++++++++++ single_runs/l1000_vcap/gcn-100.sh | 28 ++++++++++++++++++++++++++++ single_runs/l1000_vcap/gcn-200.sh | 28 ++++++++++++++++++++++++++++ single_runs/l1000_vcap/gcn-300.sh | 28 ++++++++++++++++++++++++++++ single_runs/l1000_vcap/gin-100.sh | 28 ++++++++++++++++++++++++++++ single_runs/l1000_vcap/gin-200.sh | 28 ++++++++++++++++++++++++++++ single_runs/l1000_vcap/gin-300.sh | 28 ++++++++++++++++++++++++++++ single_runs/l1000_vcap/gine-100.sh | 28 ++++++++++++++++++++++++++++ single_runs/l1000_vcap/gine-200.sh | 28 ++++++++++++++++++++++++++++ single_runs/l1000_vcap/gine-300.sh | 28 ++++++++++++++++++++++++++++ single_runs/pcba_1328/gcn-100.sh | 28 ++++++++++++++++++++++++++++ single_runs/pcba_1328/gcn-200.sh | 28 ++++++++++++++++++++++++++++ single_runs/pcba_1328/gcn-300.sh | 28 ++++++++++++++++++++++++++++ single_runs/pcba_1328/gin-100.sh | 28 ++++++++++++++++++++++++++++ single_runs/pcba_1328/gin-200.sh | 28 ++++++++++++++++++++++++++++ single_runs/pcba_1328/gin-300.sh | 28 ++++++++++++++++++++++++++++ single_runs/pcba_1328/gine-100.sh | 28 ++++++++++++++++++++++++++++ single_runs/pcba_1328/gine-200.sh | 28 ++++++++++++++++++++++++++++ single_runs/pcba_1328/gine-300.sh | 28 ++++++++++++++++++++++++++++ 27 files changed, 756 insertions(+) create mode 100644 single_runs/l1000_mcf7/gcn-100.sh create mode 100644 single_runs/l1000_mcf7/gcn-200.sh create mode 100644 single_runs/l1000_mcf7/gcn-300.sh create mode 100644 single_runs/l1000_mcf7/gin-100.sh create mode 100644 single_runs/l1000_mcf7/gin-200.sh create mode 100644 single_runs/l1000_mcf7/gin-300.sh create mode 100644 single_runs/l1000_mcf7/gine-100.sh create mode 100644 single_runs/l1000_mcf7/gine-200.sh create mode 100644 single_runs/l1000_mcf7/gine-300.sh create mode 100644 single_runs/l1000_vcap/gcn-100.sh create mode 100644 single_runs/l1000_vcap/gcn-200.sh create mode 100644 single_runs/l1000_vcap/gcn-300.sh create mode 100644 single_runs/l1000_vcap/gin-100.sh create mode 100644 single_runs/l1000_vcap/gin-200.sh create mode 100644 single_runs/l1000_vcap/gin-300.sh create mode 100644 single_runs/l1000_vcap/gine-100.sh create mode 100644 single_runs/l1000_vcap/gine-200.sh create mode 100644 single_runs/l1000_vcap/gine-300.sh create mode 100644 single_runs/pcba_1328/gcn-100.sh create mode 100644 single_runs/pcba_1328/gcn-200.sh create mode 100644 single_runs/pcba_1328/gcn-300.sh create mode 100644 single_runs/pcba_1328/gin-100.sh create mode 100644 single_runs/pcba_1328/gin-200.sh create mode 100644 single_runs/pcba_1328/gin-300.sh create mode 100644 single_runs/pcba_1328/gine-100.sh create mode 100644 single_runs/pcba_1328/gine-200.sh create mode 100644 single_runs/pcba_1328/gine-300.sh diff --git a/single_runs/l1000_mcf7/gcn-100.sh b/single_runs/l1000_mcf7/gcn-100.sh new file mode 100644 index 000000000..0ab762292 --- /dev/null +++ b/single_runs/l1000_mcf7/gcn-100.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=train + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/train.out 
+#SBATCH --error=outputs/error_train.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=v1001 + +set -e + +micromamba run -n graphium -c graphium-train \ + model=gcn \ + architecture=largemix \ + tasks=l1000_mcf7 \ + training=largemix \ + accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gcn/100/" \ + predictor.optim_kwargs.lr=0.0004 \ + constants.seed=100 \ + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gcn-200.sh b/single_runs/l1000_mcf7/gcn-200.sh new file mode 100644 index 000000000..499e426ac --- /dev/null +++ b/single_runs/l1000_mcf7/gcn-200.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=train + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/train.out +#SBATCH --error=outputs/error_train.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=v1001 + +set -e + +micromamba run -n graphium -c graphium-train \ + model=gcn \ + architecture=largemix \ + tasks=l1000_mcf7 \ + training=largemix \ + accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gcn/200/" \ + predictor.optim_kwargs.lr=0.0004 \ + constants.seed=200 \ + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gcn-300.sh b/single_runs/l1000_mcf7/gcn-300.sh new file mode 100644 index 000000000..8542561af --- /dev/null +++ b/single_runs/l1000_mcf7/gcn-300.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=train + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/train.out +#SBATCH --error=outputs/error_train.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=v1001 + +set -e + +micromamba run -n graphium -c graphium-train \ + model=gcn \ + architecture=largemix \ + tasks=l1000_mcf7 \ + training=largemix \ + accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gcn/300/" \ + predictor.optim_kwargs.lr=0.0004 \ + constants.seed=300 \ + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gin-100.sh b/single_runs/l1000_mcf7/gin-100.sh new file mode 100644 index 000000000..c48d1f955 --- /dev/null +++ b/single_runs/l1000_mcf7/gin-100.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=train + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/train.out +#SBATCH --error=outputs/error_train.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=v1001 + +set -e + +micromamba run -n graphium -c graphium-train \ + model=gin \ + architecture=largemix \ + tasks=l1000_mcf7 \ + training=largemix \ + accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gin/100/" \ + predictor.optim_kwargs.lr=0.0004 \ + constants.seed=100 \ + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gin-200.sh b/single_runs/l1000_mcf7/gin-200.sh new file mode 100644 index 
000000000..e4fcf7d42 --- /dev/null +++ b/single_runs/l1000_mcf7/gin-200.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=train + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/train.out +#SBATCH --error=outputs/error_train.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=v1001 + +set -e + +micromamba run -n graphium -c graphium-train \ + model=gin \ + architecture=largemix \ + tasks=l1000_mcf7 \ + training=largemix \ + accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gin/200/" \ + predictor.optim_kwargs.lr=0.0004 \ + constants.seed=200 \ + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gin-300.sh b/single_runs/l1000_mcf7/gin-300.sh new file mode 100644 index 000000000..2be5b4d30 --- /dev/null +++ b/single_runs/l1000_mcf7/gin-300.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=train + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/train.out +#SBATCH --error=outputs/error_train.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=v1001 + +set -e + +micromamba run -n graphium -c graphium-train \ + model=gin \ + architecture=largemix \ + tasks=l1000_mcf7 \ + training=largemix \ + accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gin/300/" \ + predictor.optim_kwargs.lr=0.0004 \ + constants.seed=300 \ + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gine-100.sh b/single_runs/l1000_mcf7/gine-100.sh new file mode 100644 index 000000000..9ff03e682 --- /dev/null +++ b/single_runs/l1000_mcf7/gine-100.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=train + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/train.out +#SBATCH --error=outputs/error_train.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=v1001 + +set -e + +micromamba run -n graphium -c graphium-train \ + model=gine \ + architecture=largemix \ + tasks=l1000_mcf7 \ + training=largemix \ + accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gine/100/" \ + predictor.optim_kwargs.lr=0.0004 \ + constants.seed=100 \ + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gine-200.sh b/single_runs/l1000_mcf7/gine-200.sh new file mode 100644 index 000000000..925f9f202 --- /dev/null +++ b/single_runs/l1000_mcf7/gine-200.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=train + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/train.out +#SBATCH --error=outputs/error_train.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=v1001 + +set -e + +micromamba run -n graphium -c graphium-train \ + model=gine \ + architecture=largemix \ + tasks=l1000_mcf7 \ + training=largemix \ + accelerator=gpu \ + 
trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gine/200/" \ + predictor.optim_kwargs.lr=0.0004 \ + constants.seed=200 \ + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gine-300.sh b/single_runs/l1000_mcf7/gine-300.sh new file mode 100644 index 000000000..f86fa4e31 --- /dev/null +++ b/single_runs/l1000_mcf7/gine-300.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=train + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/train.out +#SBATCH --error=outputs/error_train.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=v1001 + +set -e + +micromamba run -n graphium -c graphium-train \ + model=gine \ + architecture=largemix \ + tasks=l1000_mcf7 \ + training=largemix \ + accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gine/300/" \ + predictor.optim_kwargs.lr=0.0004 \ + constants.seed=300 \ + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gcn-100.sh b/single_runs/l1000_vcap/gcn-100.sh new file mode 100644 index 000000000..307e9dde8 --- /dev/null +++ b/single_runs/l1000_vcap/gcn-100.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=train + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/train.out +#SBATCH --error=outputs/error_train.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=v1001 + +set -e + +micromamba run -n graphium -c graphium-train \ + model=gcn \ + architecture=largemix \ + tasks=l1000_vcap \ + training=largemix \ + accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gcn/100/" \ + predictor.optim_kwargs.lr=0.0004 \ + constants.seed=100 \ + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gcn-200.sh b/single_runs/l1000_vcap/gcn-200.sh new file mode 100644 index 000000000..b5a4df952 --- /dev/null +++ b/single_runs/l1000_vcap/gcn-200.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=train + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/train.out +#SBATCH --error=outputs/error_train.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=v1001 + +set -e + +micromamba run -n graphium -c graphium-train \ + model=gcn \ + architecture=largemix \ + tasks=l1000_vcap \ + training=largemix \ + accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gcn/200/" \ + predictor.optim_kwargs.lr=0.0004 \ + constants.seed=200 \ + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gcn-300.sh b/single_runs/l1000_vcap/gcn-300.sh new file mode 100644 index 000000000..5331cce48 --- /dev/null +++ b/single_runs/l1000_vcap/gcn-300.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=train + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/train.out +#SBATCH --error=outputs/error_train.out +#SBATCH --open-mode=append + +## 
Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=v1001 + +set -e + +micromamba run -n graphium -c graphium-train \ + model=gcn \ + architecture=largemix \ + tasks=l1000_vcap \ + training=largemix \ + accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gcn/300/" \ + predictor.optim_kwargs.lr=0.0004 \ + constants.seed=300 \ + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gin-100.sh b/single_runs/l1000_vcap/gin-100.sh new file mode 100644 index 000000000..df8154294 --- /dev/null +++ b/single_runs/l1000_vcap/gin-100.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=train + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/train.out +#SBATCH --error=outputs/error_train.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=v1001 + +set -e + +micromamba run -n graphium -c graphium-train \ + model=gin \ + architecture=largemix \ + tasks=l1000_vcap \ + training=largemix \ + accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gin/100/" \ + predictor.optim_kwargs.lr=0.0004 \ + constants.seed=100 \ + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gin-200.sh b/single_runs/l1000_vcap/gin-200.sh new file mode 100644 index 000000000..fe0c652ea --- /dev/null +++ b/single_runs/l1000_vcap/gin-200.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=train + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/train.out +#SBATCH --error=outputs/error_train.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=v1001 + +set -e + +micromamba run -n graphium -c graphium-train \ + model=gin \ + architecture=largemix \ + tasks=l1000_vcap \ + training=largemix \ + accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gin/200/" \ + predictor.optim_kwargs.lr=0.0004 \ + constants.seed=200 \ + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gin-300.sh b/single_runs/l1000_vcap/gin-300.sh new file mode 100644 index 000000000..4304fa941 --- /dev/null +++ b/single_runs/l1000_vcap/gin-300.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=train + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/train.out +#SBATCH --error=outputs/error_train.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=v1001 + +set -e + +micromamba run -n graphium -c graphium-train \ + model=gin \ + architecture=largemix \ + tasks=l1000_vcap \ + training=largemix \ + accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gin/300/" \ + predictor.optim_kwargs.lr=0.0004 \ + constants.seed=300 \ + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gine-100.sh b/single_runs/l1000_vcap/gine-100.sh new file mode 100644 index 000000000..90071ba3c --- /dev/null +++ b/single_runs/l1000_vcap/gine-100.sh @@ -0,0 +1,28 @@ 
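Every file under single_runs/ follows the same template: a short SLURM header (job name, stdout and stderr redirected to files under outputs/ with --open-mode=append, a 120-hour time limit, and a partition), set -e, and a single "micromamba run -n graphium -c graphium-train" call that selects the Hydra config groups (model, architecture, tasks, training, accelerator) and overrides the checkpoint directory, learning rate, seed, and wandb project. The 27 scripts differ essentially only in the dataset (l1000_mcf7, l1000_vcap, pcba_1328), the model (gcn, gin, gine), and the seed (100, 200, 300); the pcba_1328 scripts additionally target a different partition. The sketch below drives the same 3 x 3 x 3 grid from one loop; it is illustrative only, not a file added by this patch series: it runs the jobs sequentially in the current shell instead of submitting 27 SLURM jobs, and it ignores the per-dataset partition choice.

# Illustrative sketch: the single-dataset grid from this patch driven by one
# loop. All override values are taken from the scripts in this patch; running
# sequentially without sbatch is a simplification.
for task in l1000_mcf7 l1000_vcap pcba_1328; do
  for model in gcn gin gine; do
    for seed in 100 200 300; do
      micromamba run -n graphium -c graphium-train \
        model="${model}" \
        architecture=largemix \
        tasks="${task}" \
        training=largemix \
        accelerator=gpu \
        trainer.model_checkpoint.dirpath="model_checkpoints/${task}/${model}/${seed}/" \
        predictor.optim_kwargs.lr=0.0004 \
        constants.seed="${seed}" \
        constants.wandb.project="neurips2023-large-single-dataset"
    done
  done
done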
+#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=train + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/train.out +#SBATCH --error=outputs/error_train.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=v1001 + +set -e + +micromamba run -n graphium -c graphium-train \ + model=gine \ + architecture=largemix \ + tasks=l1000_vcap \ + training=largemix \ + accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gine/100/" \ + predictor.optim_kwargs.lr=0.0004 \ + constants.seed=100 \ + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gine-200.sh b/single_runs/l1000_vcap/gine-200.sh new file mode 100644 index 000000000..ee113eaee --- /dev/null +++ b/single_runs/l1000_vcap/gine-200.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=train + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/train.out +#SBATCH --error=outputs/error_train.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=v1001 + +set -e + +micromamba run -n graphium -c graphium-train \ + model=gine \ + architecture=largemix \ + tasks=l1000_vcap \ + training=largemix \ + accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gine/200/" \ + predictor.optim_kwargs.lr=0.0004 \ + constants.seed=200 \ + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gine-300.sh b/single_runs/l1000_vcap/gine-300.sh new file mode 100644 index 000000000..e57c0dca0 --- /dev/null +++ b/single_runs/l1000_vcap/gine-300.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=train + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/train.out +#SBATCH --error=outputs/error_train.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=v1001 + +set -e + +micromamba run -n graphium -c graphium-train \ + model=gine \ + architecture=largemix \ + tasks=l1000_vcap \ + training=largemix \ + accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gine/300/" \ + predictor.optim_kwargs.lr=0.0004 \ + constants.seed=300 \ + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gcn-100.sh b/single_runs/pcba_1328/gcn-100.sh new file mode 100644 index 000000000..8d891ad30 --- /dev/null +++ b/single_runs/pcba_1328/gcn-100.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=train + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/train.out +#SBATCH --error=outputs/error_train.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=a1002 + +set -e + +micromamba run -n graphium -c graphium-train \ + model=gcn \ + architecture=largemix \ + tasks=pcba_1328 \ + training=largemix \ + accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gcn/100/" \ + predictor.optim_kwargs.lr=0.0004 \ + constants.seed=100 \ + 
constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gcn-200.sh b/single_runs/pcba_1328/gcn-200.sh new file mode 100644 index 000000000..7dacc7572 --- /dev/null +++ b/single_runs/pcba_1328/gcn-200.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=train + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/train.out +#SBATCH --error=outputs/error_train.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=a1004 + +set -e + +micromamba run -n graphium -c graphium-train \ + model=gcn \ + architecture=largemix \ + tasks=pcba_1328 \ + training=largemix \ + accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gcn/200/" \ + predictor.optim_kwargs.lr=0.0004 \ + constants.seed=200 \ + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gcn-300.sh b/single_runs/pcba_1328/gcn-300.sh new file mode 100644 index 000000000..50336ff3b --- /dev/null +++ b/single_runs/pcba_1328/gcn-300.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=train + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/train.out +#SBATCH --error=outputs/error_train.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=a1004 + +set -e + +micromamba run -n graphium -c graphium-train \ + model=gcn \ + architecture=largemix \ + tasks=pcba_1328 \ + training=largemix \ + accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gcn/300/" \ + predictor.optim_kwargs.lr=0.0004 \ + constants.seed=300 \ + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gin-100.sh b/single_runs/pcba_1328/gin-100.sh new file mode 100644 index 000000000..1b3491dd5 --- /dev/null +++ b/single_runs/pcba_1328/gin-100.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=train + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/train.out +#SBATCH --error=outputs/error_train.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=a1002 + +set -e + +micromamba run -n graphium -c graphium-train \ + model=gin \ + architecture=largemix \ + tasks=pcba_1328 \ + training=largemix \ + accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gin/100/" \ + predictor.optim_kwargs.lr=0.0004 \ + constants.seed=100 \ + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gin-200.sh b/single_runs/pcba_1328/gin-200.sh new file mode 100644 index 000000000..e12149872 --- /dev/null +++ b/single_runs/pcba_1328/gin-200.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=train + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/train.out +#SBATCH --error=outputs/error_train.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=a1004 + +set -e + +micromamba run -n graphium -c graphium-train \ + 
model=gin \ + architecture=largemix \ + tasks=pcba_1328 \ + training=largemix \ + accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gin/200/" \ + predictor.optim_kwargs.lr=0.0004 \ + constants.seed=200 \ + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gin-300.sh b/single_runs/pcba_1328/gin-300.sh new file mode 100644 index 000000000..1f0dec8e5 --- /dev/null +++ b/single_runs/pcba_1328/gin-300.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=train + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/train.out +#SBATCH --error=outputs/error_train.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=a1004 + +set -e + +micromamba run -n graphium -c graphium-train \ + model=gin \ + architecture=largemix \ + tasks=pcba_1328 \ + training=largemix \ + accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gin/300/" \ + predictor.optim_kwargs.lr=0.0004 \ + constants.seed=300 \ + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gine-100.sh b/single_runs/pcba_1328/gine-100.sh new file mode 100644 index 000000000..0f721f208 --- /dev/null +++ b/single_runs/pcba_1328/gine-100.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=train + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/train.out +#SBATCH --error=outputs/error_train.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=a1002 + +set -e + +micromamba run -n graphium -c graphium-train \ + model=gine \ + architecture=largemix \ + tasks=pcba_1328 \ + training=largemix \ + accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gine/100/" \ + predictor.optim_kwargs.lr=0.0004 \ + constants.seed=100 \ + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gine-200.sh b/single_runs/pcba_1328/gine-200.sh new file mode 100644 index 000000000..c6f3a202d --- /dev/null +++ b/single_runs/pcba_1328/gine-200.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=train + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH --output=outputs/train.out +#SBATCH --error=outputs/error_train.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=a1004 + +set -e + +micromamba run -n graphium -c graphium-train \ + model=gine \ + architecture=largemix \ + tasks=pcba_1328 \ + training=largemix \ + accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gine/200/" \ + predictor.optim_kwargs.lr=0.0004 \ + constants.seed=200 \ + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gine-300.sh b/single_runs/pcba_1328/gine-300.sh new file mode 100644 index 000000000..f1d09b5ec --- /dev/null +++ b/single_runs/pcba_1328/gine-300.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +## Name of your SLURM job +#SBATCH --job-name=train + +## Files for logs: here we redirect stoout and sterr to the same file +#SBATCH 
--output=outputs/train.out +#SBATCH --error=outputs/error_train.out +#SBATCH --open-mode=append + +## Time limit for the job +#SBATCH --time=120:00:00 + +## Partition to use, +#SBATCH --partition=a1004 + +set -e + +micromamba run -n graphium -c graphium-train \ + model=gine \ + architecture=largemix \ + tasks=pcba_1328 \ + training=largemix \ + accelerator=gpu \ + trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gine/300/" \ + predictor.optim_kwargs.lr=0.0004 \ + constants.seed=300 \ + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file From b5d897e628d8e7a7bb0e53cb0d10193a37378693 Mon Sep 17 00:00:00 2001 From: WenkelF Date: Wed, 23 Aug 2023 18:32:10 -0400 Subject: [PATCH 15/30] Updating pdba runs --- single_runs/pcba_1328/gcn-100.sh | 2 +- single_runs/pcba_1328/gcn-200.sh | 2 +- single_runs/pcba_1328/gcn-300.sh | 2 +- single_runs/pcba_1328/gin-100.sh | 2 +- single_runs/pcba_1328/gin-200.sh | 2 +- single_runs/pcba_1328/gin-300.sh | 2 +- single_runs/pcba_1328/gine-100.sh | 2 +- single_runs/pcba_1328/gine-200.sh | 2 +- single_runs/pcba_1328/gine-300.sh | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/single_runs/pcba_1328/gcn-100.sh b/single_runs/pcba_1328/gcn-100.sh index 8d891ad30..1bbfa6bef 100644 --- a/single_runs/pcba_1328/gcn-100.sh +++ b/single_runs/pcba_1328/gcn-100.sh @@ -12,7 +12,7 @@ #SBATCH --time=120:00:00 ## Partition to use, -#SBATCH --partition=a1002 +#SBATCH --partition=v1002 set -e diff --git a/single_runs/pcba_1328/gcn-200.sh b/single_runs/pcba_1328/gcn-200.sh index 7dacc7572..30cddf322 100644 --- a/single_runs/pcba_1328/gcn-200.sh +++ b/single_runs/pcba_1328/gcn-200.sh @@ -12,7 +12,7 @@ #SBATCH --time=120:00:00 ## Partition to use, -#SBATCH --partition=a1004 +#SBATCH --partition=v1002 set -e diff --git a/single_runs/pcba_1328/gcn-300.sh b/single_runs/pcba_1328/gcn-300.sh index 50336ff3b..2b7c32515 100644 --- a/single_runs/pcba_1328/gcn-300.sh +++ b/single_runs/pcba_1328/gcn-300.sh @@ -12,7 +12,7 @@ #SBATCH --time=120:00:00 ## Partition to use, -#SBATCH --partition=a1004 +#SBATCH --partition=v1002 set -e diff --git a/single_runs/pcba_1328/gin-100.sh b/single_runs/pcba_1328/gin-100.sh index 1b3491dd5..71f097576 100644 --- a/single_runs/pcba_1328/gin-100.sh +++ b/single_runs/pcba_1328/gin-100.sh @@ -12,7 +12,7 @@ #SBATCH --time=120:00:00 ## Partition to use, -#SBATCH --partition=a1002 +#SBATCH --partition=v1002 set -e diff --git a/single_runs/pcba_1328/gin-200.sh b/single_runs/pcba_1328/gin-200.sh index e12149872..183759153 100644 --- a/single_runs/pcba_1328/gin-200.sh +++ b/single_runs/pcba_1328/gin-200.sh @@ -12,7 +12,7 @@ #SBATCH --time=120:00:00 ## Partition to use, -#SBATCH --partition=a1004 +#SBATCH --partition=v1002 set -e diff --git a/single_runs/pcba_1328/gin-300.sh b/single_runs/pcba_1328/gin-300.sh index 1f0dec8e5..cc5875427 100644 --- a/single_runs/pcba_1328/gin-300.sh +++ b/single_runs/pcba_1328/gin-300.sh @@ -12,7 +12,7 @@ #SBATCH --time=120:00:00 ## Partition to use, -#SBATCH --partition=a1004 +#SBATCH --partition=v1002 set -e diff --git a/single_runs/pcba_1328/gine-100.sh b/single_runs/pcba_1328/gine-100.sh index 0f721f208..9bdfd7aba 100644 --- a/single_runs/pcba_1328/gine-100.sh +++ b/single_runs/pcba_1328/gine-100.sh @@ -12,7 +12,7 @@ #SBATCH --time=120:00:00 ## Partition to use, -#SBATCH --partition=a1002 +#SBATCH --partition=v1002 set -e diff --git a/single_runs/pcba_1328/gine-200.sh b/single_runs/pcba_1328/gine-200.sh index c6f3a202d..411525cad 100644 --- 
a/single_runs/pcba_1328/gine-200.sh +++ b/single_runs/pcba_1328/gine-200.sh @@ -12,7 +12,7 @@ #SBATCH --time=120:00:00 ## Partition to use, -#SBATCH --partition=a1004 +#SBATCH --partition=v1002 set -e diff --git a/single_runs/pcba_1328/gine-300.sh b/single_runs/pcba_1328/gine-300.sh index f1d09b5ec..dd88f4f46 100644 --- a/single_runs/pcba_1328/gine-300.sh +++ b/single_runs/pcba_1328/gine-300.sh @@ -12,7 +12,7 @@ #SBATCH --time=120:00:00 ## Partition to use, -#SBATCH --partition=a1004 +#SBATCH --partition=v1002 set -e From 952a1453513c86b3eaf7ea75003665bb4ea5c60b Mon Sep 17 00:00:00 2001 From: WenkelF Date: Wed, 23 Aug 2023 18:56:30 -0400 Subject: [PATCH 16/30] Updating pcba runs --- single_runs/pcba_1328/gcn-100.sh | 4 ++-- single_runs/pcba_1328/gcn-200.sh | 4 ++-- single_runs/pcba_1328/gcn-300.sh | 4 ++-- single_runs/pcba_1328/gin-100.sh | 4 ++-- single_runs/pcba_1328/gin-200.sh | 4 ++-- single_runs/pcba_1328/gin-300.sh | 4 ++-- single_runs/pcba_1328/gine-100.sh | 4 ++-- single_runs/pcba_1328/gine-200.sh | 4 ++-- single_runs/pcba_1328/gine-300.sh | 4 ++-- 9 files changed, 18 insertions(+), 18 deletions(-) diff --git a/single_runs/pcba_1328/gcn-100.sh b/single_runs/pcba_1328/gcn-100.sh index 1bbfa6bef..7de88e2fa 100644 --- a/single_runs/pcba_1328/gcn-100.sh +++ b/single_runs/pcba_1328/gcn-100.sh @@ -12,7 +12,7 @@ #SBATCH --time=120:00:00 ## Partition to use, -#SBATCH --partition=v1002 +#SBATCH --partition=v1004 set -e @@ -24,5 +24,5 @@ micromamba run -n graphium -c graphium-train \ accelerator=gpu \ trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gcn/100/" \ predictor.optim_kwargs.lr=0.0004 \ - constants.seed=100 \ + constants.seed=400 \ constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gcn-200.sh b/single_runs/pcba_1328/gcn-200.sh index 30cddf322..d30e8fd52 100644 --- a/single_runs/pcba_1328/gcn-200.sh +++ b/single_runs/pcba_1328/gcn-200.sh @@ -12,7 +12,7 @@ #SBATCH --time=120:00:00 ## Partition to use, -#SBATCH --partition=v1002 +#SBATCH --partition=v1004 set -e @@ -24,5 +24,5 @@ micromamba run -n graphium -c graphium-train \ accelerator=gpu \ trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gcn/200/" \ predictor.optim_kwargs.lr=0.0004 \ - constants.seed=200 \ + constants.seed=500 \ constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gcn-300.sh b/single_runs/pcba_1328/gcn-300.sh index 2b7c32515..71cc4af2c 100644 --- a/single_runs/pcba_1328/gcn-300.sh +++ b/single_runs/pcba_1328/gcn-300.sh @@ -12,7 +12,7 @@ #SBATCH --time=120:00:00 ## Partition to use, -#SBATCH --partition=v1002 +#SBATCH --partition=v1004 set -e @@ -24,5 +24,5 @@ micromamba run -n graphium -c graphium-train \ accelerator=gpu \ trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gcn/300/" \ predictor.optim_kwargs.lr=0.0004 \ - constants.seed=300 \ + constants.seed=600 \ constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gin-100.sh b/single_runs/pcba_1328/gin-100.sh index 71f097576..58a9aed4f 100644 --- a/single_runs/pcba_1328/gin-100.sh +++ b/single_runs/pcba_1328/gin-100.sh @@ -12,7 +12,7 @@ #SBATCH --time=120:00:00 ## Partition to use, -#SBATCH --partition=v1002 +#SBATCH --partition=v1004 set -e @@ -24,5 +24,5 @@ micromamba run -n graphium -c graphium-train \ accelerator=gpu \ trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gin/100/" \ 
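Patches 15 and 16 only touch the nine pcba_1328 scripts: patch 15 moves them from the a1002/a1004 partitions to v1002, and patch 16 moves them again to v1004 while bumping the seeds from 100/200/300 to 400/500/600 (the checkpoint directory names keep the original 100/200/300 suffixes). The snippet below is a hedged sketch of the net effect applied in bulk rather than by editing nine files by hand; the sed invocation and in-place editing are assumptions about tooling, and only the substituted values come from these hunks.

# Illustrative only: the net partition/seed change from patches 15 and 16
# applied to all nine pcba_1328 scripts at once (assumes GNU sed with -i).
for f in single_runs/pcba_1328/*.sh; do
  sed -i \
    -e 's/--partition=a100[24]/--partition=v1004/' \
    -e 's/constants\.seed=100/constants.seed=400/' \
    -e 's/constants\.seed=200/constants.seed=500/' \
    -e 's/constants\.seed=300/constants.seed=600/' \
    "$f"
done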
predictor.optim_kwargs.lr=0.0004 \ - constants.seed=100 \ + constants.seed=400 \ constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gin-200.sh b/single_runs/pcba_1328/gin-200.sh index 183759153..ad501ea46 100644 --- a/single_runs/pcba_1328/gin-200.sh +++ b/single_runs/pcba_1328/gin-200.sh @@ -12,7 +12,7 @@ #SBATCH --time=120:00:00 ## Partition to use, -#SBATCH --partition=v1002 +#SBATCH --partition=v1004 set -e @@ -24,5 +24,5 @@ micromamba run -n graphium -c graphium-train \ accelerator=gpu \ trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gin/200/" \ predictor.optim_kwargs.lr=0.0004 \ - constants.seed=200 \ + constants.seed=500 \ constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gin-300.sh b/single_runs/pcba_1328/gin-300.sh index cc5875427..b245e9592 100644 --- a/single_runs/pcba_1328/gin-300.sh +++ b/single_runs/pcba_1328/gin-300.sh @@ -12,7 +12,7 @@ #SBATCH --time=120:00:00 ## Partition to use, -#SBATCH --partition=v1002 +#SBATCH --partition=v1004 set -e @@ -24,5 +24,5 @@ micromamba run -n graphium -c graphium-train \ accelerator=gpu \ trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gin/300/" \ predictor.optim_kwargs.lr=0.0004 \ - constants.seed=300 \ + constants.seed=600 \ constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gine-100.sh b/single_runs/pcba_1328/gine-100.sh index 9bdfd7aba..c4a1650b4 100644 --- a/single_runs/pcba_1328/gine-100.sh +++ b/single_runs/pcba_1328/gine-100.sh @@ -12,7 +12,7 @@ #SBATCH --time=120:00:00 ## Partition to use, -#SBATCH --partition=v1002 +#SBATCH --partition=v1004 set -e @@ -24,5 +24,5 @@ micromamba run -n graphium -c graphium-train \ accelerator=gpu \ trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gine/100/" \ predictor.optim_kwargs.lr=0.0004 \ - constants.seed=100 \ + constants.seed=400 \ constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gine-200.sh b/single_runs/pcba_1328/gine-200.sh index 411525cad..b2dbd2d7f 100644 --- a/single_runs/pcba_1328/gine-200.sh +++ b/single_runs/pcba_1328/gine-200.sh @@ -12,7 +12,7 @@ #SBATCH --time=120:00:00 ## Partition to use, -#SBATCH --partition=v1002 +#SBATCH --partition=v1004 set -e @@ -24,5 +24,5 @@ micromamba run -n graphium -c graphium-train \ accelerator=gpu \ trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gine/200/" \ predictor.optim_kwargs.lr=0.0004 \ - constants.seed=200 \ + constants.seed=500 \ constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gine-300.sh b/single_runs/pcba_1328/gine-300.sh index dd88f4f46..fe9afb79d 100644 --- a/single_runs/pcba_1328/gine-300.sh +++ b/single_runs/pcba_1328/gine-300.sh @@ -12,7 +12,7 @@ #SBATCH --time=120:00:00 ## Partition to use, -#SBATCH --partition=v1002 +#SBATCH --partition=v1004 set -e @@ -24,5 +24,5 @@ micromamba run -n graphium -c graphium-train \ accelerator=gpu \ trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gine/300/" \ predictor.optim_kwargs.lr=0.0004 \ - constants.seed=300 \ + constants.seed=600 \ constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file From 38e766739fde2e7dd65168d1c57458372395fecf Mon Sep 17 00:00:00 2001 From: WenkelF Date: Wed, 23 Aug 2023 20:12:56 -0400 Subject: [PATCH 
17/30] Changing split_names to test-seen --- scripts/featurize/featurize.sh | 5 +++-- single_runs/l1000_mcf7/gcn-100.sh | 3 ++- single_runs/l1000_mcf7/gcn-200.sh | 3 ++- single_runs/l1000_mcf7/gcn-300.sh | 3 ++- single_runs/l1000_mcf7/gin-100.sh | 3 ++- single_runs/l1000_mcf7/gin-200.sh | 3 ++- single_runs/l1000_mcf7/gin-300.sh | 3 ++- single_runs/l1000_mcf7/gine-100.sh | 3 ++- single_runs/l1000_mcf7/gine-200.sh | 3 ++- single_runs/l1000_mcf7/gine-300.sh | 3 ++- single_runs/l1000_vcap/gcn-100.sh | 3 ++- single_runs/l1000_vcap/gcn-200.sh | 3 ++- single_runs/l1000_vcap/gcn-300.sh | 3 ++- single_runs/l1000_vcap/gin-100.sh | 3 ++- single_runs/l1000_vcap/gin-200.sh | 3 ++- single_runs/l1000_vcap/gin-300.sh | 3 ++- single_runs/l1000_vcap/gine-100.sh | 3 ++- single_runs/l1000_vcap/gine-200.sh | 3 ++- single_runs/l1000_vcap/gine-300.sh | 3 ++- single_runs/pcba_1328/gcn-100.sh | 3 ++- single_runs/pcba_1328/gcn-200.sh | 3 ++- single_runs/pcba_1328/gcn-300.sh | 3 ++- single_runs/pcba_1328/gin-100.sh | 3 ++- single_runs/pcba_1328/gin-200.sh | 3 ++- single_runs/pcba_1328/gin-300.sh | 3 ++- single_runs/pcba_1328/gine-100.sh | 3 ++- single_runs/pcba_1328/gine-200.sh | 3 ++- single_runs/pcba_1328/gine-300.sh | 3 ++- 28 files changed, 57 insertions(+), 29 deletions(-) diff --git a/scripts/featurize/featurize.sh b/scripts/featurize/featurize.sh index 4bbdc902b..810612081 100644 --- a/scripts/featurize/featurize.sh +++ b/scripts/featurize/featurize.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash ## Name of your SLURM job -#SBATCH --job-name=featurize +#SBATCH --job-name=pcba_feat ## Files for logs: here we redirect stoout and sterr to the same file #SBATCH --output=outputs/featurize.out @@ -19,4 +19,5 @@ set -e micromamba run -n graphium -c graphium-prepare-data \ architecture=largemix \ tasks=pcba_1328 \ - training=largemix \ \ No newline at end of file + training=largemix \ + +datamodule.args.task_specific_args.pcba_1328.split_names=["train", "val", "test-seen"] \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gcn-100.sh b/single_runs/l1000_mcf7/gcn-100.sh index 0ab762292..4980b8527 100644 --- a/single_runs/l1000_mcf7/gcn-100.sh +++ b/single_runs/l1000_mcf7/gcn-100.sh @@ -25,4 +25,5 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gcn/100/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=100 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ + +datamodule.args.task_specific_args.l1000_mcf7.split_names=["train", "val", "test-seen"] \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gcn-200.sh b/single_runs/l1000_mcf7/gcn-200.sh index 499e426ac..d9bbcbdcf 100644 --- a/single_runs/l1000_mcf7/gcn-200.sh +++ b/single_runs/l1000_mcf7/gcn-200.sh @@ -25,4 +25,5 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gcn/200/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=200 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ + +datamodule.args.task_specific_args.l1000_mcf7.split_names=["train", "val", "test-seen"] \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gcn-300.sh b/single_runs/l1000_mcf7/gcn-300.sh index 8542561af..ef9187af4 100644 --- a/single_runs/l1000_mcf7/gcn-300.sh +++ b/single_runs/l1000_mcf7/gcn-300.sh @@ -25,4 +25,5 @@ 
micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gcn/300/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=300 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ + +datamodule.args.task_specific_args.l1000_mcf7.split_names=["train", "val", "test-seen"] \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gin-100.sh b/single_runs/l1000_mcf7/gin-100.sh index c48d1f955..76d994949 100644 --- a/single_runs/l1000_mcf7/gin-100.sh +++ b/single_runs/l1000_mcf7/gin-100.sh @@ -25,4 +25,5 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gin/100/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=100 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ + +datamodule.args.task_specific_args.l1000_mcf7.split_names=["train", "val", "test-seen"] \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gin-200.sh b/single_runs/l1000_mcf7/gin-200.sh index e4fcf7d42..073be2881 100644 --- a/single_runs/l1000_mcf7/gin-200.sh +++ b/single_runs/l1000_mcf7/gin-200.sh @@ -25,4 +25,5 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gin/200/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=200 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ + +datamodule.args.task_specific_args.l1000_mcf7.split_names=["train", "val", "test-seen"] \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gin-300.sh b/single_runs/l1000_mcf7/gin-300.sh index 2be5b4d30..41f133f0f 100644 --- a/single_runs/l1000_mcf7/gin-300.sh +++ b/single_runs/l1000_mcf7/gin-300.sh @@ -25,4 +25,5 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gin/300/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=300 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ + +datamodule.args.task_specific_args.l1000_mcf7.split_names=["train", "val", "test-seen"] \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gine-100.sh b/single_runs/l1000_mcf7/gine-100.sh index 9ff03e682..f2baa3234 100644 --- a/single_runs/l1000_mcf7/gine-100.sh +++ b/single_runs/l1000_mcf7/gine-100.sh @@ -25,4 +25,5 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gine/100/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=100 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ + +datamodule.args.task_specific_args.l1000_mcf7.split_names=["train", "val", "test-seen"] \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gine-200.sh b/single_runs/l1000_mcf7/gine-200.sh index 925f9f202..fbd0a9b2e 100644 --- a/single_runs/l1000_mcf7/gine-200.sh +++ b/single_runs/l1000_mcf7/gine-200.sh @@ -25,4 +25,5 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gine/200/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=200 \ - 
constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ + +datamodule.args.task_specific_args.l1000_mcf7.split_names=["train", "val", "test-seen"] \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gine-300.sh b/single_runs/l1000_mcf7/gine-300.sh index f86fa4e31..3a831c1dd 100644 --- a/single_runs/l1000_mcf7/gine-300.sh +++ b/single_runs/l1000_mcf7/gine-300.sh @@ -25,4 +25,5 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gine/300/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=300 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ + +datamodule.args.task_specific_args.l1000_mcf7.split_names=["train", "val", "test-seen"] \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gcn-100.sh b/single_runs/l1000_vcap/gcn-100.sh index 307e9dde8..e7bb31674 100644 --- a/single_runs/l1000_vcap/gcn-100.sh +++ b/single_runs/l1000_vcap/gcn-100.sh @@ -25,4 +25,5 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gcn/100/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=100 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ + +datamodule.args.task_specific_args.l1000_vcap.split_names=["train", "val", "test-seen"] \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gcn-200.sh b/single_runs/l1000_vcap/gcn-200.sh index b5a4df952..f3cf7aff8 100644 --- a/single_runs/l1000_vcap/gcn-200.sh +++ b/single_runs/l1000_vcap/gcn-200.sh @@ -25,4 +25,5 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gcn/200/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=200 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ + +datamodule.args.task_specific_args.l1000_vcap.split_names=["train", "val", "test-seen"] \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gcn-300.sh b/single_runs/l1000_vcap/gcn-300.sh index 5331cce48..15065a848 100644 --- a/single_runs/l1000_vcap/gcn-300.sh +++ b/single_runs/l1000_vcap/gcn-300.sh @@ -25,4 +25,5 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gcn/300/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=300 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ + +datamodule.args.task_specific_args.l1000_vcap.split_names=["train", "val", "test-seen"] \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gin-100.sh b/single_runs/l1000_vcap/gin-100.sh index df8154294..f3e4bed5e 100644 --- a/single_runs/l1000_vcap/gin-100.sh +++ b/single_runs/l1000_vcap/gin-100.sh @@ -25,4 +25,5 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gin/100/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=100 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ + 
+datamodule.args.task_specific_args.l1000_vcap.split_names=["train", "val", "test-seen"] \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gin-200.sh b/single_runs/l1000_vcap/gin-200.sh index fe0c652ea..ce20120ab 100644 --- a/single_runs/l1000_vcap/gin-200.sh +++ b/single_runs/l1000_vcap/gin-200.sh @@ -25,4 +25,5 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gin/200/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=200 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ + +datamodule.args.task_specific_args.l1000_vcap.split_names=["train", "val", "test-seen"] \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gin-300.sh b/single_runs/l1000_vcap/gin-300.sh index 4304fa941..0fa436ec0 100644 --- a/single_runs/l1000_vcap/gin-300.sh +++ b/single_runs/l1000_vcap/gin-300.sh @@ -25,4 +25,5 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gin/300/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=300 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ + +datamodule.args.task_specific_args.l1000_vcap.split_names=["train", "val", "test-seen"] \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gine-100.sh b/single_runs/l1000_vcap/gine-100.sh index 90071ba3c..08271f55d 100644 --- a/single_runs/l1000_vcap/gine-100.sh +++ b/single_runs/l1000_vcap/gine-100.sh @@ -25,4 +25,5 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gine/100/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=100 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ + +datamodule.args.task_specific_args.l1000_vcap.split_names=["train", "val", "test-seen"] \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gine-200.sh b/single_runs/l1000_vcap/gine-200.sh index ee113eaee..c10aba14e 100644 --- a/single_runs/l1000_vcap/gine-200.sh +++ b/single_runs/l1000_vcap/gine-200.sh @@ -25,4 +25,5 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gine/200/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=200 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ + +datamodule.args.task_specific_args.l1000_vcap.split_names=["train", "val", "test-seen"] \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gine-300.sh b/single_runs/l1000_vcap/gine-300.sh index e57c0dca0..1d2adf2a7 100644 --- a/single_runs/l1000_vcap/gine-300.sh +++ b/single_runs/l1000_vcap/gine-300.sh @@ -25,4 +25,5 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gine/300/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=300 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ + +datamodule.args.task_specific_args.l1000_vcap.split_names=["train", "val", "test-seen"] \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gcn-100.sh b/single_runs/pcba_1328/gcn-100.sh 
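Patch 17 appends the same Hydra override to featurize.sh and to every single-run training script. The leading "+" in +datamodule.args.task_specific_args.<dataset>.split_names=... is Hydra's syntax for adding a key that is not present in the composed config, as opposed to overriding an existing one, and the value lists the split names to use: train, val, and a held-out "test-seen" split. Adding the same override to graphium-prepare-data presumably keeps the featurized data cache consistent with the splits the training runs will request. The sketch below is featurize.sh's command with one change of mine: the override is single-quoted so the shell passes the bracketed list, spaces and all, to the program as a single argument.

# Sketch: featurize.sh's prepare-data call with the appended split_names
# override quoted as one shell word (the quoting is added here; everything
# else matches the featurize.sh hunk above).
micromamba run -n graphium -c graphium-prepare-data \
  architecture=largemix \
  tasks=pcba_1328 \
  training=largemix \
  '+datamodule.args.task_specific_args.pcba_1328.split_names=["train", "val", "test-seen"]'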
index 7de88e2fa..61337f72e 100644 --- a/single_runs/pcba_1328/gcn-100.sh +++ b/single_runs/pcba_1328/gcn-100.sh @@ -25,4 +25,5 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gcn/100/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=400 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ + +datamodule.args.task_specific_args.pcba_1328.split_names=["train", "val", "test-seen"] \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gcn-200.sh b/single_runs/pcba_1328/gcn-200.sh index d30e8fd52..ef5545149 100644 --- a/single_runs/pcba_1328/gcn-200.sh +++ b/single_runs/pcba_1328/gcn-200.sh @@ -25,4 +25,5 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gcn/200/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=500 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ + +datamodule.args.task_specific_args.pcba_1328.split_names=["train", "val", "test-seen"] \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gcn-300.sh b/single_runs/pcba_1328/gcn-300.sh index 71cc4af2c..5ff210652 100644 --- a/single_runs/pcba_1328/gcn-300.sh +++ b/single_runs/pcba_1328/gcn-300.sh @@ -25,4 +25,5 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gcn/300/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=600 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ + +datamodule.args.task_specific_args.pcba_1328.split_names=["train", "val", "test-seen"] \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gin-100.sh b/single_runs/pcba_1328/gin-100.sh index 58a9aed4f..a3b3ed103 100644 --- a/single_runs/pcba_1328/gin-100.sh +++ b/single_runs/pcba_1328/gin-100.sh @@ -25,4 +25,5 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gin/100/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=400 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ + +datamodule.args.task_specific_args.pcba_1328.split_names=["train", "val", "test-seen"] \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gin-200.sh b/single_runs/pcba_1328/gin-200.sh index ad501ea46..76af44be4 100644 --- a/single_runs/pcba_1328/gin-200.sh +++ b/single_runs/pcba_1328/gin-200.sh @@ -25,4 +25,5 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gin/200/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=500 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ + +datamodule.args.task_specific_args.pcba_1328.split_names=["train", "val", "test-seen"] \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gin-300.sh b/single_runs/pcba_1328/gin-300.sh index b245e9592..e15e2ca18 100644 --- a/single_runs/pcba_1328/gin-300.sh +++ b/single_runs/pcba_1328/gin-300.sh @@ -25,4 +25,5 @@ micromamba run -n graphium -c graphium-train \ 
trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gin/300/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=600 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ + +datamodule.args.task_specific_args.pcba_1328.split_names=["train", "val", "test-seen"] \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gine-100.sh b/single_runs/pcba_1328/gine-100.sh index c4a1650b4..a89316a21 100644 --- a/single_runs/pcba_1328/gine-100.sh +++ b/single_runs/pcba_1328/gine-100.sh @@ -25,4 +25,5 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gine/100/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=400 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ + +datamodule.args.task_specific_args.pcba_1328.split_names=["train", "val", "test-seen"] \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gine-200.sh b/single_runs/pcba_1328/gine-200.sh index b2dbd2d7f..446fc4c62 100644 --- a/single_runs/pcba_1328/gine-200.sh +++ b/single_runs/pcba_1328/gine-200.sh @@ -25,4 +25,5 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gine/200/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=500 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ + +datamodule.args.task_specific_args.pcba_1328.split_names=["train", "val", "test-seen"] \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gine-300.sh b/single_runs/pcba_1328/gine-300.sh index fe9afb79d..50da4374a 100644 --- a/single_runs/pcba_1328/gine-300.sh +++ b/single_runs/pcba_1328/gine-300.sh @@ -25,4 +25,5 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gine/300/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=600 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ + +datamodule.args.task_specific_args.pcba_1328.split_names=["train", "val", "test-seen"] \ \ No newline at end of file From 914cb08281d30e88e9c25f3bccf1fae22b8b6415 Mon Sep 17 00:00:00 2001 From: WenkelF Date: Wed, 23 Aug 2023 20:39:39 -0400 Subject: [PATCH 18/30] Updating split_paths to test_seen --- scripts/featurize/featurize.sh | 2 +- single_runs/l1000_mcf7/gcn-100.sh | 2 +- single_runs/l1000_mcf7/gcn-200.sh | 2 +- single_runs/l1000_mcf7/gcn-300.sh | 2 +- single_runs/l1000_mcf7/gin-100.sh | 2 +- single_runs/l1000_mcf7/gin-200.sh | 2 +- single_runs/l1000_mcf7/gin-300.sh | 2 +- single_runs/l1000_mcf7/gine-100.sh | 2 +- single_runs/l1000_mcf7/gine-200.sh | 2 +- single_runs/l1000_mcf7/gine-300.sh | 2 +- single_runs/l1000_vcap/gcn-100.sh | 2 +- single_runs/l1000_vcap/gcn-200.sh | 2 +- single_runs/l1000_vcap/gcn-300.sh | 2 +- single_runs/l1000_vcap/gin-100.sh | 2 +- single_runs/l1000_vcap/gin-200.sh | 2 +- single_runs/l1000_vcap/gin-300.sh | 2 +- single_runs/l1000_vcap/gine-100.sh | 2 +- single_runs/l1000_vcap/gine-200.sh | 2 +- single_runs/l1000_vcap/gine-300.sh | 2 +- single_runs/pcba_1328/gcn-100.sh | 2 +- single_runs/pcba_1328/gcn-200.sh | 2 +- single_runs/pcba_1328/gcn-300.sh | 2 +- single_runs/pcba_1328/gin-100.sh | 2 +- 
single_runs/pcba_1328/gin-200.sh | 2 +- single_runs/pcba_1328/gin-300.sh | 2 +- single_runs/pcba_1328/gine-100.sh | 2 +- single_runs/pcba_1328/gine-200.sh | 2 +- single_runs/pcba_1328/gine-300.sh | 2 +- 28 files changed, 28 insertions(+), 28 deletions(-) diff --git a/scripts/featurize/featurize.sh b/scripts/featurize/featurize.sh index 810612081..4aef65633 100644 --- a/scripts/featurize/featurize.sh +++ b/scripts/featurize/featurize.sh @@ -20,4 +20,4 @@ micromamba run -n graphium -c graphium-prepare-data \ architecture=largemix \ tasks=pcba_1328 \ training=largemix \ - +datamodule.args.task_specific_args.pcba_1328.split_names=["train", "val", "test-seen"] \ \ No newline at end of file + +datamodule.args.task_specific_args.pcba_1328.split_names=[train, val, test_seen] \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gcn-100.sh b/single_runs/l1000_mcf7/gcn-100.sh index 4980b8527..00a2cd75e 100644 --- a/single_runs/l1000_mcf7/gcn-100.sh +++ b/single_runs/l1000_mcf7/gcn-100.sh @@ -26,4 +26,4 @@ micromamba run -n graphium -c graphium-train \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=100 \ constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_mcf7.split_names=["train", "val", "test-seen"] \ \ No newline at end of file + +datamodule.args.task_specific_args.l1000_mcf7.split_names=[train, val, test_seen] \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gcn-200.sh b/single_runs/l1000_mcf7/gcn-200.sh index d9bbcbdcf..7e3d29d88 100644 --- a/single_runs/l1000_mcf7/gcn-200.sh +++ b/single_runs/l1000_mcf7/gcn-200.sh @@ -26,4 +26,4 @@ micromamba run -n graphium -c graphium-train \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=200 \ constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_mcf7.split_names=["train", "val", "test-seen"] \ \ No newline at end of file + +datamodule.args.task_specific_args.l1000_mcf7.split_names=[train, val, test_seen] \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gcn-300.sh b/single_runs/l1000_mcf7/gcn-300.sh index ef9187af4..50e9c0170 100644 --- a/single_runs/l1000_mcf7/gcn-300.sh +++ b/single_runs/l1000_mcf7/gcn-300.sh @@ -26,4 +26,4 @@ micromamba run -n graphium -c graphium-train \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=300 \ constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_mcf7.split_names=["train", "val", "test-seen"] \ \ No newline at end of file + +datamodule.args.task_specific_args.l1000_mcf7.split_names=[train, val, test_seen] \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gin-100.sh b/single_runs/l1000_mcf7/gin-100.sh index 76d994949..be2ab9386 100644 --- a/single_runs/l1000_mcf7/gin-100.sh +++ b/single_runs/l1000_mcf7/gin-100.sh @@ -26,4 +26,4 @@ micromamba run -n graphium -c graphium-train \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=100 \ constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_mcf7.split_names=["train", "val", "test-seen"] \ \ No newline at end of file + +datamodule.args.task_specific_args.l1000_mcf7.split_names=[train, val, test_seen] \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gin-200.sh b/single_runs/l1000_mcf7/gin-200.sh index 073be2881..487816d61 100644 --- a/single_runs/l1000_mcf7/gin-200.sh +++ b/single_runs/l1000_mcf7/gin-200.sh @@ -26,4 +26,4 @@ micromamba run -n graphium -c graphium-train \ 
predictor.optim_kwargs.lr=0.0004 \ constants.seed=200 \ constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_mcf7.split_names=["train", "val", "test-seen"] \ \ No newline at end of file + +datamodule.args.task_specific_args.l1000_mcf7.split_names=[train, val, test_seen] \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gin-300.sh b/single_runs/l1000_mcf7/gin-300.sh index 41f133f0f..07d8fe0fe 100644 --- a/single_runs/l1000_mcf7/gin-300.sh +++ b/single_runs/l1000_mcf7/gin-300.sh @@ -26,4 +26,4 @@ micromamba run -n graphium -c graphium-train \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=300 \ constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_mcf7.split_names=["train", "val", "test-seen"] \ \ No newline at end of file + +datamodule.args.task_specific_args.l1000_mcf7.split_names=[train, val, test_seen] \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gine-100.sh b/single_runs/l1000_mcf7/gine-100.sh index f2baa3234..87b770fea 100644 --- a/single_runs/l1000_mcf7/gine-100.sh +++ b/single_runs/l1000_mcf7/gine-100.sh @@ -26,4 +26,4 @@ micromamba run -n graphium -c graphium-train \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=100 \ constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_mcf7.split_names=["train", "val", "test-seen"] \ \ No newline at end of file + +datamodule.args.task_specific_args.l1000_mcf7.split_names=[train, val, test_seen] \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gine-200.sh b/single_runs/l1000_mcf7/gine-200.sh index fbd0a9b2e..2f3390e85 100644 --- a/single_runs/l1000_mcf7/gine-200.sh +++ b/single_runs/l1000_mcf7/gine-200.sh @@ -26,4 +26,4 @@ micromamba run -n graphium -c graphium-train \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=200 \ constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_mcf7.split_names=["train", "val", "test-seen"] \ \ No newline at end of file + +datamodule.args.task_specific_args.l1000_mcf7.split_names=[train, val, test_seen] \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gine-300.sh b/single_runs/l1000_mcf7/gine-300.sh index 3a831c1dd..004793c89 100644 --- a/single_runs/l1000_mcf7/gine-300.sh +++ b/single_runs/l1000_mcf7/gine-300.sh @@ -26,4 +26,4 @@ micromamba run -n graphium -c graphium-train \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=300 \ constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_mcf7.split_names=["train", "val", "test-seen"] \ \ No newline at end of file + +datamodule.args.task_specific_args.l1000_mcf7.split_names=[train, val, test_seen] \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gcn-100.sh b/single_runs/l1000_vcap/gcn-100.sh index e7bb31674..53847d131 100644 --- a/single_runs/l1000_vcap/gcn-100.sh +++ b/single_runs/l1000_vcap/gcn-100.sh @@ -26,4 +26,4 @@ micromamba run -n graphium -c graphium-train \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=100 \ constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_vcap.split_names=["train", "val", "test-seen"] \ \ No newline at end of file + +datamodule.args.task_specific_args.l1000_vcap.split_names=[train, val, test_seen] \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gcn-200.sh b/single_runs/l1000_vcap/gcn-200.sh index f3cf7aff8..e17eb94f7 
100644 --- a/single_runs/l1000_vcap/gcn-200.sh +++ b/single_runs/l1000_vcap/gcn-200.sh @@ -26,4 +26,4 @@ micromamba run -n graphium -c graphium-train \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=200 \ constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_vcap.split_names=["train", "val", "test-seen"] \ \ No newline at end of file + +datamodule.args.task_specific_args.l1000_vcap.split_names=[train, val, test_seen] \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gcn-300.sh b/single_runs/l1000_vcap/gcn-300.sh index 15065a848..45b0d5554 100644 --- a/single_runs/l1000_vcap/gcn-300.sh +++ b/single_runs/l1000_vcap/gcn-300.sh @@ -26,4 +26,4 @@ micromamba run -n graphium -c graphium-train \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=300 \ constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_vcap.split_names=["train", "val", "test-seen"] \ \ No newline at end of file + +datamodule.args.task_specific_args.l1000_vcap.split_names=[train, val, test_seen] \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gin-100.sh b/single_runs/l1000_vcap/gin-100.sh index f3e4bed5e..7b9b80f36 100644 --- a/single_runs/l1000_vcap/gin-100.sh +++ b/single_runs/l1000_vcap/gin-100.sh @@ -26,4 +26,4 @@ micromamba run -n graphium -c graphium-train \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=100 \ constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_vcap.split_names=["train", "val", "test-seen"] \ \ No newline at end of file + +datamodule.args.task_specific_args.l1000_vcap.split_names=[train, val, test_seen] \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gin-200.sh b/single_runs/l1000_vcap/gin-200.sh index ce20120ab..92715b444 100644 --- a/single_runs/l1000_vcap/gin-200.sh +++ b/single_runs/l1000_vcap/gin-200.sh @@ -26,4 +26,4 @@ micromamba run -n graphium -c graphium-train \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=200 \ constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_vcap.split_names=["train", "val", "test-seen"] \ \ No newline at end of file + +datamodule.args.task_specific_args.l1000_vcap.split_names=[train, val, test_seen] \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gin-300.sh b/single_runs/l1000_vcap/gin-300.sh index 0fa436ec0..718644a88 100644 --- a/single_runs/l1000_vcap/gin-300.sh +++ b/single_runs/l1000_vcap/gin-300.sh @@ -26,4 +26,4 @@ micromamba run -n graphium -c graphium-train \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=300 \ constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_vcap.split_names=["train", "val", "test-seen"] \ \ No newline at end of file + +datamodule.args.task_specific_args.l1000_vcap.split_names=[train, val, test_seen] \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gine-100.sh b/single_runs/l1000_vcap/gine-100.sh index 08271f55d..4d38a5cbd 100644 --- a/single_runs/l1000_vcap/gine-100.sh +++ b/single_runs/l1000_vcap/gine-100.sh @@ -26,4 +26,4 @@ micromamba run -n graphium -c graphium-train \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=100 \ constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_vcap.split_names=["train", "val", "test-seen"] \ \ No newline at end of file + +datamodule.args.task_specific_args.l1000_vcap.split_names=[train, val, test_seen] 
\ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gine-200.sh b/single_runs/l1000_vcap/gine-200.sh index c10aba14e..d1c68271f 100644 --- a/single_runs/l1000_vcap/gine-200.sh +++ b/single_runs/l1000_vcap/gine-200.sh @@ -26,4 +26,4 @@ micromamba run -n graphium -c graphium-train \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=200 \ constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_vcap.split_names=["train", "val", "test-seen"] \ \ No newline at end of file + +datamodule.args.task_specific_args.l1000_vcap.split_names=[train, val, test_seen] \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gine-300.sh b/single_runs/l1000_vcap/gine-300.sh index 1d2adf2a7..37837f869 100644 --- a/single_runs/l1000_vcap/gine-300.sh +++ b/single_runs/l1000_vcap/gine-300.sh @@ -26,4 +26,4 @@ micromamba run -n graphium -c graphium-train \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=300 \ constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_vcap.split_names=["train", "val", "test-seen"] \ \ No newline at end of file + +datamodule.args.task_specific_args.l1000_vcap.split_names=[train, val, test_seen] \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gcn-100.sh b/single_runs/pcba_1328/gcn-100.sh index 61337f72e..0dd562d60 100644 --- a/single_runs/pcba_1328/gcn-100.sh +++ b/single_runs/pcba_1328/gcn-100.sh @@ -26,4 +26,4 @@ micromamba run -n graphium -c graphium-train \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=400 \ constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.pcba_1328.split_names=["train", "val", "test-seen"] \ \ No newline at end of file + +datamodule.args.task_specific_args.pcba_1328.split_names=[train, val, test_seen] \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gcn-200.sh b/single_runs/pcba_1328/gcn-200.sh index ef5545149..2b822183f 100644 --- a/single_runs/pcba_1328/gcn-200.sh +++ b/single_runs/pcba_1328/gcn-200.sh @@ -26,4 +26,4 @@ micromamba run -n graphium -c graphium-train \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=500 \ constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.pcba_1328.split_names=["train", "val", "test-seen"] \ \ No newline at end of file + +datamodule.args.task_specific_args.pcba_1328.split_names=[train, val, test_seen] \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gcn-300.sh b/single_runs/pcba_1328/gcn-300.sh index 5ff210652..8d35772e1 100644 --- a/single_runs/pcba_1328/gcn-300.sh +++ b/single_runs/pcba_1328/gcn-300.sh @@ -26,4 +26,4 @@ micromamba run -n graphium -c graphium-train \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=600 \ constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.pcba_1328.split_names=["train", "val", "test-seen"] \ \ No newline at end of file + +datamodule.args.task_specific_args.pcba_1328.split_names=[train, val, test_seen] \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gin-100.sh b/single_runs/pcba_1328/gin-100.sh index a3b3ed103..ccd0a067b 100644 --- a/single_runs/pcba_1328/gin-100.sh +++ b/single_runs/pcba_1328/gin-100.sh @@ -26,4 +26,4 @@ micromamba run -n graphium -c graphium-train \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=400 \ constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.pcba_1328.split_names=["train", "val", 
"test-seen"] \ \ No newline at end of file + +datamodule.args.task_specific_args.pcba_1328.split_names=[train, val, test_seen] \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gin-200.sh b/single_runs/pcba_1328/gin-200.sh index 76af44be4..8085e383e 100644 --- a/single_runs/pcba_1328/gin-200.sh +++ b/single_runs/pcba_1328/gin-200.sh @@ -26,4 +26,4 @@ micromamba run -n graphium -c graphium-train \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=500 \ constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.pcba_1328.split_names=["train", "val", "test-seen"] \ \ No newline at end of file + +datamodule.args.task_specific_args.pcba_1328.split_names=[train, val, test_seen] \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gin-300.sh b/single_runs/pcba_1328/gin-300.sh index e15e2ca18..b4a5b9f13 100644 --- a/single_runs/pcba_1328/gin-300.sh +++ b/single_runs/pcba_1328/gin-300.sh @@ -26,4 +26,4 @@ micromamba run -n graphium -c graphium-train \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=600 \ constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.pcba_1328.split_names=["train", "val", "test-seen"] \ \ No newline at end of file + +datamodule.args.task_specific_args.pcba_1328.split_names=[train, val, test_seen] \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gine-100.sh b/single_runs/pcba_1328/gine-100.sh index a89316a21..7f998372e 100644 --- a/single_runs/pcba_1328/gine-100.sh +++ b/single_runs/pcba_1328/gine-100.sh @@ -26,4 +26,4 @@ micromamba run -n graphium -c graphium-train \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=400 \ constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.pcba_1328.split_names=["train", "val", "test-seen"] \ \ No newline at end of file + +datamodule.args.task_specific_args.pcba_1328.split_names=[train, val, test_seen] \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gine-200.sh b/single_runs/pcba_1328/gine-200.sh index 446fc4c62..b9c0d686f 100644 --- a/single_runs/pcba_1328/gine-200.sh +++ b/single_runs/pcba_1328/gine-200.sh @@ -26,4 +26,4 @@ micromamba run -n graphium -c graphium-train \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=500 \ constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.pcba_1328.split_names=["train", "val", "test-seen"] \ \ No newline at end of file + +datamodule.args.task_specific_args.pcba_1328.split_names=[train, val, test_seen] \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gine-300.sh b/single_runs/pcba_1328/gine-300.sh index 50da4374a..b6a77e2c0 100644 --- a/single_runs/pcba_1328/gine-300.sh +++ b/single_runs/pcba_1328/gine-300.sh @@ -26,4 +26,4 @@ micromamba run -n graphium -c graphium-train \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=600 \ constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.pcba_1328.split_names=["train", "val", "test-seen"] \ \ No newline at end of file + +datamodule.args.task_specific_args.pcba_1328.split_names=[train, val, test_seen] \ \ No newline at end of file From bd3784c1e727391c591c63a970c357a9be32d6de Mon Sep 17 00:00:00 2001 From: WenkelF Date: Wed, 23 Aug 2023 21:03:57 -0400 Subject: [PATCH 19/30] Updating test_splits to test_seen --- .../tasks/loss_metrics_datamodule/l1000_mcf7.yaml | 1 + .../tasks/loss_metrics_datamodule/l1000_vcap.yaml | 1 + 
.../hydra-configs/tasks/loss_metrics_datamodule/pcba_1328.yaml | 1 + scripts/featurize/featurize.sh | 3 +-- single_runs/l1000_mcf7/gcn-100.sh | 3 +-- single_runs/l1000_mcf7/gcn-200.sh | 3 +-- single_runs/l1000_mcf7/gcn-300.sh | 3 +-- single_runs/l1000_mcf7/gin-100.sh | 3 +-- single_runs/l1000_mcf7/gin-200.sh | 3 +-- single_runs/l1000_mcf7/gin-300.sh | 3 +-- single_runs/l1000_mcf7/gine-100.sh | 3 +-- single_runs/l1000_mcf7/gine-200.sh | 3 +-- single_runs/l1000_mcf7/gine-300.sh | 3 +-- single_runs/l1000_vcap/gcn-100.sh | 3 +-- single_runs/l1000_vcap/gcn-200.sh | 3 +-- single_runs/l1000_vcap/gcn-300.sh | 3 +-- single_runs/l1000_vcap/gin-100.sh | 3 +-- single_runs/l1000_vcap/gin-200.sh | 3 +-- single_runs/l1000_vcap/gin-300.sh | 3 +-- single_runs/l1000_vcap/gine-100.sh | 3 +-- single_runs/l1000_vcap/gine-200.sh | 3 +-- single_runs/l1000_vcap/gine-300.sh | 3 +-- single_runs/pcba_1328/gcn-100.sh | 3 +-- single_runs/pcba_1328/gcn-200.sh | 3 +-- single_runs/pcba_1328/gcn-300.sh | 3 +-- single_runs/pcba_1328/gin-100.sh | 3 +-- single_runs/pcba_1328/gin-200.sh | 3 +-- single_runs/pcba_1328/gin-300.sh | 3 +-- single_runs/pcba_1328/gine-100.sh | 3 +-- single_runs/pcba_1328/gine-200.sh | 3 +-- single_runs/pcba_1328/gine-300.sh | 3 +-- 31 files changed, 31 insertions(+), 56 deletions(-) diff --git a/expts/hydra-configs/tasks/loss_metrics_datamodule/l1000_mcf7.yaml b/expts/hydra-configs/tasks/loss_metrics_datamodule/l1000_mcf7.yaml index ec48f2e22..d6c9fa765 100644 --- a/expts/hydra-configs/tasks/loss_metrics_datamodule/l1000_mcf7.yaml +++ b/expts/hydra-configs/tasks/loss_metrics_datamodule/l1000_mcf7.yaml @@ -45,4 +45,5 @@ datamodule: # sample_size: 2000 # use sample_size for test task_level: graph splits_path: ../data/graphium/large-dataset/l1000_mcf7_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_mcf7_random_splits.pt` + split_names: [train, val, test_seen] epoch_sampling_fraction: 1.0 \ No newline at end of file diff --git a/expts/hydra-configs/tasks/loss_metrics_datamodule/l1000_vcap.yaml b/expts/hydra-configs/tasks/loss_metrics_datamodule/l1000_vcap.yaml index 9470ec903..08450ed01 100644 --- a/expts/hydra-configs/tasks/loss_metrics_datamodule/l1000_vcap.yaml +++ b/expts/hydra-configs/tasks/loss_metrics_datamodule/l1000_vcap.yaml @@ -45,4 +45,5 @@ datamodule: # sample_size: 2000 # use sample_size for test task_level: graph splits_path: ../data/graphium/large-dataset/l1000_vcap_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_vcap_random_splits.pt` + split_names: [train, val, test_seen] epoch_sampling_fraction: 1.0 \ No newline at end of file diff --git a/expts/hydra-configs/tasks/loss_metrics_datamodule/pcba_1328.yaml b/expts/hydra-configs/tasks/loss_metrics_datamodule/pcba_1328.yaml index 367521338..469dce23c 100644 --- a/expts/hydra-configs/tasks/loss_metrics_datamodule/pcba_1328.yaml +++ b/expts/hydra-configs/tasks/loss_metrics_datamodule/pcba_1328.yaml @@ -37,4 +37,5 @@ datamodule: # sample_size: 2000 # use sample_size for test task_level: graph splits_path: ../data/graphium/large-dataset/pcba_1328_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcba_1328_random_splits.pt` + split_names: [train, val, test_seen] epoch_sampling_fraction: 1.0 \ No newline at end of file diff --git a/scripts/featurize/featurize.sh b/scripts/featurize/featurize.sh index 
4aef65633..49d5ab7a3 100644 --- a/scripts/featurize/featurize.sh +++ b/scripts/featurize/featurize.sh @@ -19,5 +19,4 @@ set -e micromamba run -n graphium -c graphium-prepare-data \ architecture=largemix \ tasks=pcba_1328 \ - training=largemix \ - +datamodule.args.task_specific_args.pcba_1328.split_names=[train, val, test_seen] \ \ No newline at end of file + training=largemix \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gcn-100.sh b/single_runs/l1000_mcf7/gcn-100.sh index 00a2cd75e..0ab762292 100644 --- a/single_runs/l1000_mcf7/gcn-100.sh +++ b/single_runs/l1000_mcf7/gcn-100.sh @@ -25,5 +25,4 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gcn/100/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=100 \ - constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_mcf7.split_names=[train, val, test_seen] \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gcn-200.sh b/single_runs/l1000_mcf7/gcn-200.sh index 7e3d29d88..499e426ac 100644 --- a/single_runs/l1000_mcf7/gcn-200.sh +++ b/single_runs/l1000_mcf7/gcn-200.sh @@ -25,5 +25,4 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gcn/200/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=200 \ - constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_mcf7.split_names=[train, val, test_seen] \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gcn-300.sh b/single_runs/l1000_mcf7/gcn-300.sh index 50e9c0170..8542561af 100644 --- a/single_runs/l1000_mcf7/gcn-300.sh +++ b/single_runs/l1000_mcf7/gcn-300.sh @@ -25,5 +25,4 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gcn/300/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=300 \ - constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_mcf7.split_names=[train, val, test_seen] \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gin-100.sh b/single_runs/l1000_mcf7/gin-100.sh index be2ab9386..c48d1f955 100644 --- a/single_runs/l1000_mcf7/gin-100.sh +++ b/single_runs/l1000_mcf7/gin-100.sh @@ -25,5 +25,4 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gin/100/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=100 \ - constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_mcf7.split_names=[train, val, test_seen] \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gin-200.sh b/single_runs/l1000_mcf7/gin-200.sh index 487816d61..e4fcf7d42 100644 --- a/single_runs/l1000_mcf7/gin-200.sh +++ b/single_runs/l1000_mcf7/gin-200.sh @@ -25,5 +25,4 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gin/200/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=200 \ - constants.wandb.project="neurips2023-large-single-dataset" \ - 
+datamodule.args.task_specific_args.l1000_mcf7.split_names=[train, val, test_seen] \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gin-300.sh b/single_runs/l1000_mcf7/gin-300.sh index 07d8fe0fe..2be5b4d30 100644 --- a/single_runs/l1000_mcf7/gin-300.sh +++ b/single_runs/l1000_mcf7/gin-300.sh @@ -25,5 +25,4 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gin/300/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=300 \ - constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_mcf7.split_names=[train, val, test_seen] \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gine-100.sh b/single_runs/l1000_mcf7/gine-100.sh index 87b770fea..9ff03e682 100644 --- a/single_runs/l1000_mcf7/gine-100.sh +++ b/single_runs/l1000_mcf7/gine-100.sh @@ -25,5 +25,4 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gine/100/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=100 \ - constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_mcf7.split_names=[train, val, test_seen] \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gine-200.sh b/single_runs/l1000_mcf7/gine-200.sh index 2f3390e85..925f9f202 100644 --- a/single_runs/l1000_mcf7/gine-200.sh +++ b/single_runs/l1000_mcf7/gine-200.sh @@ -25,5 +25,4 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gine/200/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=200 \ - constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_mcf7.split_names=[train, val, test_seen] \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gine-300.sh b/single_runs/l1000_mcf7/gine-300.sh index 004793c89..f86fa4e31 100644 --- a/single_runs/l1000_mcf7/gine-300.sh +++ b/single_runs/l1000_mcf7/gine-300.sh @@ -25,5 +25,4 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gine/300/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=300 \ - constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_mcf7.split_names=[train, val, test_seen] \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gcn-100.sh b/single_runs/l1000_vcap/gcn-100.sh index 53847d131..307e9dde8 100644 --- a/single_runs/l1000_vcap/gcn-100.sh +++ b/single_runs/l1000_vcap/gcn-100.sh @@ -25,5 +25,4 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gcn/100/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=100 \ - constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_vcap.split_names=[train, val, test_seen] \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git 
a/single_runs/l1000_vcap/gcn-200.sh b/single_runs/l1000_vcap/gcn-200.sh index e17eb94f7..b5a4df952 100644 --- a/single_runs/l1000_vcap/gcn-200.sh +++ b/single_runs/l1000_vcap/gcn-200.sh @@ -25,5 +25,4 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gcn/200/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=200 \ - constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_vcap.split_names=[train, val, test_seen] \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gcn-300.sh b/single_runs/l1000_vcap/gcn-300.sh index 45b0d5554..5331cce48 100644 --- a/single_runs/l1000_vcap/gcn-300.sh +++ b/single_runs/l1000_vcap/gcn-300.sh @@ -25,5 +25,4 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gcn/300/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=300 \ - constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_vcap.split_names=[train, val, test_seen] \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gin-100.sh b/single_runs/l1000_vcap/gin-100.sh index 7b9b80f36..df8154294 100644 --- a/single_runs/l1000_vcap/gin-100.sh +++ b/single_runs/l1000_vcap/gin-100.sh @@ -25,5 +25,4 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gin/100/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=100 \ - constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_vcap.split_names=[train, val, test_seen] \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gin-200.sh b/single_runs/l1000_vcap/gin-200.sh index 92715b444..fe0c652ea 100644 --- a/single_runs/l1000_vcap/gin-200.sh +++ b/single_runs/l1000_vcap/gin-200.sh @@ -25,5 +25,4 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gin/200/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=200 \ - constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_vcap.split_names=[train, val, test_seen] \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gin-300.sh b/single_runs/l1000_vcap/gin-300.sh index 718644a88..4304fa941 100644 --- a/single_runs/l1000_vcap/gin-300.sh +++ b/single_runs/l1000_vcap/gin-300.sh @@ -25,5 +25,4 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gin/300/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=300 \ - constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_vcap.split_names=[train, val, test_seen] \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gine-100.sh b/single_runs/l1000_vcap/gine-100.sh index 4d38a5cbd..90071ba3c 100644 --- a/single_runs/l1000_vcap/gine-100.sh +++ b/single_runs/l1000_vcap/gine-100.sh @@ -25,5 +25,4 @@ micromamba run -n graphium -c 
graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gine/100/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=100 \ - constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_vcap.split_names=[train, val, test_seen] \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gine-200.sh b/single_runs/l1000_vcap/gine-200.sh index d1c68271f..ee113eaee 100644 --- a/single_runs/l1000_vcap/gine-200.sh +++ b/single_runs/l1000_vcap/gine-200.sh @@ -25,5 +25,4 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gine/200/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=200 \ - constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_vcap.split_names=[train, val, test_seen] \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gine-300.sh b/single_runs/l1000_vcap/gine-300.sh index 37837f869..e57c0dca0 100644 --- a/single_runs/l1000_vcap/gine-300.sh +++ b/single_runs/l1000_vcap/gine-300.sh @@ -25,5 +25,4 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gine/300/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=300 \ - constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.l1000_vcap.split_names=[train, val, test_seen] \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gcn-100.sh b/single_runs/pcba_1328/gcn-100.sh index 0dd562d60..7de88e2fa 100644 --- a/single_runs/pcba_1328/gcn-100.sh +++ b/single_runs/pcba_1328/gcn-100.sh @@ -25,5 +25,4 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gcn/100/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=400 \ - constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.pcba_1328.split_names=[train, val, test_seen] \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gcn-200.sh b/single_runs/pcba_1328/gcn-200.sh index 2b822183f..d30e8fd52 100644 --- a/single_runs/pcba_1328/gcn-200.sh +++ b/single_runs/pcba_1328/gcn-200.sh @@ -25,5 +25,4 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gcn/200/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=500 \ - constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.pcba_1328.split_names=[train, val, test_seen] \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gcn-300.sh b/single_runs/pcba_1328/gcn-300.sh index 8d35772e1..71cc4af2c 100644 --- a/single_runs/pcba_1328/gcn-300.sh +++ b/single_runs/pcba_1328/gcn-300.sh @@ -25,5 +25,4 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gcn/300/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=600 \ - constants.wandb.project="neurips2023-large-single-dataset" \ - 
+datamodule.args.task_specific_args.pcba_1328.split_names=[train, val, test_seen] \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gin-100.sh b/single_runs/pcba_1328/gin-100.sh index ccd0a067b..58a9aed4f 100644 --- a/single_runs/pcba_1328/gin-100.sh +++ b/single_runs/pcba_1328/gin-100.sh @@ -25,5 +25,4 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gin/100/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=400 \ - constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.pcba_1328.split_names=[train, val, test_seen] \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gin-200.sh b/single_runs/pcba_1328/gin-200.sh index 8085e383e..ad501ea46 100644 --- a/single_runs/pcba_1328/gin-200.sh +++ b/single_runs/pcba_1328/gin-200.sh @@ -25,5 +25,4 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gin/200/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=500 \ - constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.pcba_1328.split_names=[train, val, test_seen] \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gin-300.sh b/single_runs/pcba_1328/gin-300.sh index b4a5b9f13..b245e9592 100644 --- a/single_runs/pcba_1328/gin-300.sh +++ b/single_runs/pcba_1328/gin-300.sh @@ -25,5 +25,4 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gin/300/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=600 \ - constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.pcba_1328.split_names=[train, val, test_seen] \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gine-100.sh b/single_runs/pcba_1328/gine-100.sh index 7f998372e..c4a1650b4 100644 --- a/single_runs/pcba_1328/gine-100.sh +++ b/single_runs/pcba_1328/gine-100.sh @@ -25,5 +25,4 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gine/100/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=400 \ - constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.pcba_1328.split_names=[train, val, test_seen] \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gine-200.sh b/single_runs/pcba_1328/gine-200.sh index b9c0d686f..b2dbd2d7f 100644 --- a/single_runs/pcba_1328/gine-200.sh +++ b/single_runs/pcba_1328/gine-200.sh @@ -25,5 +25,4 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gine/200/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=500 \ - constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.pcba_1328.split_names=[train, val, test_seen] \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gine-300.sh 
b/single_runs/pcba_1328/gine-300.sh index b6a77e2c0..fe9afb79d 100644 --- a/single_runs/pcba_1328/gine-300.sh +++ b/single_runs/pcba_1328/gine-300.sh @@ -25,5 +25,4 @@ micromamba run -n graphium -c graphium-train \ trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gine/300/" \ predictor.optim_kwargs.lr=0.0004 \ constants.seed=600 \ - constants.wandb.project="neurips2023-large-single-dataset" \ - +datamodule.args.task_specific_args.pcba_1328.split_names=[train, val, test_seen] \ \ No newline at end of file + constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file From 2ed5f1e636cb2458cfefe0fc0f6fdeb8286bf0c5 Mon Sep 17 00:00:00 2001 From: WenkelF Date: Thu, 24 Aug 2023 14:14:03 -0400 Subject: [PATCH 20/30] Removing scripts --- .gitignore | 1 + scripts/convert_yml.py | 29 ------------------- scripts/featurize/featurize.sh | 22 -------------- scripts/ipu_start.sh | 11 ------- scripts/ipu_venv.sh | 16 ---------- scripts/test/gcn/test-best-th2_gcn_v100.sh | 25 ---------------- scripts/test/gcn/test-last-th2_gcn_a100.sh | 24 --------------- scripts/test/gin/test-best-th2_gin_v100.sh | 25 ---------------- scripts/test/gin/test-last-th2_gin_a100.sh | 24 --------------- scripts/test/gine/test-best-th2_gine_v100.sh | 25 ---------------- scripts/test/gine/test-last-th2_gine_a100.sh | 24 --------------- scripts/test/gine/test-last-th2_gine_a112.sh | 24 --------------- scripts/test/test-on-cpu.sh | 25 ---------------- .../train/gcn/gpu/large_th2_gcn_a100_disk.sh | 24 --------------- .../train/gcn/gpu/large_th3_gcn_a100_disk.sh | 24 --------------- .../train/gcn/gpu/large_th4_gcn_a100_disk.sh | 24 --------------- .../train/gin/gpu/large_th2_gin_a100_disk.sh | 24 --------------- .../train/gin/gpu/large_th3_gin_a100_disk.sh | 24 --------------- .../train/gin/gpu/large_th4_gin_a100_disk.sh | 24 --------------- .../gine/cpu/large_th2_gine_c112_disk.sh | 24 --------------- .../gine/cpu/large_th3_gine_c112_disk.sh | 24 --------------- .../gine/cpu/large_th4_gine_c112_disk.sh | 24 --------------- .../gine/gpu/large_th2_gine_a100_disk.sh | 24 --------------- .../gine/gpu/large_th3_gine_a100_disk.sh | 24 --------------- .../gine/gpu/large_th4_gine_a100_disk.sh | 24 --------------- scripts/train/sweep.sh | 21 -------------- scripts/train/test.sh | 6 ---- scripts/train/train.sh | 27 ----------------- single_runs/l1000_mcf7/gcn-100.sh | 28 ------------------ single_runs/l1000_mcf7/gcn-200.sh | 28 ------------------ single_runs/l1000_mcf7/gcn-300.sh | 28 ------------------ single_runs/l1000_mcf7/gin-100.sh | 28 ------------------ single_runs/l1000_mcf7/gin-200.sh | 28 ------------------ single_runs/l1000_mcf7/gin-300.sh | 28 ------------------ single_runs/l1000_mcf7/gine-100.sh | 28 ------------------ single_runs/l1000_mcf7/gine-200.sh | 28 ------------------ single_runs/l1000_mcf7/gine-300.sh | 28 ------------------ single_runs/l1000_vcap/gcn-100.sh | 28 ------------------ single_runs/l1000_vcap/gcn-200.sh | 28 ------------------ single_runs/l1000_vcap/gcn-300.sh | 28 ------------------ single_runs/l1000_vcap/gin-100.sh | 28 ------------------ single_runs/l1000_vcap/gin-200.sh | 28 ------------------ single_runs/l1000_vcap/gin-300.sh | 28 ------------------ single_runs/l1000_vcap/gine-100.sh | 28 ------------------ single_runs/l1000_vcap/gine-200.sh | 28 ------------------ single_runs/l1000_vcap/gine-300.sh | 28 ------------------ single_runs/pcba_1328/gcn-100.sh | 28 ------------------ single_runs/pcba_1328/gcn-200.sh | 28 ------------------ 
single_runs/pcba_1328/gcn-300.sh | 28 ------------------ single_runs/pcba_1328/gin-100.sh | 28 ------------------ single_runs/pcba_1328/gin-200.sh | 28 ------------------ single_runs/pcba_1328/gin-300.sh | 28 ------------------ single_runs/pcba_1328/gine-100.sh | 28 ------------------ single_runs/pcba_1328/gine-200.sh | 28 ------------------ single_runs/pcba_1328/gine-300.sh | 28 ------------------ sweeps/gcn-l1000_mcf7.sh | 23 --------------- sweeps/gcn-l1000_vcap.sh | 23 --------------- sweeps/gcn-pcba_1328.sh | 23 --------------- sweeps/gin-l1000_mcf7.sh | 23 --------------- sweeps/gin-l1000_vcap.sh | 23 --------------- sweeps/gin-pcba_1328.sh | 23 --------------- sweeps/gine-l1000_mcf7.sh | 23 --------------- sweeps/gine-l1000_vcap.sh | 23 --------------- sweeps/gine-pcba_1328.sh | 23 --------------- test_sweep.sh | 23 --------------- train.sh | 27 ----------------- 66 files changed, 1 insertion(+), 1629 deletions(-) delete mode 100644 scripts/convert_yml.py delete mode 100644 scripts/featurize/featurize.sh delete mode 100644 scripts/ipu_start.sh delete mode 100644 scripts/ipu_venv.sh delete mode 100644 scripts/test/gcn/test-best-th2_gcn_v100.sh delete mode 100644 scripts/test/gcn/test-last-th2_gcn_a100.sh delete mode 100644 scripts/test/gin/test-best-th2_gin_v100.sh delete mode 100644 scripts/test/gin/test-last-th2_gin_a100.sh delete mode 100644 scripts/test/gine/test-best-th2_gine_v100.sh delete mode 100644 scripts/test/gine/test-last-th2_gine_a100.sh delete mode 100644 scripts/test/gine/test-last-th2_gine_a112.sh delete mode 100644 scripts/test/test-on-cpu.sh delete mode 100644 scripts/train/gcn/gpu/large_th2_gcn_a100_disk.sh delete mode 100644 scripts/train/gcn/gpu/large_th3_gcn_a100_disk.sh delete mode 100644 scripts/train/gcn/gpu/large_th4_gcn_a100_disk.sh delete mode 100644 scripts/train/gin/gpu/large_th2_gin_a100_disk.sh delete mode 100644 scripts/train/gin/gpu/large_th3_gin_a100_disk.sh delete mode 100644 scripts/train/gin/gpu/large_th4_gin_a100_disk.sh delete mode 100644 scripts/train/gine/cpu/large_th2_gine_c112_disk.sh delete mode 100644 scripts/train/gine/cpu/large_th3_gine_c112_disk.sh delete mode 100644 scripts/train/gine/cpu/large_th4_gine_c112_disk.sh delete mode 100644 scripts/train/gine/gpu/large_th2_gine_a100_disk.sh delete mode 100644 scripts/train/gine/gpu/large_th3_gine_a100_disk.sh delete mode 100644 scripts/train/gine/gpu/large_th4_gine_a100_disk.sh delete mode 100644 scripts/train/sweep.sh delete mode 100644 scripts/train/test.sh delete mode 100644 scripts/train/train.sh delete mode 100644 single_runs/l1000_mcf7/gcn-100.sh delete mode 100644 single_runs/l1000_mcf7/gcn-200.sh delete mode 100644 single_runs/l1000_mcf7/gcn-300.sh delete mode 100644 single_runs/l1000_mcf7/gin-100.sh delete mode 100644 single_runs/l1000_mcf7/gin-200.sh delete mode 100644 single_runs/l1000_mcf7/gin-300.sh delete mode 100644 single_runs/l1000_mcf7/gine-100.sh delete mode 100644 single_runs/l1000_mcf7/gine-200.sh delete mode 100644 single_runs/l1000_mcf7/gine-300.sh delete mode 100644 single_runs/l1000_vcap/gcn-100.sh delete mode 100644 single_runs/l1000_vcap/gcn-200.sh delete mode 100644 single_runs/l1000_vcap/gcn-300.sh delete mode 100644 single_runs/l1000_vcap/gin-100.sh delete mode 100644 single_runs/l1000_vcap/gin-200.sh delete mode 100644 single_runs/l1000_vcap/gin-300.sh delete mode 100644 single_runs/l1000_vcap/gine-100.sh delete mode 100644 single_runs/l1000_vcap/gine-200.sh delete mode 100644 single_runs/l1000_vcap/gine-300.sh delete mode 100644 
single_runs/pcba_1328/gcn-100.sh delete mode 100644 single_runs/pcba_1328/gcn-200.sh delete mode 100644 single_runs/pcba_1328/gcn-300.sh delete mode 100644 single_runs/pcba_1328/gin-100.sh delete mode 100644 single_runs/pcba_1328/gin-200.sh delete mode 100644 single_runs/pcba_1328/gin-300.sh delete mode 100644 single_runs/pcba_1328/gine-100.sh delete mode 100644 single_runs/pcba_1328/gine-200.sh delete mode 100644 single_runs/pcba_1328/gine-300.sh delete mode 100644 sweeps/gcn-l1000_mcf7.sh delete mode 100644 sweeps/gcn-l1000_vcap.sh delete mode 100644 sweeps/gcn-pcba_1328.sh delete mode 100644 sweeps/gin-l1000_mcf7.sh delete mode 100644 sweeps/gin-l1000_vcap.sh delete mode 100644 sweeps/gin-pcba_1328.sh delete mode 100644 sweeps/gine-l1000_mcf7.sh delete mode 100644 sweeps/gine-l1000_vcap.sh delete mode 100644 sweeps/gine-pcba_1328.sh delete mode 100644 test_sweep.sh delete mode 100644 train.sh diff --git a/.gitignore b/.gitignore index 77cd466fc..371e5343c 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,7 @@ datacache/ tests/temp_cache* predictions/ draft/ +scripts/ # Data and predictions graphium/data/ZINC_bench_gnn/ diff --git a/scripts/convert_yml.py b/scripts/convert_yml.py deleted file mode 100644 index 90cb8e3c1..000000000 --- a/scripts/convert_yml.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -Convert the dependencies from conda's `env.yml` to pip `requirements.txt` -""" - -import ruamel.yaml - -yaml = ruamel.yaml.YAML() -data = yaml.load(open("env.yml")) - -requirements = [] -for dep in data["dependencies"]: - if isinstance(dep, str): - outputs = dep.split("=") - if len(outputs) == 1: - package = outputs[0] - requirements.append(package) - elif len(outputs) == 2: - package, package_version = outputs[0], outputs[1] - requirements.append(package + "==" + package_version) - elif len(outputs) == 3: - package, package_version, python_version = outputs[0], outputs[1], outputs[2] - requirements.append(package + "==" + package_version) - elif isinstance(dep, dict): - for preq in dep.get("pip", []): - requirements.append(preq) - -with open("requirements.txt", "w") as fp: - for requirement in requirements: - print(requirement, file=fp) diff --git a/scripts/featurize/featurize.sh b/scripts/featurize/featurize.sh deleted file mode 100644 index 49d5ab7a3..000000000 --- a/scripts/featurize/featurize.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=pcba_feat - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/featurize.out -#SBATCH --error=outputs/error_featurize.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=c112 - -set -e - -micromamba run -n graphium -c graphium-prepare-data \ - architecture=largemix \ - tasks=pcba_1328 \ - training=largemix \ \ No newline at end of file diff --git a/scripts/ipu_start.sh b/scripts/ipu_start.sh deleted file mode 100644 index 4c45da5c0..000000000 --- a/scripts/ipu_start.sh +++ /dev/null @@ -1,11 +0,0 @@ -""" -Start the ipu environment and SDK -""" - -source /opt/gc/sdk-3.0.0+1128/poplar-ubuntu_20_04-3.0.0+5468-0379b9a65d/enable.sh -source /opt/gc/sdk-3.0.0+1128/popart-ubuntu_20_04-3.0.0+5468-0379b9a65d/enable.sh - -source ~/.venv/graphium_ipu/bin/activate # Change to your path - -export VISUAL=vim -export EDITOR="$VISUAL" diff --git a/scripts/ipu_venv.sh b/scripts/ipu_venv.sh deleted file mode 100644 index 9c9a96976..000000000 --- a/scripts/ipu_venv.sh +++ /dev/null @@ -1,16 
+0,0 @@ -""" -Create the pip environment for IPU -""" - -## Uncomment this to create the folder for the environment -# mkdir ~/.venv # Create the folder for the environment -# python3 -m venv ~/.venv/graphium_ipu # Create the environment -# source ~/.venv/graphium_ipu/bin/activate # Activate the environment - -# Installing the dependencies for the IPU environment -pip install torch==1.10+cpu torchvision==0.11+cpu torchaudio==0.10 -f https://download.pytorch.org/whl/torch_stable.html -pip install torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric -f https://data.pyg.org/whl/torch-1.10.0+cpu.html -pip install dgl dglgo -f https://data.dgl.ai/wheels/repo.html -pip install /opt/gc/sdk-3.0.0+1128/poptorch-3.0.0+84519_672c9cbc7f_ubuntu_20_04-cp38-cp38-linux_x86_64.whl -pip install -r requirements.txt -pip install -e . diff --git a/scripts/test/gcn/test-best-th2_gcn_v100.sh b/scripts/test/gcn/test-best-th2_gcn_v100.sh deleted file mode 100644 index 5b454df9c..000000000 --- a/scripts/test/gcn/test-best-th2_gcn_v100.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=test-best-th2_gcn_v100 - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/test-best-th2_gcn_v100.out -#SBATCH --error=outputs/error_test-best-th2_gcn_v100.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1001 - -set -e - -micromamba run -n graphium -c graphium-test architecture=largemix tasks=largemix training=largemix \ - model=gcn accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th2/gcn/gpu/" \ - datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th2.csv.gz" \ - datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th2.csv.gz" \ - +ckpt_name_for_testing="best" - \ No newline at end of file diff --git a/scripts/test/gcn/test-last-th2_gcn_a100.sh b/scripts/test/gcn/test-last-th2_gcn_a100.sh deleted file mode 100644 index 76db6301d..000000000 --- a/scripts/test/gcn/test-last-th2_gcn_a100.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=test-last-th2_gcn_a100 - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/test-last-th2_gcn_a100.out -#SBATCH --error=outputs/error_test-last-th2_gcn_a100.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=a1001 - -set -e - -micromamba run -n graphium -c graphium-test architecture=largemix tasks=largemix training=largemix \ - model=gcn accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th2/gcn/gpu/" \ - datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th2.csv.gz" \ - datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th2.csv.gz" - \ No newline at end of file diff --git a/scripts/test/gin/test-best-th2_gin_v100.sh b/scripts/test/gin/test-best-th2_gin_v100.sh deleted file mode 100644 index 5b454df9c..000000000 --- a/scripts/test/gin/test-best-th2_gin_v100.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=test-best-th2_gcn_v100 - -## Files for logs: here we redirect stoout and sterr to the same file 
-#SBATCH --output=outputs/test-best-th2_gcn_v100.out -#SBATCH --error=outputs/error_test-best-th2_gcn_v100.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1001 - -set -e - -micromamba run -n graphium -c graphium-test architecture=largemix tasks=largemix training=largemix \ - model=gcn accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th2/gcn/gpu/" \ - datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th2.csv.gz" \ - datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th2.csv.gz" \ - +ckpt_name_for_testing="best" - \ No newline at end of file diff --git a/scripts/test/gin/test-last-th2_gin_a100.sh b/scripts/test/gin/test-last-th2_gin_a100.sh deleted file mode 100644 index 76db6301d..000000000 --- a/scripts/test/gin/test-last-th2_gin_a100.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=test-last-th2_gcn_a100 - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/test-last-th2_gcn_a100.out -#SBATCH --error=outputs/error_test-last-th2_gcn_a100.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=a1001 - -set -e - -micromamba run -n graphium -c graphium-test architecture=largemix tasks=largemix training=largemix \ - model=gcn accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th2/gcn/gpu/" \ - datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th2.csv.gz" \ - datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th2.csv.gz" - \ No newline at end of file diff --git a/scripts/test/gine/test-best-th2_gine_v100.sh b/scripts/test/gine/test-best-th2_gine_v100.sh deleted file mode 100644 index a88254f4d..000000000 --- a/scripts/test/gine/test-best-th2_gine_v100.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=test-best-th2_gine_v100 - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/test-best-th2_gine_v100.out -#SBATCH --error=outputs/error_test-best-th2_gine_v100.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1001 - -set -e - -micromamba run -n graphium -c graphium-test architecture=largemix tasks=largemix training=largemix \ - model=gine accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th2/gine/gpu/" \ - datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th2.csv.gz" \ - datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th2.csv.gz" \ - +ckpt_name_for_testing="best" - \ No newline at end of file diff --git a/scripts/test/gine/test-last-th2_gine_a100.sh b/scripts/test/gine/test-last-th2_gine_a100.sh deleted file mode 100644 index 9a1bab0eb..000000000 --- a/scripts/test/gine/test-last-th2_gine_a100.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=test-last-th2_gine_c112 - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/test-last-th2_gine_c112.out -#SBATCH 
--error=outputs/error_test-last-th2_gine_c112.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=c112 - -set -e - -micromamba run -n graphium -c graphium-test architecture=largemix tasks=largemix training=largemix \ - model=gine accelerator=cpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th2/gine/cpu/" \ - datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th2.csv.gz" \ - datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th2.csv.gz" - \ No newline at end of file diff --git a/scripts/test/gine/test-last-th2_gine_a112.sh b/scripts/test/gine/test-last-th2_gine_a112.sh deleted file mode 100644 index 90d339758..000000000 --- a/scripts/test/gine/test-last-th2_gine_a112.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=test-last-th2_gine_a100 - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/test-last-th2_gine_a100.out -#SBATCH --error=outputs/error_test-last-th2_gine_a100.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=a1001 - -set -e - -micromamba run -n graphium -c graphium-test architecture=largemix tasks=largemix training=largemix \ - model=gine accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th2/gine/gpu/" \ - datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th2.csv.gz" \ - datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th2.csv.gz" - \ No newline at end of file diff --git a/scripts/test/test-on-cpu.sh b/scripts/test/test-on-cpu.sh deleted file mode 100644 index 398739a03..000000000 --- a/scripts/test/test-on-cpu.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=test-on-cpu - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/name=test-on-cpu.out -#SBATCH --error=outputs/error_name=test-on-cpu.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=c112 - -set -e - -micromamba run -n graphium -c graphium-test architecture=largemix tasks=largemix training=largemix \ - accelerator=cpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th4/gcn/gpu/" \ - datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th4.csv.gz" \ - datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th4.csv.gz" \ - model=gcn +ckpt_name_for_testing="best" - \ No newline at end of file diff --git a/scripts/train/gcn/gpu/large_th2_gcn_a100_disk.sh b/scripts/train/gcn/gpu/large_th2_gcn_a100_disk.sh deleted file mode 100644 index 20d6f764e..000000000 --- a/scripts/train/gcn/gpu/large_th2_gcn_a100_disk.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=large_th2_gcn_a100_disk - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/large_th2_gcn_a100_disk.out -#SBATCH --error=outputs/error_large_th2_gcn_a100_disk.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to 
use, -#SBATCH --partition=a1001 - -set -e - -micromamba run -n graphium -c graphium-train architecture=largemix tasks=largemix training=largemix \ - model=gcn accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th2/gcn/gpu/" \ - datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th2.csv.gz" \ - datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th2.csv.gz" - \ No newline at end of file diff --git a/scripts/train/gcn/gpu/large_th3_gcn_a100_disk.sh b/scripts/train/gcn/gpu/large_th3_gcn_a100_disk.sh deleted file mode 100644 index e30951829..000000000 --- a/scripts/train/gcn/gpu/large_th3_gcn_a100_disk.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=large_th3_gcn_a100_disk - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/large_th3_gcn_a100_disk.out -#SBATCH --error=outputs/error_large_th3_gcn_a100_disk.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=a1001 - -set -e - -micromamba run -n graphium -c graphium-train architecture=largemix tasks=largemix training=largemix \ - model=gcn accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th3/gcn/gpu/" \ - datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th3.csv.gz" \ - datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th3.csv.gz" - \ No newline at end of file diff --git a/scripts/train/gcn/gpu/large_th4_gcn_a100_disk.sh b/scripts/train/gcn/gpu/large_th4_gcn_a100_disk.sh deleted file mode 100644 index 556cc5b57..000000000 --- a/scripts/train/gcn/gpu/large_th4_gcn_a100_disk.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=large_th4_gcn_a100_disk - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/large_th4_gcn_a100_disk.out -#SBATCH --error=outputs/error_large_th4_gcn_a100_disk.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=a1001 - -set -e - -micromamba run -n graphium -c graphium-train architecture=largemix tasks=largemix training=largemix \ - model=gcn accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th4/gcn/gpu/" \ - datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th4.csv.gz" \ - datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th4.csv.gz" - \ No newline at end of file diff --git a/scripts/train/gin/gpu/large_th2_gin_a100_disk.sh b/scripts/train/gin/gpu/large_th2_gin_a100_disk.sh deleted file mode 100644 index 6a6d8a701..000000000 --- a/scripts/train/gin/gpu/large_th2_gin_a100_disk.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=large_th2_gin_a100_disk - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/large_th2_gin_a100_disk.out -#SBATCH --error=outputs/error_large_th2_gin_a100_disk.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=a1001 - -set -e - -micromamba run -n graphium -c 
graphium-train architecture=largemix tasks=largemix training=largemix \ - model=gin accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th2/gin/gpu/" \ - datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th2.csv.gz" \ - datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th2.csv.gz" - \ No newline at end of file diff --git a/scripts/train/gin/gpu/large_th3_gin_a100_disk.sh b/scripts/train/gin/gpu/large_th3_gin_a100_disk.sh deleted file mode 100644 index 94570d301..000000000 --- a/scripts/train/gin/gpu/large_th3_gin_a100_disk.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=large_th3_gin_a100_disk - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/large_th3_gin_a100_disk.out -#SBATCH --error=outputs/error_large_th3_gin_a100_disk.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=a1001 - -set -e - -micromamba run -n graphium -c graphium-train architecture=largemix tasks=largemix training=largemix \ - model=gin accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th3/gin/gpu/" \ - datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th3.csv.gz" \ - datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th3.csv.gz" - \ No newline at end of file diff --git a/scripts/train/gin/gpu/large_th4_gin_a100_disk.sh b/scripts/train/gin/gpu/large_th4_gin_a100_disk.sh deleted file mode 100644 index c900f1102..000000000 --- a/scripts/train/gin/gpu/large_th4_gin_a100_disk.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=large_th4_gin_a100_disk - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/large_th4_gin_a100_disk.out -#SBATCH --error=outputs/error_large_th4_gin_a100_disk.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=a1001 - -set -e - -micromamba run -n graphium -c graphium-train architecture=largemix tasks=largemix training=largemix \ - model=gin accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th4/gin/gpu/" \ - datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th4.csv.gz" \ - datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th4.csv.gz" - \ No newline at end of file diff --git a/scripts/train/gine/cpu/large_th2_gine_c112_disk.sh b/scripts/train/gine/cpu/large_th2_gine_c112_disk.sh deleted file mode 100644 index 86ef64043..000000000 --- a/scripts/train/gine/cpu/large_th2_gine_c112_disk.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=large_th2_gine_c112_disk - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/large_th2_gine_c112_disk.out -#SBATCH --error=outputs/error_large_th2_gine_c112_disk.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=c112 - -set -e - -micromamba run -n graphium -c graphium-train architecture=largemix tasks=largemix training=largemix \ - 
model=gine accelerator=cpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th2/gine/cpu/" \ - datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th2.csv.gz" \ - datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th2.csv.gz" - \ No newline at end of file diff --git a/scripts/train/gine/cpu/large_th3_gine_c112_disk.sh b/scripts/train/gine/cpu/large_th3_gine_c112_disk.sh deleted file mode 100644 index 4d1d51a94..000000000 --- a/scripts/train/gine/cpu/large_th3_gine_c112_disk.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=large_th3_gine_c112_disk - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/large_th3_gine_c112_disk.out -#SBATCH --error=outputs/error_large_th3_gine_c112_disk.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=c112 - -set -e - -micromamba run -n graphium -c graphium-train architecture=largemix tasks=largemix training=largemix \ - model=gine accelerator=cpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th3/gine/cpu/" \ - datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th3.csv.gz" \ - datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th3.csv.gz" - \ No newline at end of file diff --git a/scripts/train/gine/cpu/large_th4_gine_c112_disk.sh b/scripts/train/gine/cpu/large_th4_gine_c112_disk.sh deleted file mode 100644 index 2262f6e5a..000000000 --- a/scripts/train/gine/cpu/large_th4_gine_c112_disk.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=large_th4_gine_c112_disk - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/large_th4_gine_c112_disk.out -#SBATCH --error=outputs/error_large_th4_gine_c112_disk.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=c112 - -set -e - -micromamba run -n graphium -c graphium-train architecture=largemix tasks=largemix training=largemix \ - model=gine accelerator=cpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th4/gine/cpu/" \ - datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th4.csv.gz" \ - datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th4.csv.gz" - \ No newline at end of file diff --git a/scripts/train/gine/gpu/large_th2_gine_a100_disk.sh b/scripts/train/gine/gpu/large_th2_gine_a100_disk.sh deleted file mode 100644 index f0ceda809..000000000 --- a/scripts/train/gine/gpu/large_th2_gine_a100_disk.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=large_th2_gine_a100_disk - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/large_th2_gine_a100_disk.out -#SBATCH --error=outputs/error_large_th2_gine_a100_disk.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=a1001 - -set -e - -micromamba run -n graphium -c graphium-train architecture=largemix tasks=largemix training=largemix \ - model=gine accelerator=gpu \ - 
trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th2/gine/gpu/" \ - datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th2.csv.gz" \ - datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th2.csv.gz" - \ No newline at end of file diff --git a/scripts/train/gine/gpu/large_th3_gine_a100_disk.sh b/scripts/train/gine/gpu/large_th3_gine_a100_disk.sh deleted file mode 100644 index 575df9c99..000000000 --- a/scripts/train/gine/gpu/large_th3_gine_a100_disk.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=large_th3_gine_a100_disk - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/large_th3_gine_a100_disk.out -#SBATCH --error=outputs/error_large_th3_gine_a100_disk.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=a1001 - -set -e - -micromamba run -n graphium -c graphium-train architecture=largemix tasks=largemix training=largemix \ - model=gine accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th3/gine/gpu/" \ - datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th3.csv.gz" \ - datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th3.csv.gz" - \ No newline at end of file diff --git a/scripts/train/gine/gpu/large_th4_gine_a100_disk.sh b/scripts/train/gine/gpu/large_th4_gine_a100_disk.sh deleted file mode 100644 index aec748abe..000000000 --- a/scripts/train/gine/gpu/large_th4_gine_a100_disk.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=large_th4_gine_a100_disk - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/large_th4_gine_a100_disk.out -#SBATCH --error=outputs/error_large_th4_gine_a100_disk.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=a1001 - -set -e - -micromamba run -n graphium -c graphium-train architecture=largemix tasks=largemix training=largemix \ - model=gine accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/large-dataset/th4/gine/gpu/" \ - datamodule.args.task_specific_args.l1000_vcap.df_path="expts/data/large-dataset/LINCS_L1000_VCAP_0-2_th4.csv.gz" \ - datamodule.args.task_specific_args.l1000_mcf7.df_path="expts/data/large-dataset/LINCS_L1000_MCF7_0-2_th4.csv.gz" - \ No newline at end of file diff --git a/scripts/train/sweep.sh b/scripts/train/sweep.sh deleted file mode 100644 index c1647349f..000000000 --- a/scripts/train/sweep.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=sweep - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/sweep.out -#SBATCH --error=outputs/error_sweep.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=c112 - -set -e - -source activate graphium_dev - -wandb agent multitask-gnn/neurips2023-large-single-dataset/d2cm706t \ No newline at end of file diff --git a/scripts/train/test.sh b/scripts/train/test.sh deleted file mode 100644 index e24adc744..000000000 --- a/scripts/train/test.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash - -source 
/home/frederik_valencediscovery_com/.bashrc -source activate graphium_dev - -wandb agent multitask-gnn/neurips2023-large-single-dataset/d2cm706t \ No newline at end of file diff --git a/scripts/train/train.sh b/scripts/train/train.sh deleted file mode 100644 index 0c3b44fed..000000000 --- a/scripts/train/train.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=train - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/train.out -#SBATCH --error=outputs/error_train.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=a1001 - -set -e - -micromamba run -n graphium -c graphium-train \ - model=gine \ - architecture=largemix \ - tasks=l1000_mcf7 \ - training=largemix \ - accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gine/" \ - predictor.optim_kwargs.lr=0.0002 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gcn-100.sh b/single_runs/l1000_mcf7/gcn-100.sh deleted file mode 100644 index 0ab762292..000000000 --- a/single_runs/l1000_mcf7/gcn-100.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=train - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/train.out -#SBATCH --error=outputs/error_train.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1001 - -set -e - -micromamba run -n graphium -c graphium-train \ - model=gcn \ - architecture=largemix \ - tasks=l1000_mcf7 \ - training=largemix \ - accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gcn/100/" \ - predictor.optim_kwargs.lr=0.0004 \ - constants.seed=100 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gcn-200.sh b/single_runs/l1000_mcf7/gcn-200.sh deleted file mode 100644 index 499e426ac..000000000 --- a/single_runs/l1000_mcf7/gcn-200.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=train - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/train.out -#SBATCH --error=outputs/error_train.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1001 - -set -e - -micromamba run -n graphium -c graphium-train \ - model=gcn \ - architecture=largemix \ - tasks=l1000_mcf7 \ - training=largemix \ - accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gcn/200/" \ - predictor.optim_kwargs.lr=0.0004 \ - constants.seed=200 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gcn-300.sh b/single_runs/l1000_mcf7/gcn-300.sh deleted file mode 100644 index 8542561af..000000000 --- a/single_runs/l1000_mcf7/gcn-300.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=train - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/train.out -#SBATCH --error=outputs/error_train.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1001 - 
-set -e - -micromamba run -n graphium -c graphium-train \ - model=gcn \ - architecture=largemix \ - tasks=l1000_mcf7 \ - training=largemix \ - accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gcn/300/" \ - predictor.optim_kwargs.lr=0.0004 \ - constants.seed=300 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gin-100.sh b/single_runs/l1000_mcf7/gin-100.sh deleted file mode 100644 index c48d1f955..000000000 --- a/single_runs/l1000_mcf7/gin-100.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=train - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/train.out -#SBATCH --error=outputs/error_train.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1001 - -set -e - -micromamba run -n graphium -c graphium-train \ - model=gin \ - architecture=largemix \ - tasks=l1000_mcf7 \ - training=largemix \ - accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gin/100/" \ - predictor.optim_kwargs.lr=0.0004 \ - constants.seed=100 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gin-200.sh b/single_runs/l1000_mcf7/gin-200.sh deleted file mode 100644 index e4fcf7d42..000000000 --- a/single_runs/l1000_mcf7/gin-200.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=train - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/train.out -#SBATCH --error=outputs/error_train.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1001 - -set -e - -micromamba run -n graphium -c graphium-train \ - model=gin \ - architecture=largemix \ - tasks=l1000_mcf7 \ - training=largemix \ - accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gin/200/" \ - predictor.optim_kwargs.lr=0.0004 \ - constants.seed=200 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gin-300.sh b/single_runs/l1000_mcf7/gin-300.sh deleted file mode 100644 index 2be5b4d30..000000000 --- a/single_runs/l1000_mcf7/gin-300.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=train - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/train.out -#SBATCH --error=outputs/error_train.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1001 - -set -e - -micromamba run -n graphium -c graphium-train \ - model=gin \ - architecture=largemix \ - tasks=l1000_mcf7 \ - training=largemix \ - accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gin/300/" \ - predictor.optim_kwargs.lr=0.0004 \ - constants.seed=300 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gine-100.sh b/single_runs/l1000_mcf7/gine-100.sh deleted file mode 100644 index 9ff03e682..000000000 --- a/single_runs/l1000_mcf7/gine-100.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=train - -## 
Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/train.out -#SBATCH --error=outputs/error_train.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1001 - -set -e - -micromamba run -n graphium -c graphium-train \ - model=gine \ - architecture=largemix \ - tasks=l1000_mcf7 \ - training=largemix \ - accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gine/100/" \ - predictor.optim_kwargs.lr=0.0004 \ - constants.seed=100 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gine-200.sh b/single_runs/l1000_mcf7/gine-200.sh deleted file mode 100644 index 925f9f202..000000000 --- a/single_runs/l1000_mcf7/gine-200.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=train - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/train.out -#SBATCH --error=outputs/error_train.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1001 - -set -e - -micromamba run -n graphium -c graphium-train \ - model=gine \ - architecture=largemix \ - tasks=l1000_mcf7 \ - training=largemix \ - accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gine/200/" \ - predictor.optim_kwargs.lr=0.0004 \ - constants.seed=200 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_mcf7/gine-300.sh b/single_runs/l1000_mcf7/gine-300.sh deleted file mode 100644 index f86fa4e31..000000000 --- a/single_runs/l1000_mcf7/gine-300.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=train - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/train.out -#SBATCH --error=outputs/error_train.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1001 - -set -e - -micromamba run -n graphium -c graphium-train \ - model=gine \ - architecture=largemix \ - tasks=l1000_mcf7 \ - training=largemix \ - accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gine/300/" \ - predictor.optim_kwargs.lr=0.0004 \ - constants.seed=300 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gcn-100.sh b/single_runs/l1000_vcap/gcn-100.sh deleted file mode 100644 index 307e9dde8..000000000 --- a/single_runs/l1000_vcap/gcn-100.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=train - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/train.out -#SBATCH --error=outputs/error_train.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1001 - -set -e - -micromamba run -n graphium -c graphium-train \ - model=gcn \ - architecture=largemix \ - tasks=l1000_vcap \ - training=largemix \ - accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gcn/100/" \ - predictor.optim_kwargs.lr=0.0004 \ - constants.seed=100 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file 
diff --git a/single_runs/l1000_vcap/gcn-200.sh b/single_runs/l1000_vcap/gcn-200.sh deleted file mode 100644 index b5a4df952..000000000 --- a/single_runs/l1000_vcap/gcn-200.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=train - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/train.out -#SBATCH --error=outputs/error_train.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1001 - -set -e - -micromamba run -n graphium -c graphium-train \ - model=gcn \ - architecture=largemix \ - tasks=l1000_vcap \ - training=largemix \ - accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gcn/200/" \ - predictor.optim_kwargs.lr=0.0004 \ - constants.seed=200 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gcn-300.sh b/single_runs/l1000_vcap/gcn-300.sh deleted file mode 100644 index 5331cce48..000000000 --- a/single_runs/l1000_vcap/gcn-300.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=train - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/train.out -#SBATCH --error=outputs/error_train.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1001 - -set -e - -micromamba run -n graphium -c graphium-train \ - model=gcn \ - architecture=largemix \ - tasks=l1000_vcap \ - training=largemix \ - accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gcn/300/" \ - predictor.optim_kwargs.lr=0.0004 \ - constants.seed=300 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gin-100.sh b/single_runs/l1000_vcap/gin-100.sh deleted file mode 100644 index df8154294..000000000 --- a/single_runs/l1000_vcap/gin-100.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=train - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/train.out -#SBATCH --error=outputs/error_train.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1001 - -set -e - -micromamba run -n graphium -c graphium-train \ - model=gin \ - architecture=largemix \ - tasks=l1000_vcap \ - training=largemix \ - accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gin/100/" \ - predictor.optim_kwargs.lr=0.0004 \ - constants.seed=100 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gin-200.sh b/single_runs/l1000_vcap/gin-200.sh deleted file mode 100644 index fe0c652ea..000000000 --- a/single_runs/l1000_vcap/gin-200.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=train - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/train.out -#SBATCH --error=outputs/error_train.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1001 - -set -e - -micromamba run -n graphium -c graphium-train \ - model=gin \ - architecture=largemix \ - 
tasks=l1000_vcap \ - training=largemix \ - accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gin/200/" \ - predictor.optim_kwargs.lr=0.0004 \ - constants.seed=200 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gin-300.sh b/single_runs/l1000_vcap/gin-300.sh deleted file mode 100644 index 4304fa941..000000000 --- a/single_runs/l1000_vcap/gin-300.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=train - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/train.out -#SBATCH --error=outputs/error_train.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1001 - -set -e - -micromamba run -n graphium -c graphium-train \ - model=gin \ - architecture=largemix \ - tasks=l1000_vcap \ - training=largemix \ - accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gin/300/" \ - predictor.optim_kwargs.lr=0.0004 \ - constants.seed=300 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gine-100.sh b/single_runs/l1000_vcap/gine-100.sh deleted file mode 100644 index 90071ba3c..000000000 --- a/single_runs/l1000_vcap/gine-100.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=train - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/train.out -#SBATCH --error=outputs/error_train.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1001 - -set -e - -micromamba run -n graphium -c graphium-train \ - model=gine \ - architecture=largemix \ - tasks=l1000_vcap \ - training=largemix \ - accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gine/100/" \ - predictor.optim_kwargs.lr=0.0004 \ - constants.seed=100 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gine-200.sh b/single_runs/l1000_vcap/gine-200.sh deleted file mode 100644 index ee113eaee..000000000 --- a/single_runs/l1000_vcap/gine-200.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=train - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/train.out -#SBATCH --error=outputs/error_train.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1001 - -set -e - -micromamba run -n graphium -c graphium-train \ - model=gine \ - architecture=largemix \ - tasks=l1000_vcap \ - training=largemix \ - accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gine/200/" \ - predictor.optim_kwargs.lr=0.0004 \ - constants.seed=200 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/l1000_vcap/gine-300.sh b/single_runs/l1000_vcap/gine-300.sh deleted file mode 100644 index e57c0dca0..000000000 --- a/single_runs/l1000_vcap/gine-300.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=train - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH 
--output=outputs/train.out -#SBATCH --error=outputs/error_train.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1001 - -set -e - -micromamba run -n graphium -c graphium-train \ - model=gine \ - architecture=largemix \ - tasks=l1000_vcap \ - training=largemix \ - accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/l1000_vcap/gine/300/" \ - predictor.optim_kwargs.lr=0.0004 \ - constants.seed=300 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gcn-100.sh b/single_runs/pcba_1328/gcn-100.sh deleted file mode 100644 index 7de88e2fa..000000000 --- a/single_runs/pcba_1328/gcn-100.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=train - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/train.out -#SBATCH --error=outputs/error_train.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1004 - -set -e - -micromamba run -n graphium -c graphium-train \ - model=gcn \ - architecture=largemix \ - tasks=pcba_1328 \ - training=largemix \ - accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gcn/100/" \ - predictor.optim_kwargs.lr=0.0004 \ - constants.seed=400 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gcn-200.sh b/single_runs/pcba_1328/gcn-200.sh deleted file mode 100644 index d30e8fd52..000000000 --- a/single_runs/pcba_1328/gcn-200.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=train - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/train.out -#SBATCH --error=outputs/error_train.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1004 - -set -e - -micromamba run -n graphium -c graphium-train \ - model=gcn \ - architecture=largemix \ - tasks=pcba_1328 \ - training=largemix \ - accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gcn/200/" \ - predictor.optim_kwargs.lr=0.0004 \ - constants.seed=500 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gcn-300.sh b/single_runs/pcba_1328/gcn-300.sh deleted file mode 100644 index 71cc4af2c..000000000 --- a/single_runs/pcba_1328/gcn-300.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=train - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/train.out -#SBATCH --error=outputs/error_train.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1004 - -set -e - -micromamba run -n graphium -c graphium-train \ - model=gcn \ - architecture=largemix \ - tasks=pcba_1328 \ - training=largemix \ - accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gcn/300/" \ - predictor.optim_kwargs.lr=0.0004 \ - constants.seed=600 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gin-100.sh b/single_runs/pcba_1328/gin-100.sh deleted file mode 
100644 index 58a9aed4f..000000000 --- a/single_runs/pcba_1328/gin-100.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=train - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/train.out -#SBATCH --error=outputs/error_train.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1004 - -set -e - -micromamba run -n graphium -c graphium-train \ - model=gin \ - architecture=largemix \ - tasks=pcba_1328 \ - training=largemix \ - accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gin/100/" \ - predictor.optim_kwargs.lr=0.0004 \ - constants.seed=400 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gin-200.sh b/single_runs/pcba_1328/gin-200.sh deleted file mode 100644 index ad501ea46..000000000 --- a/single_runs/pcba_1328/gin-200.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=train - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/train.out -#SBATCH --error=outputs/error_train.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1004 - -set -e - -micromamba run -n graphium -c graphium-train \ - model=gin \ - architecture=largemix \ - tasks=pcba_1328 \ - training=largemix \ - accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gin/200/" \ - predictor.optim_kwargs.lr=0.0004 \ - constants.seed=500 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gin-300.sh b/single_runs/pcba_1328/gin-300.sh deleted file mode 100644 index b245e9592..000000000 --- a/single_runs/pcba_1328/gin-300.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=train - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/train.out -#SBATCH --error=outputs/error_train.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1004 - -set -e - -micromamba run -n graphium -c graphium-train \ - model=gin \ - architecture=largemix \ - tasks=pcba_1328 \ - training=largemix \ - accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gin/300/" \ - predictor.optim_kwargs.lr=0.0004 \ - constants.seed=600 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gine-100.sh b/single_runs/pcba_1328/gine-100.sh deleted file mode 100644 index c4a1650b4..000000000 --- a/single_runs/pcba_1328/gine-100.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=train - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/train.out -#SBATCH --error=outputs/error_train.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1004 - -set -e - -micromamba run -n graphium -c graphium-train \ - model=gine \ - architecture=largemix \ - tasks=pcba_1328 \ - training=largemix \ - accelerator=gpu \ - 
trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gine/100/" \ - predictor.optim_kwargs.lr=0.0004 \ - constants.seed=400 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gine-200.sh b/single_runs/pcba_1328/gine-200.sh deleted file mode 100644 index b2dbd2d7f..000000000 --- a/single_runs/pcba_1328/gine-200.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=train - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/train.out -#SBATCH --error=outputs/error_train.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1004 - -set -e - -micromamba run -n graphium -c graphium-train \ - model=gine \ - architecture=largemix \ - tasks=pcba_1328 \ - training=largemix \ - accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gine/200/" \ - predictor.optim_kwargs.lr=0.0004 \ - constants.seed=500 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/single_runs/pcba_1328/gine-300.sh b/single_runs/pcba_1328/gine-300.sh deleted file mode 100644 index fe9afb79d..000000000 --- a/single_runs/pcba_1328/gine-300.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=train - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/train.out -#SBATCH --error=outputs/error_train.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1004 - -set -e - -micromamba run -n graphium -c graphium-train \ - model=gine \ - architecture=largemix \ - tasks=pcba_1328 \ - training=largemix \ - accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/pcba_1328/gine/300/" \ - predictor.optim_kwargs.lr=0.0004 \ - constants.seed=600 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file diff --git a/sweeps/gcn-l1000_mcf7.sh b/sweeps/gcn-l1000_mcf7.sh deleted file mode 100644 index 4feae870d..000000000 --- a/sweeps/gcn-l1000_mcf7.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=sweep - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/sweep.out -#SBATCH --error=outputs/error_sweep.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1001 - -set -e - -source /home/frederik_valencediscovery_com/.bashrc -cd /home/frederik_valencediscovery_com/projects/graphium_expts -source activate graphium_dev - -wandb agent multitask-gnn/neurips2023-large-single-dataset/ntzo59la \ No newline at end of file diff --git a/sweeps/gcn-l1000_vcap.sh b/sweeps/gcn-l1000_vcap.sh deleted file mode 100644 index 9777c16c6..000000000 --- a/sweeps/gcn-l1000_vcap.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=sweep - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/sweep.out -#SBATCH --error=outputs/error_sweep.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1001 - -set -e - -source /home/frederik_valencediscovery_com/.bashrc -cd 
/home/frederik_valencediscovery_com/projects/graphium_expts -source activate graphium_dev - -wandb agent multitask-gnn/neurips2023-large-single-dataset/0d3n1d5g \ No newline at end of file diff --git a/sweeps/gcn-pcba_1328.sh b/sweeps/gcn-pcba_1328.sh deleted file mode 100644 index fc64ccb3b..000000000 --- a/sweeps/gcn-pcba_1328.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=sweep - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/sweep.out -#SBATCH --error=outputs/error_sweep.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=a1004 - -set -e - -source /home/frederik_valencediscovery_com/.bashrc -cd /home/frederik_valencediscovery_com/projects/graphium_expts -source activate graphium_dev - -wandb agent multitask-gnn/neurips2023-large-single-dataset/c22gvpm5 \ No newline at end of file diff --git a/sweeps/gin-l1000_mcf7.sh b/sweeps/gin-l1000_mcf7.sh deleted file mode 100644 index cc6c08688..000000000 --- a/sweeps/gin-l1000_mcf7.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=sweep - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/sweep.out -#SBATCH --error=outputs/error_sweep.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1001 - -set -e - -source /home/frederik_valencediscovery_com/.bashrc -cd /home/frederik_valencediscovery_com/projects/graphium_expts -source activate graphium_dev - -wandb agent multitask-gnn/neurips2023-large-single-dataset/s12dfu36 \ No newline at end of file diff --git a/sweeps/gin-l1000_vcap.sh b/sweeps/gin-l1000_vcap.sh deleted file mode 100644 index 4484d988d..000000000 --- a/sweeps/gin-l1000_vcap.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=sweep - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/sweep.out -#SBATCH --error=outputs/error_sweep.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1001 - -set -e - -source /home/frederik_valencediscovery_com/.bashrc -cd /home/frederik_valencediscovery_com/projects/graphium_expts -source activate graphium_dev - -wandb agent multitask-gnn/neurips2023-large-single-dataset/kmtjw7up \ No newline at end of file diff --git a/sweeps/gin-pcba_1328.sh b/sweeps/gin-pcba_1328.sh deleted file mode 100644 index 4a425728d..000000000 --- a/sweeps/gin-pcba_1328.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=sweep - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/sweep.out -#SBATCH --error=outputs/error_sweep.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=a1004 - -set -e - -source /home/frederik_valencediscovery_com/.bashrc -cd /home/frederik_valencediscovery_com/projects/graphium_expts -source activate graphium_dev - -wandb agent multitask-gnn/neurips2023-large-single-dataset/xzyzv04b \ No newline at end of file diff --git a/sweeps/gine-l1000_mcf7.sh b/sweeps/gine-l1000_mcf7.sh deleted file mode 100644 index f4ad85467..000000000 --- a/sweeps/gine-l1000_mcf7.sh +++ /dev/null @@ -1,23 +0,0 @@ 
-#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=sweep - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/sweep.out -#SBATCH --error=outputs/error_sweep.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1001 - -set -e - -source /home/frederik_valencediscovery_com/.bashrc -cd /home/frederik_valencediscovery_com/projects/graphium_expts -source activate graphium_dev - -wandb agent multitask-gnn/neurips2023-large-single-dataset/4v2rsrvo \ No newline at end of file diff --git a/sweeps/gine-l1000_vcap.sh b/sweeps/gine-l1000_vcap.sh deleted file mode 100644 index dc5361aef..000000000 --- a/sweeps/gine-l1000_vcap.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=sweep - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/sweep.out -#SBATCH --error=outputs/error_sweep.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1001 - -set -e - -source /home/frederik_valencediscovery_com/.bashrc -cd /home/frederik_valencediscovery_com/projects/graphium_expts -source activate graphium_dev - -wandb agent multitask-gnn/neurips2023-large-single-dataset/cxmipyd8 \ No newline at end of file diff --git a/sweeps/gine-pcba_1328.sh b/sweeps/gine-pcba_1328.sh deleted file mode 100644 index 0c540fbd8..000000000 --- a/sweeps/gine-pcba_1328.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=sweep - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/sweep.out -#SBATCH --error=outputs/error_sweep.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=a1004 - -set -e - -source /home/frederik_valencediscovery_com/.bashrc -cd /home/frederik_valencediscovery_com/projects/graphium_expts -source activate graphium_dev - -wandb agent multitask-gnn/neurips2023-large-single-dataset/jtj1cb4r \ No newline at end of file diff --git a/test_sweep.sh b/test_sweep.sh deleted file mode 100644 index 2541c1104..000000000 --- a/test_sweep.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=sweep - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/sweep.out -#SBATCH --error=outputs/error_sweep.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1001 - -set -e - -source /home/frederik_valencediscovery_com/.bashrc -cd /home/frederik_valencediscovery_com/projects/graphium_expts -source activate graphium_dev - -wandb agent multitask-gnn/neurips2023-large-single-dataset/911gmdar \ No newline at end of file diff --git a/train.sh b/train.sh deleted file mode 100644 index 9a9d12db7..000000000 --- a/train.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env bash - -## Name of your SLURM job -#SBATCH --job-name=train - -## Files for logs: here we redirect stoout and sterr to the same file -#SBATCH --output=outputs/train.out -#SBATCH --error=outputs/error_train.out -#SBATCH --open-mode=append - -## Time limit for the job -#SBATCH --time=120:00:00 - -## Partition to use, -#SBATCH --partition=v1001 - -set -e - -micromamba run -n graphium -c graphium-train \ - model=gine \ - 
architecture=largemix \ - tasks=l1000_mcf7 \ - training=largemix \ - accelerator=gpu \ - trainer.model_checkpoint.dirpath="model_checkpoints/l1000_mcf7/gine/" \ - predictor.optim_kwargs.lr=0.0004 \ - constants.wandb.project="neurips2023-large-single-dataset" \ \ No newline at end of file From eb037defed7f192c37e8ab1816335fdece7ec014 Mon Sep 17 00:00:00 2001 From: WenkelF Date: Thu, 24 Aug 2023 14:34:32 -0400 Subject: [PATCH 21/30] Minor reorganization --- .gitignore | 2 +- scripts/convert_yml.py | 29 +++++++++++++++++++++++++++++ scripts/ipu_start.sh | 11 +++++++++++ scripts/ipu_venv.sh | 16 ++++++++++++++++ 4 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 scripts/convert_yml.py create mode 100644 scripts/ipu_start.sh create mode 100644 scripts/ipu_venv.sh diff --git a/.gitignore b/.gitignore index 371e5343c..8551dfb44 100644 --- a/.gitignore +++ b/.gitignore @@ -24,7 +24,7 @@ datacache/ tests/temp_cache* predictions/ draft/ -scripts/ +scripts-expts/ # Data and predictions graphium/data/ZINC_bench_gnn/ diff --git a/scripts/convert_yml.py b/scripts/convert_yml.py new file mode 100644 index 000000000..90cb8e3c1 --- /dev/null +++ b/scripts/convert_yml.py @@ -0,0 +1,29 @@ +""" +Convert the dependencies from conda's `env.yml` to pip `requirements.txt` +""" + +import ruamel.yaml + +yaml = ruamel.yaml.YAML() +data = yaml.load(open("env.yml")) + +requirements = [] +for dep in data["dependencies"]: + if isinstance(dep, str): + outputs = dep.split("=") + if len(outputs) == 1: + package = outputs[0] + requirements.append(package) + elif len(outputs) == 2: + package, package_version = outputs[0], outputs[1] + requirements.append(package + "==" + package_version) + elif len(outputs) == 3: + package, package_version, python_version = outputs[0], outputs[1], outputs[2] + requirements.append(package + "==" + package_version) + elif isinstance(dep, dict): + for preq in dep.get("pip", []): + requirements.append(preq) + +with open("requirements.txt", "w") as fp: + for requirement in requirements: + print(requirement, file=fp) diff --git a/scripts/ipu_start.sh b/scripts/ipu_start.sh new file mode 100644 index 000000000..4c45da5c0 --- /dev/null +++ b/scripts/ipu_start.sh @@ -0,0 +1,11 @@ +""" +Start the ipu environment and SDK +""" + +source /opt/gc/sdk-3.0.0+1128/poplar-ubuntu_20_04-3.0.0+5468-0379b9a65d/enable.sh +source /opt/gc/sdk-3.0.0+1128/popart-ubuntu_20_04-3.0.0+5468-0379b9a65d/enable.sh + +source ~/.venv/graphium_ipu/bin/activate # Change to your path + +export VISUAL=vim +export EDITOR="$VISUAL" diff --git a/scripts/ipu_venv.sh b/scripts/ipu_venv.sh new file mode 100644 index 000000000..9c9a96976 --- /dev/null +++ b/scripts/ipu_venv.sh @@ -0,0 +1,16 @@ +""" +Create the pip environment for IPU +""" + +## Uncomment this to create the folder for the environment +# mkdir ~/.venv # Create the folder for the environment +# python3 -m venv ~/.venv/graphium_ipu # Create the environment +# source ~/.venv/graphium_ipu/bin/activate # Activate the environment + +# Installing the dependencies for the IPU environment +pip install torch==1.10+cpu torchvision==0.11+cpu torchaudio==0.10 -f https://download.pytorch.org/whl/torch_stable.html +pip install torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric -f https://data.pyg.org/whl/torch-1.10.0+cpu.html +pip install dgl dglgo -f https://data.dgl.ai/wheels/repo.html +pip install /opt/gc/sdk-3.0.0+1128/poptorch-3.0.0+84519_672c9cbc7f_ubuntu_20_04-cp38-cp38-linux_x86_64.whl +pip install -r requirements.txt +pip install -e . 
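
For reference, a minimal sketch (not part of the patch above) of how the dependency-string rewriting in the newly added scripts/convert_yml.py behaves, skipping the ruamel.yaml file I/O; the sample dependency entries below are hypothetical and only illustrate how conda-style "pkg=version=build" strings are turned into pip-style "pkg==version" requirements, with build strings dropped and nested pip entries passed through unchanged.

    # Illustrative only: mirrors the split("=") handling used by scripts/convert_yml.py.
    # The dependency entries here are made up for the example.
    deps = ["pip", "pytorch=1.12", "python=3.8=h12debd9_5", {"pip": ["hydra-core>=1.3"]}]

    requirements = []
    for dep in deps:
        if isinstance(dep, str):
            parts = dep.split("=")
            if len(parts) == 1:
                # bare package name, e.g. "pip" -> "pip"
                requirements.append(parts[0])
            else:
                # "pkg=1.2" or "pkg=1.2=build" -> "pkg==1.2" (build string dropped)
                requirements.append(parts[0] + "==" + parts[1])
        elif isinstance(dep, dict):
            # nested pip requirements are copied through as-is
            requirements.extend(dep.get("pip", []))

    print(requirements)  # ['pip', 'pytorch==1.12', 'python==3.8', 'hydra-core>=1.3']
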
From cf88c3f042d69090e9a4b63f9f5fd583b5d3971e Mon Sep 17 00:00:00 2001 From: WenkelF Date: Thu, 24 Aug 2023 18:30:55 -0400 Subject: [PATCH 22/30] Enabling to resume training --- graphium/cli/train_finetune.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/graphium/cli/train_finetune.py b/graphium/cli/train_finetune.py index e6ead5122..e2a502aed 100644 --- a/graphium/cli/train_finetune.py +++ b/graphium/cli/train_finetune.py @@ -44,12 +44,16 @@ def run_training_finetuning(cfg: DictConfig) -> None: st = timeit.default_timer() + ckpt_path = cfg["trainer"].pop("ckpt_path", None) + wandb_cfg = cfg["constants"].get("wandb") if wandb_cfg is not None: wandb.init( entity=wandb_cfg["entity"], project=wandb_cfg["project"], config=cfg, + id=wandb_cfg.pop("id", None), + # resume="must", ) ## == Instantiate all required objects from their respective configs == @@ -100,7 +104,7 @@ def run_training_finetuning(cfg: DictConfig) -> None: # Run the model training with SafeRun(name="TRAINING", raise_error=cfg["constants"]["raise_train_error"], verbose=True): - trainer.fit(model=predictor, datamodule=datamodule) + trainer.fit(model=predictor, datamodule=datamodule, ckpt_path=ckpt_path) # Determine the max num nodes and edges in testing predictor.set_max_nodes_edges_per_graph(datamodule, stages=["test"]) From 5781dcbc55f88317ba6d1966f3b0d7f5fbfbc82c Mon Sep 17 00:00:00 2001 From: WenkelF Date: Sun, 27 Aug 2023 23:35:03 -0400 Subject: [PATCH 23/30] Finalizing largemix and large single dataset configs for hydra --- .../loss_metrics_datamodule/l1000_mcf7.yaml | 2 +- .../loss_metrics_datamodule/l1000_vcap.yaml | 2 +- .../loss_metrics_datamodule/largemix.yaml | 5 ++ .../loss_metrics_datamodule/pcba_1328.yaml | 2 +- .../loss_metrics_datamodule/pcqm4m_g25.yaml | 46 +++++++++++++++++++ .../loss_metrics_datamodule/pcqm4m_n4.yaml | 45 ++++++++++++++++++ expts/hydra-configs/tasks/pcqm4m_g25.yaml | 7 +++ expts/hydra-configs/tasks/pcqm4m_n4.yaml | 7 +++ .../tasks/task_heads/pcqm4m_g25.yaml | 15 ++++++ .../tasks/task_heads/pcqm4m_n4.yaml | 15 ++++++ .../training/accelerator/largemix_ipu.yaml | 24 ++++++++++ 11 files changed, 167 insertions(+), 3 deletions(-) create mode 100644 expts/hydra-configs/tasks/loss_metrics_datamodule/pcqm4m_g25.yaml create mode 100644 expts/hydra-configs/tasks/loss_metrics_datamodule/pcqm4m_n4.yaml create mode 100644 expts/hydra-configs/tasks/pcqm4m_g25.yaml create mode 100644 expts/hydra-configs/tasks/pcqm4m_n4.yaml create mode 100644 expts/hydra-configs/tasks/task_heads/pcqm4m_g25.yaml create mode 100644 expts/hydra-configs/tasks/task_heads/pcqm4m_n4.yaml create mode 100644 expts/hydra-configs/training/accelerator/largemix_ipu.yaml diff --git a/expts/hydra-configs/tasks/loss_metrics_datamodule/l1000_mcf7.yaml b/expts/hydra-configs/tasks/loss_metrics_datamodule/l1000_mcf7.yaml index d6c9fa765..43933a7fa 100644 --- a/expts/hydra-configs/tasks/loss_metrics_datamodule/l1000_mcf7.yaml +++ b/expts/hydra-configs/tasks/loss_metrics_datamodule/l1000_mcf7.yaml @@ -45,5 +45,5 @@ datamodule: # sample_size: 2000 # use sample_size for test task_level: graph splits_path: ../data/graphium/large-dataset/l1000_mcf7_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_mcf7_random_splits.pt` - split_names: [train, val, test_seen] + # split_names: [train, val, test_seen] epoch_sampling_fraction: 1.0 \ No newline at end of file diff --git a/expts/hydra-configs/tasks/loss_metrics_datamodule/l1000_vcap.yaml 
b/expts/hydra-configs/tasks/loss_metrics_datamodule/l1000_vcap.yaml index 08450ed01..27b89d862 100644 --- a/expts/hydra-configs/tasks/loss_metrics_datamodule/l1000_vcap.yaml +++ b/expts/hydra-configs/tasks/loss_metrics_datamodule/l1000_vcap.yaml @@ -45,5 +45,5 @@ datamodule: # sample_size: 2000 # use sample_size for test task_level: graph splits_path: ../data/graphium/large-dataset/l1000_vcap_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_vcap_random_splits.pt` - split_names: [train, val, test_seen] + # split_names: [train, val, test_seen] epoch_sampling_fraction: 1.0 \ No newline at end of file diff --git a/expts/hydra-configs/tasks/loss_metrics_datamodule/largemix.yaml b/expts/hydra-configs/tasks/loss_metrics_datamodule/largemix.yaml index 96d550cef..921960cd1 100644 --- a/expts/hydra-configs/tasks/loss_metrics_datamodule/largemix.yaml +++ b/expts/hydra-configs/tasks/loss_metrics_datamodule/largemix.yaml @@ -92,6 +92,7 @@ datamodule: # sample_size: 2000 # use sample_size for test task_level: graph splits_path: ../data/graphium/large-dataset/l1000_vcap_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_vcap_random_splits.pt` + # split_names: [train, val, test_seen] epoch_sampling_fraction: 1.0 l1000_mcf7: @@ -104,6 +105,7 @@ datamodule: # sample_size: 2000 # use sample_size for test task_level: graph splits_path: ../data/graphium/large-dataset/l1000_mcf7_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_mcf7_random_splits.pt` + # split_names: [train, val, test_seen] epoch_sampling_fraction: 1.0 pcba_1328: @@ -116,6 +118,7 @@ datamodule: # sample_size: 2000 # use sample_size for test task_level: graph splits_path: ../data/graphium/large-dataset/pcba_1328_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcba_1328_random_splits.pt` + # split_names: [train, val, test_seen] epoch_sampling_fraction: 1.0 pcqm4m_g25: @@ -128,6 +131,7 @@ datamodule: # sample_size: 2000 # use sample_size for test task_level: graph splits_path: ../data/graphium/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` + # split_names: [train, val, test_seen] label_normalization: normalize_val_test: True method: "normal" @@ -143,6 +147,7 @@ datamodule: # sample_size: 2000 # use sample_size for test task_level: node splits_path: ../data/graphium/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` + # split_names: [train, val, test_seen] seed: 42 label_normalization: normalize_val_test: True diff --git a/expts/hydra-configs/tasks/loss_metrics_datamodule/pcba_1328.yaml b/expts/hydra-configs/tasks/loss_metrics_datamodule/pcba_1328.yaml index 469dce23c..adc3321a0 100644 --- a/expts/hydra-configs/tasks/loss_metrics_datamodule/pcba_1328.yaml +++ b/expts/hydra-configs/tasks/loss_metrics_datamodule/pcba_1328.yaml @@ -37,5 +37,5 @@ datamodule: # sample_size: 2000 # use sample_size for test task_level: graph splits_path: ../data/graphium/large-dataset/pcba_1328_random_splits.pt # Download with `wget 
https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcba_1328_random_splits.pt` - split_names: [train, val, test_seen] + # split_names: [train, val, test_seen] epoch_sampling_fraction: 1.0 \ No newline at end of file diff --git a/expts/hydra-configs/tasks/loss_metrics_datamodule/pcqm4m_g25.yaml b/expts/hydra-configs/tasks/loss_metrics_datamodule/pcqm4m_g25.yaml new file mode 100644 index 000000000..047701f6e --- /dev/null +++ b/expts/hydra-configs/tasks/loss_metrics_datamodule/pcqm4m_g25.yaml @@ -0,0 +1,46 @@ +# @package _global_ + +predictor: + metrics_on_progress_bar: + pcqm4m_g25: [] + metrics_on_training_set: + pcqm4m_g25: [] + loss_fun: + pcqm4m_g25: mae_ipu + +metrics: + pcqm4m_g25: + - name: mae + metric: mae_ipu + target_nan_mask: null + multitask_handling: mean-per-label + threshold_kwargs: null + - name: pearsonr + metric: pearsonr_ipu + threshold_kwargs: null + target_nan_mask: null + multitask_handling: mean-per-label + - name: r2 + metric: r2_score_ipu + threshold_kwargs: null + target_nan_mask: null + multitask_handling: mean-per-label + +datamodule: + args: # Matches that in the test_multitask_datamodule.py case. + task_specific_args: # To be replaced by a new class "DatasetParams" + pcqm4m_g25: + df: null + df_path: ../data/graphium/large-dataset/PCQM4M_G25_N4.parquet + # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet + # or set path as the URL directly + smiles_col: "ordered_smiles" + label_cols: graph_* # graph_* means all columns starting with "graph_" + # sample_size: 2000 # use sample_size for test + task_level: graph + splits_path: ../data/graphium/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` + # split_names: [train, val, test_seen] + label_normalization: + normalize_val_test: True + method: "normal" + epoch_sampling_fraction: 1.0 \ No newline at end of file diff --git a/expts/hydra-configs/tasks/loss_metrics_datamodule/pcqm4m_n4.yaml b/expts/hydra-configs/tasks/loss_metrics_datamodule/pcqm4m_n4.yaml new file mode 100644 index 000000000..494843464 --- /dev/null +++ b/expts/hydra-configs/tasks/loss_metrics_datamodule/pcqm4m_n4.yaml @@ -0,0 +1,45 @@ +# @package _global_ + +predictor: + metrics_on_progress_bar: + pcqm4m_n4: [] + metrics_on_training_set: + pcqm4m_n4: [] + loss_fun: + pcqm4m_n4: mae_ipu + +metrics: + pcqm4m_n4: + - name: mae + metric: mae_ipu + target_nan_mask: null + multitask_handling: mean-per-label + threshold_kwargs: null + - name: pearsonr + metric: pearsonr_ipu + threshold_kwargs: null + target_nan_mask: null + multitask_handling: mean-per-label + - name: r2 + metric: r2_score_ipu + threshold_kwargs: null + target_nan_mask: null + multitask_handling: mean-per-label + +datamodule: + pcqm4m_n4: + df: null + df_path: ../data/graphium/large-dataset/PCQM4M_G25_N4.parquet + # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet + # or set path as the URL directly + smiles_col: "ordered_smiles" + label_cols: node_* # node_* means all columns starting with "node_" + # sample_size: 2000 # use sample_size for test + task_level: node + splits_path: ../data/graphium/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` + # split_names: [train, val, 
test_seen] + seed: 42 + label_normalization: + normalize_val_test: True + method: "normal" + epoch_sampling_fraction: 1.0 \ No newline at end of file diff --git a/expts/hydra-configs/tasks/pcqm4m_g25.yaml b/expts/hydra-configs/tasks/pcqm4m_g25.yaml new file mode 100644 index 000000000..1d5b03469 --- /dev/null +++ b/expts/hydra-configs/tasks/pcqm4m_g25.yaml @@ -0,0 +1,7 @@ +# NOTE: We cannot have a single config, since for fine-tuning we will +# only want to override the loss_metrics_datamodule, whereas for training we will +# want to override both. + +defaults: + - task_heads: pcqm4m_g25 + - loss_metrics_datamodule: pcqm4m_g25 \ No newline at end of file diff --git a/expts/hydra-configs/tasks/pcqm4m_n4.yaml b/expts/hydra-configs/tasks/pcqm4m_n4.yaml new file mode 100644 index 000000000..daa077ccc --- /dev/null +++ b/expts/hydra-configs/tasks/pcqm4m_n4.yaml @@ -0,0 +1,7 @@ +# NOTE: We cannot have a single config, since for fine-tuning we will +# only want to override the loss_metrics_datamodule, whereas for training we will +# want to override both. + +defaults: + - task_heads: pcqm4m_n4 + - loss_metrics_datamodule: pcqm4m_n4 \ No newline at end of file diff --git a/expts/hydra-configs/tasks/task_heads/pcqm4m_g25.yaml b/expts/hydra-configs/tasks/task_heads/pcqm4m_g25.yaml new file mode 100644 index 000000000..813ab1997 --- /dev/null +++ b/expts/hydra-configs/tasks/task_heads/pcqm4m_g25.yaml @@ -0,0 +1,15 @@ +# @package _global_ + +architecture: + task_heads: + pcqm4m_g25: + task_level: graph + out_dim: 25 + hidden_dims: 32 + depth: 2 + activation: relu + last_activation: none + dropout: ${architecture.pre_nn.dropout} + normalization: ${architecture.pre_nn.normalization} + last_normalization: "none" + residual_type: none \ No newline at end of file diff --git a/expts/hydra-configs/tasks/task_heads/pcqm4m_n4.yaml b/expts/hydra-configs/tasks/task_heads/pcqm4m_n4.yaml new file mode 100644 index 000000000..dda781cb0 --- /dev/null +++ b/expts/hydra-configs/tasks/task_heads/pcqm4m_n4.yaml @@ -0,0 +1,15 @@ +# @package _global_ + +architecture: + task_heads: + pcqm4m_n4: + task_level: node + out_dim: 4 + hidden_dims: 32 + depth: 2 + activation: relu + last_activation: none + dropout: ${architecture.pre_nn.dropout} + normalization: ${architecture.pre_nn.normalization} + last_normalization: "none" + residual_type: none \ No newline at end of file diff --git a/expts/hydra-configs/training/accelerator/largemix_ipu.yaml b/expts/hydra-configs/training/accelerator/largemix_ipu.yaml new file mode 100644 index 000000000..090600e98 --- /dev/null +++ b/expts/hydra-configs/training/accelerator/largemix_ipu.yaml @@ -0,0 +1,24 @@ +# @package _global_ + +datamodule: + args: + ipu_dataloader_training_opts: + mode: async + max_num_nodes_per_graph: 30 # train max nodes: 20, max_edges: 54 + max_num_edges_per_graph: 100 + ipu_dataloader_inference_opts: + mode: async + max_num_nodes_per_graph: 35 # valid max nodes: 51, max_edges: 118 + max_num_edges_per_graph: 100 + # Data handling-related + batch_size_training: 30 + batch_size_inference: 30 + +predictor: + optim_kwargs: + loss_scaling: 1024 + +trainer: + trainer: + precision: 16-true + accumulate_grad_batches: 2 \ No newline at end of file From 93d12be8f4340d65ad1641ce3213c57a91931031 Mon Sep 17 00:00:00 2001 From: WenkelF Date: Sun, 27 Aug 2023 23:38:07 -0400 Subject: [PATCH 24/30] Temporary removing code for resuming training in favor of dedicated pr --- graphium/cli/train_finetune.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git 
a/graphium/cli/train_finetune.py b/graphium/cli/train_finetune.py index e2a502aed..e6ead5122 100644 --- a/graphium/cli/train_finetune.py +++ b/graphium/cli/train_finetune.py @@ -44,16 +44,12 @@ def run_training_finetuning(cfg: DictConfig) -> None: st = timeit.default_timer() - ckpt_path = cfg["trainer"].pop("ckpt_path", None) - wandb_cfg = cfg["constants"].get("wandb") if wandb_cfg is not None: wandb.init( entity=wandb_cfg["entity"], project=wandb_cfg["project"], config=cfg, - id=wandb_cfg.pop("id", None), - # resume="must", ) ## == Instantiate all required objects from their respective configs == @@ -104,7 +100,7 @@ def run_training_finetuning(cfg: DictConfig) -> None: # Run the model training with SafeRun(name="TRAINING", raise_error=cfg["constants"]["raise_train_error"], verbose=True): - trainer.fit(model=predictor, datamodule=datamodule, ckpt_path=ckpt_path) + trainer.fit(model=predictor, datamodule=datamodule) # Determine the max num nodes and edges in testing predictor.set_max_nodes_edges_per_graph(datamodule, stages=["test"]) From 8a80c52eaf93bd208b0daa7e2916b819c22481ae Mon Sep 17 00:00:00 2001 From: WenkelF Date: Sun, 27 Aug 2023 23:41:36 -0400 Subject: [PATCH 25/30] Cleaning up --- env.yml | 2 +- pyproject.toml | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/env.yml b/env.yml index 7fc668692..e49d071a4 100644 --- a/env.yml +++ b/env.yml @@ -28,7 +28,7 @@ dependencies: - gcsfs >=2021.6 # ML packages - - cudatoolkit # works also with CPU-only system. + - cuda-version # works also with CPU-only system. - pytorch >=1.12 - lightning >=2.0 - torchmetrics >=0.7.0,<0.11 diff --git a/pyproject.toml b/pyproject.toml index 5b8bde087..20cfa9792 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,9 +64,8 @@ dependencies = [ [project.scripts] graphium = "graphium.cli.main:main_cli" - graphium-prepare-data = "graphium.cli.prepare_data:cli" graphium-train = "graphium.cli.train_finetune:cli" - graphium-test = "graphium.cli.test:cli" + graphium-prepare-data = "graphium.cli.prepare_data:cli" [project.urls] Website = "https://graphium.datamol.io/" From 67e413499c2d52fb046701dad71380767c16a426 Mon Sep 17 00:00:00 2001 From: WenkelF Date: Sun, 27 Aug 2023 23:48:14 -0400 Subject: [PATCH 26/30] Reformatting with black --- graphium/cli/test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/graphium/cli/test.py b/graphium/cli/test.py index d4070be03..71c55a7b6 100644 --- a/graphium/cli/test.py +++ b/graphium/cli/test.py @@ -50,7 +50,9 @@ def run_testing(cfg: DictConfig) -> None: datamodule = load_datamodule(cfg, accelerator_type) ## Load Predictor - predictor = PredictorModule.load_from_checkpoint(checkpoint_path=get_checkpoint_path(cfg), map_location=cfg["accelerator"]["type"]) + predictor = PredictorModule.load_from_checkpoint( + checkpoint_path=get_checkpoint_path(cfg), map_location=cfg["accelerator"]["type"] + ) ## Load Trainer date_time_suffix = datetime.now().strftime("%d.%m.%Y_%H.%M.%S") From da31797946fe2f094c355aca452f0df5d872fcbe Mon Sep 17 00:00:00 2001 From: DomInvivo Date: Wed, 30 Aug 2023 14:16:10 -0400 Subject: [PATCH 27/30] Added the date-time in the model_checkpoint paths --- expts/hydra-configs/README.md | 4 ++-- expts/hydra-configs/experiment/toymix_mpnn.yaml | 2 +- expts/hydra-configs/training/largemix.yaml | 4 ++-- expts/hydra-configs/training/model/largemix_gcn.yaml | 2 +- expts/hydra-configs/training/model/largemix_gin.yaml | 2 +- expts/hydra-configs/training/model/largemix_gine.yaml | 2 +- 
expts/hydra-configs/training/model/pcqm4m_gpspp.yaml | 2 +- expts/hydra-configs/training/model/pcqm4m_mpnn.yaml | 2 +- expts/hydra-configs/training/model/toymix_gcn.yaml | 2 +- expts/hydra-configs/training/model/toymix_gin.yaml | 2 +- expts/hydra-configs/training/pcqm4m.yaml | 2 +- 11 files changed, 13 insertions(+), 13 deletions(-) diff --git a/expts/hydra-configs/README.md b/expts/hydra-configs/README.md index f695ae20c..40625917d 100644 --- a/expts/hydra-configs/README.md +++ b/expts/hydra-configs/README.md @@ -33,7 +33,7 @@ constants: trainer: model_checkpoint: - dirpath: models_checkpoints/neurips2023-small-gin/ + dirpath: models_checkpoints/neurips2023-small-gin/${now:%Y-%m-%d_%H-%M-%S}/ ``` We can now utilize `hydra` to e.g., run a sweep over our models on the ToyMix dataset via @@ -43,7 +43,7 @@ graphium-train -m model=gcn,gin where the ToyMix dataset is pre-configured in `main.yaml`. Read on to find out how to define new datasets and architectures for pre-training and fine-tuning. ## Pre-training / Fine-tuning -Say you trained a model with the following command: +Say you trained a model with the following command: ```bash graphium-train --config-name "main" ``` diff --git a/expts/hydra-configs/experiment/toymix_mpnn.yaml b/expts/hydra-configs/experiment/toymix_mpnn.yaml index b8b552b66..d79311335 100644 --- a/expts/hydra-configs/experiment/toymix_mpnn.yaml +++ b/expts/hydra-configs/experiment/toymix_mpnn.yaml @@ -10,4 +10,4 @@ constants: trainer: model_checkpoint: - dirpath: models_checkpoints/neurips2023-small-mpnn/ \ No newline at end of file + dirpath: models_checkpoints/neurips2023-small-mpnn/${now:%Y-%m-%d_%H-%M-%S}/ \ No newline at end of file diff --git a/expts/hydra-configs/training/largemix.yaml b/expts/hydra-configs/training/largemix.yaml index cb95fb2fe..7c1a67953 100644 --- a/expts/hydra-configs/training/largemix.yaml +++ b/expts/hydra-configs/training/largemix.yaml @@ -21,13 +21,13 @@ trainer: name: ${constants.name} project: ${constants.name} model_checkpoint: - dirpath: model_checkpoints/large-dataset/ + dirpath: model_checkpoints/large-dataset/${now:%Y-%m-%d_%H-%M-%S}/ filename: ${constants.name} save_last: True # saving last model # save_top_k: 1 # and best model # monitor: loss/val # wrt validation loss trainer: precision: 16-mixed - max_epochs: ${predictor.torch_scheduler_kwargs.max_num_epochs} + max_epochs: ${predictor.torch_scheduler_kwargs.max_num_epochs} min_epochs: 1 check_val_every_n_epoch: 20 \ No newline at end of file diff --git a/expts/hydra-configs/training/model/largemix_gcn.yaml b/expts/hydra-configs/training/model/largemix_gcn.yaml index a18ea412e..04864ecc9 100644 --- a/expts/hydra-configs/training/model/largemix_gcn.yaml +++ b/expts/hydra-configs/training/model/largemix_gcn.yaml @@ -15,4 +15,4 @@ constants: trainer: model_checkpoint: - dirpath: model_checkpoints/large-dataset/gcn/ \ No newline at end of file + dirpath: model_checkpoints/large-dataset/gcn/${now:%Y-%m-%d_%H-%M-%S}/ \ No newline at end of file diff --git a/expts/hydra-configs/training/model/largemix_gin.yaml b/expts/hydra-configs/training/model/largemix_gin.yaml index 4cfeeec9f..41c12a014 100644 --- a/expts/hydra-configs/training/model/largemix_gin.yaml +++ b/expts/hydra-configs/training/model/largemix_gin.yaml @@ -15,4 +15,4 @@ constants: trainer: model_checkpoint: - dirpath: model_checkpoints/large-dataset/gin/ \ No newline at end of file + dirpath: model_checkpoints/large-dataset/gin/${now:%Y-%m-%d_%H-%M-%S}/ \ No newline at end of file diff --git 
a/expts/hydra-configs/training/model/largemix_gine.yaml b/expts/hydra-configs/training/model/largemix_gine.yaml index 7fd722b2d..99cdee3eb 100644 --- a/expts/hydra-configs/training/model/largemix_gine.yaml +++ b/expts/hydra-configs/training/model/largemix_gine.yaml @@ -15,4 +15,4 @@ constants: trainer: model_checkpoint: - dirpath: model_checkpoints/large-dataset/gine/ \ No newline at end of file + dirpath: model_checkpoints/large-dataset/gine/${now:%Y-%m-%d_%H-%M-%S}/ \ No newline at end of file diff --git a/expts/hydra-configs/training/model/pcqm4m_gpspp.yaml b/expts/hydra-configs/training/model/pcqm4m_gpspp.yaml index 7fb1e1ee5..c156020b5 100644 --- a/expts/hydra-configs/training/model/pcqm4m_gpspp.yaml +++ b/expts/hydra-configs/training/model/pcqm4m_gpspp.yaml @@ -10,4 +10,4 @@ constants: trainer: model_checkpoint: - dirpath: models_checkpoints/PCMQ4Mv2/gpspp/ + dirpath: models_checkpoints/PCMQ4Mv2/gpspp/${now:%Y-%m-%d_%H-%M-%S}/ diff --git a/expts/hydra-configs/training/model/pcqm4m_mpnn.yaml b/expts/hydra-configs/training/model/pcqm4m_mpnn.yaml index ca643fe39..bab3896d1 100644 --- a/expts/hydra-configs/training/model/pcqm4m_mpnn.yaml +++ b/expts/hydra-configs/training/model/pcqm4m_mpnn.yaml @@ -10,4 +10,4 @@ constants: trainer: model_checkpoint: - dirpath: models_checkpoints/PCMQ4Mv2/mpnn/ + dirpath: models_checkpoints/PCMQ4Mv2/mpnn/${now:%Y-%m-%d_%H-%M-%S}/ diff --git a/expts/hydra-configs/training/model/toymix_gcn.yaml b/expts/hydra-configs/training/model/toymix_gcn.yaml index 57796adfa..4180515db 100644 --- a/expts/hydra-configs/training/model/toymix_gcn.yaml +++ b/expts/hydra-configs/training/model/toymix_gcn.yaml @@ -10,4 +10,4 @@ constants: trainer: model_checkpoint: - dirpath: models_checkpoints/small-dataset/gcn/ \ No newline at end of file + dirpath: models_checkpoints/small-dataset/gcn/${now:%Y-%m-%d_%H-%M-%S}/ \ No newline at end of file diff --git a/expts/hydra-configs/training/model/toymix_gin.yaml b/expts/hydra-configs/training/model/toymix_gin.yaml index 459694c9a..10007bd32 100644 --- a/expts/hydra-configs/training/model/toymix_gin.yaml +++ b/expts/hydra-configs/training/model/toymix_gin.yaml @@ -10,4 +10,4 @@ constants: trainer: model_checkpoint: - dirpath: models_checkpoints/neurips2023-small-gin/ \ No newline at end of file + dirpath: models_checkpoints/neurips2023-small-gin/${now:%Y-%m-%d_%H-%M-%S}/ \ No newline at end of file diff --git a/expts/hydra-configs/training/pcqm4m.yaml b/expts/hydra-configs/training/pcqm4m.yaml index 871a2a5f1..58860f807 100644 --- a/expts/hydra-configs/training/pcqm4m.yaml +++ b/expts/hydra-configs/training/pcqm4m.yaml @@ -28,7 +28,7 @@ trainer: # patience: 10 # mode: &mode min model_checkpoint: - dirpath: models_checkpoints/PCMQ4Mv2/ + dirpath: models_checkpoints/PCMQ4Mv2/${now:%Y-%m-%d_%H-%M-%S}/ filename: ${constants.name} #monitor: *monitor #mode: *mode From 44d241fc8865877b5d8d3d46d028b872a4e14f06 Mon Sep 17 00:00:00 2001 From: WenkelF Date: Thu, 31 Aug 2023 18:45:08 -0400 Subject: [PATCH 28/30] Removing graphium/cli/test.py in favor of graphium/cli/train_finetune_test.py --- graphium/cli/test.py | 84 -------------------------------------------- 1 file changed, 84 deletions(-) delete mode 100644 graphium/cli/test.py diff --git a/graphium/cli/test.py b/graphium/cli/test.py deleted file mode 100644 index 71c55a7b6..000000000 --- a/graphium/cli/test.py +++ /dev/null @@ -1,84 +0,0 @@ -import hydra -import wandb -import timeit - -from omegaconf import DictConfig, OmegaConf -from loguru import logger -from datetime import datetime -from 
lightning.pytorch.utilities.model_summary import ModelSummary -from graphium.trainer.predictor import PredictorModule - -from graphium.config._loader import ( - load_datamodule, - get_checkpoint_path, - load_trainer, - load_accelerator, -) -from graphium.utils.safe_run import SafeRun - - -@hydra.main(version_base=None, config_path="../../expts/hydra-configs", config_name="main") -def cli(cfg: DictConfig) -> None: - """ - CLI endpoint for running test step on model checkpoints. - """ - run_testing(cfg) - - -def run_testing(cfg: DictConfig) -> None: - """ - The main (pre-)training and fine-tuning loop. - """ - - cfg = OmegaConf.to_container(cfg, resolve=True) - - st = timeit.default_timer() - - wandb_cfg = cfg["constants"].get("wandb") - if wandb_cfg is not None: - wandb.init( - entity=wandb_cfg["entity"], - project=wandb_cfg["project"], - config=cfg, - ) - - ## == Instantiate all required objects from their respective configs == - # Accelerator - cfg, accelerator_type = load_accelerator(cfg) - - ## Data-module - datamodule = load_datamodule(cfg, accelerator_type) - - ## Load Predictor - predictor = PredictorModule.load_from_checkpoint( - checkpoint_path=get_checkpoint_path(cfg), map_location=cfg["accelerator"]["type"] - ) - - ## Load Trainer - date_time_suffix = datetime.now().strftime("%d.%m.%Y_%H.%M.%S") - trainer = load_trainer(cfg, accelerator_type, date_time_suffix) - - # Determine the max num nodes and edges in testing - datamodule.setup(stage="test") - - max_nodes = datamodule.get_max_num_nodes_datamodule(stages=["test"]) - max_edges = datamodule.get_max_num_edges_datamodule(stages=["test"]) - - predictor.model.set_max_num_nodes_edges_per_graph(max_nodes, max_edges) - - # Run the model testing - with SafeRun(name="TESTING", raise_error=cfg["constants"]["raise_train_error"], verbose=True): - trainer.test(model=predictor, datamodule=datamodule) - - logger.info("-" * 50) - logger.info("Total compute time:", timeit.default_timer() - st) - logger.info("-" * 50) - - if wandb_cfg is not None: - wandb.finish() - - return trainer.callback_metrics - - -if __name__ == "__main__": - cli() From 8f1ddfb658a856e0f86ef8d76b90fb7502109534 Mon Sep 17 00:00:00 2001 From: WenkelF Date: Thu, 31 Aug 2023 18:48:16 -0400 Subject: [PATCH 29/30] Minor fix --- expts/hydra-configs/training/toymix.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/expts/hydra-configs/training/toymix.yaml b/expts/hydra-configs/training/toymix.yaml index 5e4ece0b1..4afcbd56a 100644 --- a/expts/hydra-configs/training/toymix.yaml +++ b/expts/hydra-configs/training/toymix.yaml @@ -21,6 +21,6 @@ trainer: save_last: True trainer: precision: 16 - max_epochs: ${constants.max_epochs}-{epoch} + max_epochs: ${constants.max_epochs} min_epochs: 1 check_val_every_n_epoch: 20 \ No newline at end of file From 3cf2fb5a20f26de699f9bc3d871801f26a81dd13 Mon Sep 17 00:00:00 2001 From: WenkelF Date: Thu, 31 Aug 2023 18:57:38 -0400 Subject: [PATCH 30/30] Updating get_checkpoint_path --- graphium/config/_loader.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/graphium/config/_loader.py b/graphium/config/_loader.py index 4654dfd29..3235c9b68 100644 --- a/graphium/config/_loader.py +++ b/graphium/config/_loader.py @@ -589,15 +589,15 @@ def get_checkpoint_path(config: Union[omegaconf.DictConfig, Dict[str, Any]]) -> cfg_trainer = config["trainer"] - if "model_checkpoint" in cfg_trainer.keys(): - dirpath = cfg_trainer["model_checkpoint"]["dirpath"] + str(cfg_trainer["seed"]) + "/" - filename = 
config.get("ckpt_name_for_testing", "last") + ".ckpt" - else: - raise ValueError("Empty checkpoint section in config file") + path = config.get("ckpt_name_for_testing", "last.ckpt") + if path in GRAPHIUM_PRETRAINED_MODELS_DICT or fs.exists(path): + return path - checkpoint_path = fs.join(dirpath, filename) + if "model_checkpoint" in cfg_trainer.keys(): + dirpath = cfg_trainer["model_checkpoint"]["dirpath"] + path = fs.join(dirpath, path) - if not fs.exists(checkpoint_path): - raise ValueError(f"Checkpoint path `{checkpoint_path}` does not exist") + if not fs.exists(path): + raise ValueError(f"Checkpoint path `{path}` does not exist") - return checkpoint_path + return path