From 5b9c406e7c8c5a7e1bc32b3f3bf13ff4431d76a9 Mon Sep 17 00:00:00 2001 From: Igor Date: Fri, 15 Mar 2024 04:18:59 +0000 Subject: [PATCH 1/2] fix: add missing callbacks/experiment entries to eval.yaml defaults --- configs/eval.yaml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/configs/eval.yaml b/configs/eval.yaml index a26e25b..32e46d3 100644 --- a/configs/eval.yaml +++ b/configs/eval.yaml @@ -4,15 +4,25 @@ defaults: - _self_ - data: null # choose datamodule with `test_dataloader()` for evaluation - model: null + - callbacks: default.yaml - logger: many_loggers.yaml - trainer: default.yaml - paths: default.yaml - extras: default.yaml - hydra: default.yaml + # experiment configs allow for version control of specific hyperparameters + # e.g. best hyperparameters for given model and datamodule + - experiment: null + +# task name, determines output directory path task_name: "eval" +# tags to help you identify your experiments +# you can overwrite this in experiment configs +# overwrite from command line with `python train.py tags="[first_tag, second_tag]"` tags: ["eval"] # passing checkpoint path is necessary for evaluation -ckpt_path: ??? +# example: logs/train/runs/nrms_mindsmall_pretrainedemb_celoss_bertsent_s42/2024-03-08_07-48-40/checkpoints/last.ckpt +ckpt_path: ??? From a6ef42cb6e8577c71a865d805a9ec687e2fb7587 Mon Sep 17 00:00:00 2001 From: Igor Date: Fri, 15 Mar 2024 04:19:42 +0000 Subject: [PATCH 2/2] docs: adding example for MINDlarge --- README.md | 10 +++++ ...ndlarge_pretrainedemb_celoss_bertsent.yaml | 42 +++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 configs/experiment/nrms_mindlarge_pretrainedemb_celoss_bertsent.yaml diff --git a/README.md b/README.md index 393583b..cebc6aa 100644 --- a/README.md +++ b/README.md @@ -81,6 +81,16 @@ of the corresponding modules. 
num_heads: 15 ``` +For training the `NRMS` model on the `MINDlarge` dataset, execute the following command: + +```bash +python newsreclib/train.py experiment=nrms_mindlarge_pretrainedemb_celoss_bertsent +``` + +To understand how to adjust configuration files when transitioning from smaller to larger datasets, refer to the examples provided in `nrms_mindsmall_pretrainedemb_celoss_bertsent` and `nrms_mindlarge_pretrainedemb_celoss_bertsent`. These files will guide you in scaling your configurations appropriately. + +*Note:* The same procedure applies for the advanced configuration shown below. + ## Advanced Configuration The advanced scenario depicts a more complex experimental setting. diff --git a/configs/experiment/nrms_mindlarge_pretrainedemb_celoss_bertsent.yaml b/configs/experiment/nrms_mindlarge_pretrainedemb_celoss_bertsent.yaml new file mode 100644 index 0000000..ad96a91 --- /dev/null +++ b/configs/experiment/nrms_mindlarge_pretrainedemb_celoss_bertsent.yaml @@ -0,0 +1,42 @@ +# @package _global_ + +# to execute this experiment run: +# python train.py experiment=nrms_mindlarge_pretrainedemb_celoss_bertsent + +defaults: + - override /data: mind_rec_bert_sent.yaml + - override /model: nrms.yaml + - override /callbacks: default.yaml + - override /logger: many_loggers.yaml + - override /trainer: gpu.yaml + +# all parameters below will be merged with parameters from default configurations set above +# this allows you to overwrite only specified parameters + +tags: ["nrms", "mindlarge", "pretrainedemb", "celoss", "bertsent"] + +seed: 42 + +data: + dataset_size: "large" + +model: + use_plm: False + pretrained_embeddings_path: ${paths.data_dir}MINDlarge_train/transformed_word_embeddings.npy + embed_dim: 300 + num_heads: 15 + query_dim: 200 + dropout_probability: 0.2 + +callbacks: + early_stopping: + patience: 5 + +trainer: + max_epochs: 20 + +logger: + wandb: + name: "nrms_mindlarge_pretrainedemb_celoss_bertsent_s42" + tags: ${tags} + group: "mind"