From 5ccc9f83e07529e5ce43295b026d3f7c0991506e Mon Sep 17 00:00:00 2001
From: andreeaiana
Date: Tue, 19 Mar 2024 11:50:59 +0100
Subject: [PATCH 1/5] Streamline user IDs and indices across datasets

---
 newsreclib/data/components/adressa_dataframe.py | 10 ++++++----
 newsreclib/data/components/batch.py             |  9 ++++++---
 newsreclib/data/components/mind_dataframe.py    |  2 +-
 tests/test_datamodules.py                       |  4 ++--
 4 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/newsreclib/data/components/adressa_dataframe.py b/newsreclib/data/components/adressa_dataframe.py
index a943a15..336524b 100644
--- a/newsreclib/data/components/adressa_dataframe.py
+++ b/newsreclib/data/components/adressa_dataframe.py
@@ -548,7 +548,7 @@ def _load_behaviors(self) -> pd.DataFrame:
         log.info("Mapping uid to index.")
         behaviors["user"] = behaviors["uid"].apply(lambda x: uid2index.get(x, 0))

-        behaviors = behaviors[["user", "history", "candidates", "labels"]]
+        behaviors = behaviors[["uid", "user", "history", "candidates", "labels"]]

         # cache processed data
         log.info(
@@ -560,7 +560,7 @@ def _load_behaviors(self) -> pd.DataFrame:

     def _process_news_files(
         self, filepath
-    ) -> Tuple[Dict[str, str], Dict[str, str], Dict[str, str], Dict[str, int]]:
+    ) -> Tuple[Dict[str, str], Dict[str, str], Dict[str, str], Dict[str, str]]:
         """Processes the news data.

         Adapted from
@@ -604,7 +604,9 @@ def _process_news_files(
                 == event_dict["category1"].split("|")[-1]
             )

-        nid2index = {k: v for k, v in zip(news_title.keys(), range(1, len(news_title) + 1))}
+        nid2index = {
+            k: "N" + str(v) for k, v in zip(news_title.keys(), range(1, len(news_title) + 1))
+        }

         return news_title, news_category, news_subcategory, nid2index

@@ -664,7 +666,7 @@ def _process_users(
                 and event_dict["id"] in nid2index
             ):
                 nindex = nid2index[event_dict["id"]]
-                uid = event_dict["userId"]
+                uid = "U" + str(event_dict["userId"])

                 if uid not in uid2index:
                     uid2index[uid] = len(uid2index)
diff --git a/newsreclib/data/components/batch.py b/newsreclib/data/components/batch.py
index 7fcae72..e38846a 100644
--- a/newsreclib/data/components/batch.py
+++ b/newsreclib/data/components/batch.py
@@ -17,8 +17,10 @@ class RecommendationBatch(TypedDict):
             Dictionary of news from the users' candidates, mapping news features to values.
         labels:
             Ground truth specifying whether the news is relevant to the user.
-        users:
-            Users included in the batch.
+        user_ids:
+            Original user IDs of the users included in the batch.
+        user_idx:
+            Indices of users included in the batch (e.g., for creating embedding matrix).
""" batch_hist: torch.Tensor @@ -26,7 +28,8 @@ class RecommendationBatch(TypedDict): x_hist: Dict[str, Any] x_cand: Dict[str, Any] labels: torch.Tensor - users: torch.Tensor + user_ids: torch.Tensor + user_idx: torch.Tensor class NewsBatch(TypedDict): diff --git a/newsreclib/data/components/mind_dataframe.py b/newsreclib/data/components/mind_dataframe.py index 7c17ec1..6ba05fb 100644 --- a/newsreclib/data/components/mind_dataframe.py +++ b/newsreclib/data/components/mind_dataframe.py @@ -590,7 +590,7 @@ def _load_behaviors(self) -> pd.DataFrame: # cache parsed behaviors log.info(f"Caching parsed behaviors of size {len(behaviors)} to {parsed_bhv_file}.") - behaviors = behaviors[["user", "history", "candidates", "labels"]] + behaviors = behaviors[["uid", "user", "history", "candidates", "labels"]] file_utils.to_tsv(behaviors, parsed_bhv_file) return behaviors diff --git a/tests/test_datamodules.py b/tests/test_datamodules.py index 3711b2d..04e1e15 100644 --- a/tests/test_datamodules.py +++ b/tests/test_datamodules.py @@ -116,7 +116,7 @@ def test_mind_rec_small_datamodule(batch_size): batch = next(iter(dm.train_dataloader())) - assert len(batch["users"]) == batch_size + assert len(batch["user_idx"]) == batch_size @pytest.mark.parametrize("batch_size", [8, 64]) @@ -223,4 +223,4 @@ def test_adressa_rec_small_datamodule(batch_size): batch = next(iter(dm.train_dataloader())) - assert len(batch["users"]) == batch_size + assert len(batch["user_idx"]) == batch_size From bbf494bbd06bc98acda9124ccc58526bc760b75c Mon Sep 17 00:00:00 2001 From: andreeaiana Date: Tue, 19 Mar 2024 11:53:25 +0100 Subject: [PATCH 2/5] Feat: add functionality to save recommendation lists and scores --- newsreclib/models/abstract_recommender.py | 37 +++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/newsreclib/models/abstract_recommender.py b/newsreclib/models/abstract_recommender.py index d62a827..f5c876f 100644 --- a/newsreclib/models/abstract_recommender.py +++ b/newsreclib/models/abstract_recommender.py @@ -1,3 +1,4 @@ +import json from typing import Any, Callable, Dict, List, Optional, Tuple, Union import numpy as np @@ -154,3 +155,39 @@ def _clear_epoch_outputs( outputs_dict[key].clear() return outputs_dict + + def _get_recommendations( + self, + user_ids: torch.Tensor, + news_ids: torch.Tensor, + scores: torch.Tensor, + cand_news_size: torch.Tensor, + ) -> Dict[int, Dict[str, List[Any]]]: + """Returns the recommendations and corresponding scores for the given users. + + Attributes: + user_ids (torch.Tensor): IDs of users. + news_ids (torch.Tensor): IDs of the candidates news. + scores (torch.Tensor): Predicted scores for the candidate news. + cand_news_size (torch.Tensor): Number of candidate news for each user. + + Returns: + Dict[int, Dict[str, List[Any]]]: A dictionary with user IDs as keys and an inner dictionary of recommendations and corresponding scores as values. 
+ """ + users = torch.repeat_interleave(user_ids.detach().cpu(), cand_news_size).tolist() + users = ["U" + str(uid) for uid in users] + news = ["N" + str(nid) for nid in news_ids.detach().cpu().tolist()] + scores = scores.detach().cpu().tolist() + + # dictionary of recommendations and scores for each user + recommendations_dico = {} + for user, news, score in zip(users, news, scores): + if user not in recommendations_dico: + recommendations_dico[user] = {} + recommendations_dico[user][news] = score + + return recommendations_dico + + def _save_recommendations(self, recommendations: Dict[int, Dict[str, List[Any]]], fpath: str): + with open(fpath, "w") as f: + json.dump(recommendations, f) From e0a1baab85930f3aa067544de51d54d7c05d0ae2 Mon Sep 17 00:00:00 2001 From: andreeaiana Date: Tue, 19 Mar 2024 11:55:29 +0100 Subject: [PATCH 3/5] Streamline user IDs and indices; add news_ids to batch --- newsreclib/data/components/news_dataset.py | 4 +++ newsreclib/data/components/rec_dataset.py | 30 +++++++++++++++------- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/newsreclib/data/components/news_dataset.py b/newsreclib/data/components/news_dataset.py index 35d36ef..7ee6f11 100644 --- a/newsreclib/data/components/news_dataset.py +++ b/newsreclib/data/components/news_dataset.py @@ -101,6 +101,10 @@ def _tokenize_plm(self, text: List[str]): def _tokenize_df(self, df: pd.DataFrame) -> Dict[str, Any]: batch_out = {} + # news IDs (i.e., keep only numeric part of unique NID) + nids = np.array([int(nid.split("N")[-1]) for nid in df.index.values]) + batch_out["news_ids"] = torch.from_numpy(nids).long() + if not self.concatenate_inputs: # prepare text if not self.use_plm: diff --git a/newsreclib/data/components/rec_dataset.py b/newsreclib/data/components/rec_dataset.py index 93ace66..c45dec9 100644 --- a/newsreclib/data/components/rec_dataset.py +++ b/newsreclib/data/components/rec_dataset.py @@ -36,10 +36,13 @@ def __init__( self.max_history_len = max_history_len self.neg_sampling_ratio = neg_sampling_ratio - def __getitem__(self, index: Any) -> Tuple[np.ndarray, pd.DataFrame, pd.DataFrame, np.ndarray]: + def __getitem__( + self, index: Any + ) -> Tuple[np.ndarray, np.ndarray, pd.DataFrame, pd.DataFrame, np.ndarray]: bhv = self.behaviors.iloc[index] - user = np.array([int(bhv["user"])]) + user_id = np.array([int(bhv["uid"].split("U")[-1])]) + user_idx = np.array([int(bhv["user"])]) history = np.array(bhv["history"])[: self.max_history_len] candidates = np.array(bhv["candidates"]) labels = np.array(bhv["labels"]) @@ -49,7 +52,7 @@ def __getitem__(self, index: Any) -> Tuple[np.ndarray, pd.DataFrame, pd.DataFram history = self.news.loc[history] candidates = self.news.loc[candidates] - return user, history, candidates, labels + return user_id, user_idx, history, candidates, labels def __len__(self) -> int: return len(self.behaviors) @@ -98,10 +101,13 @@ def __init__(self, news: pd.DataFrame, behaviors: pd.DataFrame, max_history_len: self.behaviors = behaviors self.max_history_len = max_history_len - def __getitem__(self, idx: Any) -> Tuple[np.ndarray, pd.DataFrame, pd.DataFrame, np.ndarray]: + def __getitem__( + self, idx: Any + ) -> Tuple[np.ndarray, np.ndarray, pd.DataFrame, pd.DataFrame, np.ndarray]: bhv = self.behaviors.iloc[idx] - user = np.array([int(bhv["user"])]) + user_id = np.array([int(bhv["uid"].split("U")[-1])]) + user_idx = np.array([int(bhv["user"])]) history = np.array(bhv["history"])[: self.max_history_len] candidates = np.array(bhv["candidates"]) labels = 
np.array(bhv["labels"]) @@ -109,7 +115,7 @@ def __getitem__(self, idx: Any) -> Tuple[np.ndarray, pd.DataFrame, pd.DataFrame, history = self.news.loc[history] candidates = self.news.loc[candidates] - return user, history, candidates, labels + return user_id, user_idx, history, candidates, labels def __len__(self) -> int: return len(self.behaviors) @@ -140,7 +146,7 @@ def __init__( self.concatenate_inputs = concatenate_inputs def __call__(self, batch) -> RecommendationBatch: - users, histories, candidates, labels = zip(*batch) + user_ids, user_idx, histories, candidates, labels = zip(*batch) batch_hist = self._make_batch_asignees(histories) batch_cand = self._make_batch_asignees(candidates) @@ -148,7 +154,8 @@ def __call__(self, batch) -> RecommendationBatch: x_hist = self._tokenize_df(pd.concat(histories)) x_cand = self._tokenize_df(pd.concat(candidates)) labels = torch.from_numpy(np.concatenate(labels)).float() - users = torch.from_numpy(np.concatenate(users)).long() + user_ids = torch.from_numpy(np.concatenate(user_ids)).long() + user_idx = torch.from_numpy(np.concatenate(user_idx)).long() return RecommendationBatch( batch_hist=batch_hist, @@ -156,7 +163,8 @@ def __call__(self, batch) -> RecommendationBatch: x_hist=x_hist, x_cand=x_cand, labels=labels, - users=users, + user_ids=user_ids, + user_idx=user_idx, ) def _tokenize_embeddings(self, text: List[List[int]], max_len: Optional[int]) -> torch.Tensor: @@ -177,6 +185,10 @@ def _tokenize_plm(self, text: List[str]): def _tokenize_df(self, df: pd.DataFrame) -> Dict[str, Any]: batch_out = {} + # news IDs (i.e., keep only numeric part of unique NID) + nids = np.array([int(nid.split("N")[-1]) for nid in df.index.values]) + batch_out["news_ids"] = torch.from_numpy(nids).long() + if not self.concatenate_inputs: # prepare text if not self.use_plm: From eca9f54f6c7c9dee72868ba7c84b0bdc64464ddc Mon Sep 17 00:00:00 2001 From: andreeaiana Date: Tue, 19 Mar 2024 11:56:32 +0100 Subject: [PATCH 4/5] Add optional saving of recommendation list in on_test_epoch_end --- .../models/fair_rec/manner_cr_module.py | 38 +++++++++++++++++- newsreclib/models/fair_rec/manner_module.py | 34 ++++++++++++++++ .../models/fair_rec/senti_debias_module.py | 36 ++++++++++++++++- newsreclib/models/fair_rec/sentirec_module.py | 38 +++++++++++++++++- newsreclib/models/general_rec/caum_module.py | 38 +++++++++++++++++- .../models/general_rec/cen_news_rec_module.py | 38 +++++++++++++++++- newsreclib/models/general_rec/dkn_module.py | 38 +++++++++++++++++- newsreclib/models/general_rec/lstur_module.py | 40 +++++++++++++++++-- newsreclib/models/general_rec/miner_module.py | 38 +++++++++++++++++- newsreclib/models/general_rec/mins_module.py | 38 +++++++++++++++++- newsreclib/models/general_rec/naml_module.py | 38 +++++++++++++++++- newsreclib/models/general_rec/npa_module.py | 40 +++++++++++++++++-- newsreclib/models/general_rec/nrms_module.py | 38 +++++++++++++++++- newsreclib/models/general_rec/tanr_module.py | 38 +++++++++++++++++- 14 files changed, 503 insertions(+), 27 deletions(-) diff --git a/newsreclib/models/fair_rec/manner_cr_module.py b/newsreclib/models/fair_rec/manner_cr_module.py index 45a5250..071ab67 100644 --- a/newsreclib/models/fair_rec/manner_cr_module.py +++ b/newsreclib/models/fair_rec/manner_cr_module.py @@ -60,6 +60,10 @@ class CRModule(AbstractRecommneder): The number of topical categories. num_sent_classes: The number of sentiment classes. 
+ save_recs: + Whether to save the recommendations (i.e., candidates news and corresponding scores) to disk in JSON format. + recs_fpath: + Path where to save the list of recommendations and corresponding scores for users. optimizer: Optimizer used for model training. scheduler: @@ -86,6 +90,8 @@ def __init__( top_k_list: List[int], num_categ_classes: int, num_sent_classes: int, + save_recs: bool, + recs_fpath: Optional[str], optimizer: torch.optim.Optimizer, scheduler: torch.optim.lr_scheduler, ) -> None: @@ -98,6 +104,9 @@ def __init__( self.num_categ_classes = self.hparams.num_categ_classes + 1 self.num_sent_classes = self.hparams.num_sent_classes + 1 + if self.hparams.save_recs: + assert isinstance(self.hparams.recs_fpath, str) + # initialize loss self.criterion = self._get_loss(self.hparams.loss) @@ -259,6 +268,8 @@ def model_step( torch.Tensor, torch.Tensor, torch.Tensor, + torch.Tensor, + torch.Tensor, ]: scores = self.forward(batch) @@ -319,6 +330,9 @@ def model_step( [torch.where(mask_hist[n])[0].shape[0] for n in range(mask_hist.shape[0])] ) + user_ids = batch["user_ids"] + cand_news_ids = batch["x_cand"]["news_ids"] + return ( loss, preds, @@ -329,10 +343,12 @@ def model_step( target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) def training_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log loss self.train_loss(loss) @@ -366,7 +382,7 @@ def on_train_epoch_end(self) -> None: self.training_step_outputs = self._clear_epoch_outputs(self.training_step_outputs) def validation_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log metrics self.val_loss(loss) @@ -420,6 +436,8 @@ def test_step(self, batch: RecommendationBatch, batch_idx: int): target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) = self.model_step(batch) # update and log metrics @@ -455,6 +473,9 @@ def on_test_epoch_end(self) -> None: cand_indexes = torch.arange(cand_news_size.shape[0]).repeat_interleave(cand_news_size) hist_indexes = torch.arange(hist_news_size.shape[0]).repeat_interleave(hist_news_size) + user_ids = self._gather_step_outputs(self.test_step_outputs, "user_ids") + cand_news_ids = self._gather_step_outputs(self.test_step_outputs, "cand_news_ids") + # update metrics self.test_rec_metrics(preds, targets, **{"indexes": cand_indexes}) self.test_categ_div_metrics(preds, target_categories, cand_indexes) @@ -483,5 +504,18 @@ def on_test_epoch_end(self) -> None: self.test_sent_pers_metrics, on_step=False, on_epoch=True, prog_bar=True, logger=True ) + # save recommendations + if self.hparams.save_recs: + recommendations_dico = self._get_recommendations( + user_ids=user_ids, + news_ids=cand_news_ids, + scores=preds, + cand_news_size=cand_news_size, + ) + print(recommendations_dico) + self._save_recommendations( + recommendations=recommendations_dico, fpath=self.hparams.recs_fpath + ) + # clear memory for the next epoch self.test_step_outputs = self._clear_epoch_outputs(self.test_step_outputs) diff --git a/newsreclib/models/fair_rec/manner_module.py b/newsreclib/models/fair_rec/manner_module.py index 5e4d3b9..723a3d8 100644 --- a/newsreclib/models/fair_rec/manner_module.py +++ 
b/newsreclib/models/fair_rec/manner_module.py @@ -41,6 +41,10 @@ class MANNERModule(AbstractRecommneder): The number of topical categories. num_sent_classes: The number of sentiment classes. + save_recs: + Whether to save the recommendations (i.e., candidates news and corresponding scores) to disk in JSON format. + recs_fpath: + Path where to save the list of recommendations and corresponding scores for users. optimizer: Optimizer used for model training. scheduler: @@ -58,6 +62,8 @@ def __init__( top_k_list: List[int], num_categ_classes: int, num_sent_classes: int, + save_recs: bool, + recs_fpath: Optional[str], optimizer: torch.optim.Optimizer, scheduler: torch.optim.lr_scheduler, ) -> None: @@ -70,6 +76,9 @@ def __init__( self.num_categ_classes = self.hparams.num_categ_classes + 1 self.num_sent_classes = self.hparams.num_sent_classes + 1 + if self.hparams.save_recs: + assert isinstance(self.hparams.recs_fpath, str) + # load ensemble components self.cr_module = CRModule.load_from_checkpoint( checkpoint_path=self.hparams.cr_module_module_ckpt @@ -208,6 +217,8 @@ def model_step( torch.Tensor, torch.Tensor, torch.Tensor, + torch.Tensor, + torch.Tensor, ]: scores = self.forward(batch) @@ -237,6 +248,9 @@ def model_step( [torch.where(mask_hist[n])[0].shape[0] for n in range(mask_hist.shape[0])] ) + user_ids = batch["user_ids"] + cand_news_ids = batch["x_cand"]["news_ids"] + return ( preds, targets, @@ -246,6 +260,8 @@ def model_step( target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) def training_step(self, batch: RecommendationBatch, batch_idx: int): @@ -264,6 +280,8 @@ def test_step(self, batch: RecommendationBatch, batch_idx: int): target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) = self.model_step(batch) # collect step outputs for metric computation @@ -287,6 +305,9 @@ def on_test_epoch_end(self) -> None: cand_indexes = torch.arange(cand_news_size.shape[0]).repeat_interleave(cand_news_size) hist_indexes = torch.arange(hist_news_size.shape[0]).repeat_interleave(hist_news_size) + user_ids = self._gather_step_outputs(self.test_step_outputs, "user_ids") + cand_news_ids = self._gather_step_outputs(self.test_step_outputs, "cand_news_ids") + # update metrics self.test_rec_metrics(preds, targets, **{"indexes": cand_indexes}) self.test_categ_div_metrics(preds, target_categories, cand_indexes) @@ -315,5 +336,18 @@ def on_test_epoch_end(self) -> None: self.test_sent_pers_metrics, on_step=False, on_epoch=True, prog_bar=True, logger=True ) + # save recommendations + if self.hparams.save_recs: + recommendations_dico = self._get_recommendations( + user_ids=user_ids, + news_ids=cand_news_ids, + scores=preds, + cand_news_size=cand_news_size, + ) + print(recommendations_dico) + self._save_recommendations( + recommendations=recommendations_dico, fpath=self.hparams.recs_fpath + ) + # clear memory for the next epoch self.test_step_outputs = self._clear_epoch_outputs(self.test_step_outputs) diff --git a/newsreclib/models/fair_rec/senti_debias_module.py b/newsreclib/models/fair_rec/senti_debias_module.py index 2402553..288241d 100644 --- a/newsreclib/models/fair_rec/senti_debias_module.py +++ b/newsreclib/models/fair_rec/senti_debias_module.py @@ -283,6 +283,10 @@ class SentiDebiasModule(AbstractRecommneder): The number of topical categories. num_sent_classes: The number of sentiment classes. + save_recs: + Whether to save the recommendations (i.e., candidates news and corresponding scores) to disk in JSON format. 
+ recs_fpath: + Path where to save the list of recommendations and corresponding scores for users. optimizer: Optimizer used for model training. alpha_coefficient: @@ -305,6 +309,8 @@ def __init__( top_k_list: List[int], num_categ_classes: int, num_sent_classes: int, + save_recs: bool, + recs_fpath: Optional[str], optimizer: torch.optim.Optimizer, alpha_coefficient: float, beta_coefficient: float, @@ -323,6 +329,9 @@ def __init__( self.num_categ_classes = self.hparams.num_categ_classes + 1 self.num_sent_classes = self.hparams.num_sent_classes + 1 + if self.hparams.save_recs: + assert isinstance(self.hparams.recs_fpath, str) + # initialize loss self.rec_loss = self._get_loss("cross_entropy_loss") self.a_loss = self._get_loss("cross_entropy_loss") @@ -416,6 +425,8 @@ def model_step( torch.Tensor, torch.Tensor, torch.Tensor, + torch.Tensor, + torch.Tensor, ]: _, bias_free_scores, _, _, _ = self.forward(batch) @@ -445,6 +456,9 @@ def model_step( [torch.where(mask_hist[n])[0].shape[0] for n in range(mask_hist.shape[0])] ) + user_ids = batch["user_ids"] + cand_news_ids = batch["x_cand"]["news_ids"] + return ( preds, targets, @@ -454,6 +468,8 @@ def model_step( target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) def training_step(self, batch: RecommendationBatch, batch_idx: int): @@ -532,7 +548,7 @@ def on_train_epoch_end(self) -> None: self.training_step_outputs = self._clear_epoch_outputs(self.training_step_outputs) def validation_step(self, batch: RecommendationBatch, batch_idx: int): - preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # collect step outputs for metric computation self.val_step_outputs = self._collect_step_outputs( @@ -581,6 +597,8 @@ def test_step(self, batch: RecommendationBatch, batch_idx: int): target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) = self.model_step(batch) # collect step outputs for metric computation @@ -604,6 +622,9 @@ def on_test_epoch_end(self) -> None: cand_indexes = torch.arange(cand_news_size.shape[0]).repeat_interleave(cand_news_size) hist_indexes = torch.arange(hist_news_size.shape[0]).repeat_interleave(hist_news_size) + user_ids = self._gather_step_outputs(self.test_step_outputs, "user_ids") + cand_news_ids = self._gather_step_outputs(self.test_step_outputs, "cand_news_ids") + # update metrics self.test_rec_metrics(preds, targets, **{"indexes": cand_indexes}) self.test_categ_div_metrics(preds, target_categories, cand_indexes) @@ -632,6 +653,19 @@ def on_test_epoch_end(self) -> None: self.test_sent_pers_metrics, on_step=False, on_epoch=True, prog_bar=True, logger=True ) + # save recommendations + if self.hparams.save_recs: + recommendations_dico = self._get_recommendations( + user_ids=user_ids, + news_ids=cand_news_ids, + scores=preds, + cand_news_size=cand_news_size, + ) + print(recommendations_dico) + self._save_recommendations( + recommendations=recommendations_dico, fpath=self.hparams.recs_fpath + ) + # clear memory for the next epoch self.test_step_outputs = self._clear_epoch_outputs(self.test_step_outputs) diff --git a/newsreclib/models/fair_rec/sentirec_module.py b/newsreclib/models/fair_rec/sentirec_module.py index 17dbdaa..2cb347f 100644 --- a/newsreclib/models/fair_rec/sentirec_module.py +++ b/newsreclib/models/fair_rec/sentirec_module.py @@ -68,6 +68,10 @@ class SentiRecModule(AbstractRecommneder): The number of topical categories. 
num_sent_classes: The number of sentiment classes. + save_recs: + Whether to save the recommendations (i.e., candidates news and corresponding scores) to disk in JSON format. + recs_fpath: + Path where to save the list of recommendations and corresponding scores for users. optimizer: Optimizer used for model training. scheduler: @@ -97,6 +101,8 @@ def __init__( top_k_list: List[int], num_categ_classes: int, num_sent_classes: int, + save_recs: bool, + recs_fpath: Optional[str], optimizer: torch.optim.Optimizer, scheduler: torch.optim.lr_scheduler, ) -> None: @@ -109,6 +115,9 @@ def __init__( self.num_categ_classes = self.hparams.num_categ_classes + 1 self.num_sent_classes = self.hparams.num_sent_classes + 1 + if self.hparams.save_recs: + assert isinstance(self.hparams.recs_fpath, str) + # initialize loss if not self.hparams.dual_loss_training: self.criterion = self._get_loss(self.hparams.loss) @@ -278,6 +287,8 @@ def model_step( torch.Tensor, torch.Tensor, torch.Tensor, + torch.Tensor, + torch.Tensor, ]: scores, sent_scores = self.forward(batch) @@ -366,6 +377,9 @@ def model_step( [torch.where(mask_cand[n])[0].shape[0] for n in range(mask_cand.shape[0])] ) + user_ids = batch["user_ids"] + cand_news_ids = batch["x_cand"]["news_ids"] + return ( loss, preds, @@ -376,10 +390,12 @@ def model_step( target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) def training_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log loss self.train_loss(loss) @@ -413,7 +429,7 @@ def on_train_epoch_end(self) -> None: self.training_step_outputs = self._clear_epoch_outputs(self.training_step_outputs) def validation_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log metrics self.val_loss(loss) @@ -467,6 +483,8 @@ def test_step(self, batch: RecommendationBatch, batch_idx: int): target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) = self.model_step(batch) # update and log metrics @@ -502,6 +520,9 @@ def on_test_epoch_end(self) -> None: cand_indexes = torch.arange(cand_news_size.shape[0]).repeat_interleave(cand_news_size) hist_indexes = torch.arange(hist_news_size.shape[0]).repeat_interleave(hist_news_size) + user_ids = self._gather_step_outputs(self.test_step_outputs, "user_ids") + cand_news_ids = self._gather_step_outputs(self.test_step_outputs, "cand_news_ids") + # update metrics self.test_rec_metrics(preds, targets, **{"indexes": cand_indexes}) self.test_categ_div_metrics(preds, target_categories, cand_indexes) @@ -530,6 +551,19 @@ def on_test_epoch_end(self) -> None: self.test_sent_pers_metrics, on_step=False, on_epoch=True, prog_bar=True, logger=True ) + # save recommendations + if self.hparams.save_recs: + recommendations_dico = self._get_recommendations( + user_ids=user_ids, + news_ids=cand_news_ids, + scores=preds, + cand_news_size=cand_news_size, + ) + print(recommendations_dico) + self._save_recommendations( + recommendations=recommendations_dico, fpath=self.hparams.recs_fpath + ) + # clear memory for the next epoch self.test_step_outputs = self.test_step_outputs = self._clear_epoch_outputs( self.test_step_outputs diff --git a/newsreclib/models/general_rec/caum_module.py 
b/newsreclib/models/general_rec/caum_module.py index e4019c9..f1aff46 100644 --- a/newsreclib/models/general_rec/caum_module.py +++ b/newsreclib/models/general_rec/caum_module.py @@ -83,6 +83,10 @@ class CAUMModule(AbstractRecommneder): The number of topical categories. num_sent_classes: The number of sentiment classes. + save_recs: + Whether to save the recommendations (i.e., candidates news and corresponding scores) to disk in JSON format. + recs_fpath: + Path where to save the list of recommendations and corresponding scores for users. optimizer: Optimizer used for model training. scheduler: @@ -120,6 +124,8 @@ def __init__( top_k_list: List[int], num_categ_classes: int, num_sent_classes: int, + save_recs: bool, + recs_fpath: Optional[str], optimizer: torch.optim.Optimizer, scheduler: torch.optim.lr_scheduler, ) -> None: @@ -132,6 +138,9 @@ def __init__( self.num_categ_classes = self.hparams.num_categ_classes + 1 self.num_sent_classes = self.hparams.num_sent_classes + 1 + if self.hparams.save_recs: + assert isinstance(self.hparams.recs_fpath, str) + # initialize loss if not self.hparams.dual_loss_training: self.criterion = self._get_loss(self.hparams.loss) @@ -365,6 +374,8 @@ def model_step( torch.Tensor, torch.Tensor, torch.Tensor, + torch.Tensor, + torch.Tensor, ]: scores = self.forward(batch) @@ -438,6 +449,9 @@ def model_step( [torch.where(mask_hist[n])[0].shape[0] for n in range(mask_hist.shape[0])] ) + user_ids = batch["user_ids"] + cand_news_ids = batch["x_cand"]["news_ids"] + return ( loss, preds, @@ -448,10 +462,12 @@ def model_step( target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) def training_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log loss self.train_loss(loss) @@ -485,7 +501,7 @@ def on_train_epoch_end(self) -> None: self.training_step_outputs = self._clear_epoch_outputs(self.training_step_outputs) def validation_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log metrics self.val_loss(loss) @@ -539,6 +555,8 @@ def test_step(self, batch: RecommendationBatch, batch_idx: int): target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) = self.model_step(batch) # update and log metrics @@ -574,6 +592,9 @@ def on_test_epoch_end(self) -> None: cand_indexes = torch.arange(cand_news_size.shape[0]).repeat_interleave(cand_news_size) hist_indexes = torch.arange(hist_news_size.shape[0]).repeat_interleave(hist_news_size) + user_ids = self._gather_step_outputs(self.test_step_outputs, "user_ids") + cand_news_ids = self._gather_step_outputs(self.test_step_outputs, "cand_news_ids") + # update metrics self.test_rec_metrics(preds, targets, **{"indexes": cand_indexes}) self.test_categ_div_metrics(preds, target_categories, cand_indexes) @@ -602,5 +623,18 @@ def on_test_epoch_end(self) -> None: self.test_sent_pers_metrics, on_step=False, on_epoch=True, prog_bar=True, logger=True ) + # save recommendations + if self.hparams.save_recs: + recommendations_dico = self._get_recommendations( + user_ids=user_ids, + news_ids=cand_news_ids, + scores=preds, + cand_news_size=cand_news_size, + ) + print(recommendations_dico) + self._save_recommendations( + 
recommendations=recommendations_dico, fpath=self.hparams.recs_fpath + ) + # clear memory for the next epoch self.test_step_outputs = self._clear_epoch_outputs(self.test_step_outputs) diff --git a/newsreclib/models/general_rec/cen_news_rec_module.py b/newsreclib/models/general_rec/cen_news_rec_module.py index 9525981..42359a7 100644 --- a/newsreclib/models/general_rec/cen_news_rec_module.py +++ b/newsreclib/models/general_rec/cen_news_rec_module.py @@ -70,6 +70,10 @@ class CenNewsRecModule(AbstractRecommneder): The number of topical categories. num_sent_classes: The number of sentiment classes. + save_recs: + Whether to save the recommendations (i.e., candidates news and corresponding scores) to disk in JSON format. + recs_fpath: + Path where to save the list of recommendations and corresponding scores for users. optimizer: Optimizer used for model training. scheduler: @@ -101,6 +105,8 @@ def __init__( top_k_list: List[int], num_categ_classes: int, num_sent_classes: int, + save_recs: bool, + recs_fpath: Optional[str], optimizer: torch.optim.Optimizer, scheduler: torch.optim.lr_scheduler, ) -> None: @@ -113,6 +119,9 @@ def __init__( self.num_categ_classes = self.hparams.num_categ_classes + 1 self.num_sent_classes = self.hparams.num_sent_classes + 1 + if self.hparams.save_recs: + assert isinstance(self.hparams.recs_fpath, str) + # initialize loss if not self.hparams.dual_loss_training: self.criterion = self._get_loss(self.hparams.loss) @@ -278,6 +287,8 @@ def model_step( torch.Tensor, torch.Tensor, torch.Tensor, + torch.Tensor, + torch.Tensor, ]: scores = self.forward(batch) @@ -351,6 +362,9 @@ def model_step( [torch.where(mask_hist[n])[0].shape[0] for n in range(mask_hist.shape[0])] ) + user_ids = batch["user_ids"] + cand_news_ids = batch["x_cand"]["news_ids"] + return ( loss, preds, @@ -361,10 +375,12 @@ def model_step( target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) def training_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log loss self.train_loss(loss) @@ -398,7 +414,7 @@ def on_train_epoch_end(self) -> None: self.training_step_outputs = self._clear_epoch_outputs(self.training_step_outputs) def validation_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log metrics self.val_loss(loss) @@ -452,6 +468,8 @@ def test_step(self, batch: RecommendationBatch, batch_idx: int): target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) = self.model_step(batch) # update and log metrics @@ -487,6 +505,9 @@ def on_test_epoch_end(self) -> None: cand_indexes = torch.arange(cand_news_size.shape[0]).repeat_interleave(cand_news_size) hist_indexes = torch.arange(hist_news_size.shape[0]).repeat_interleave(hist_news_size) + user_ids = self._gather_step_outputs(self.test_step_outputs, "user_ids") + cand_news_ids = self._gather_step_outputs(self.test_step_outputs, "cand_news_ids") + # update metrics self.test_rec_metrics(preds, targets, **{"indexes": cand_indexes}) self.test_categ_div_metrics(preds, target_categories, cand_indexes) @@ -515,5 +536,18 @@ def on_test_epoch_end(self) -> None: self.test_sent_pers_metrics, on_step=False, on_epoch=True, prog_bar=True, logger=True ) + # save 
recommendations + if self.hparams.save_recs: + recommendations_dico = self._get_recommendations( + user_ids=user_ids, + news_ids=cand_news_ids, + scores=preds, + cand_news_size=cand_news_size, + ) + print(recommendations_dico) + self._save_recommendations( + recommendations=recommendations_dico, fpath=self.hparams.recs_fpath + ) + # clear memory for the next epoch self.test_step_outputs = self._clear_epoch_outputs(self.test_step_outputs) diff --git a/newsreclib/models/general_rec/dkn_module.py b/newsreclib/models/general_rec/dkn_module.py index 9925d70..a290376 100644 --- a/newsreclib/models/general_rec/dkn_module.py +++ b/newsreclib/models/general_rec/dkn_module.py @@ -59,6 +59,10 @@ class DKNModule(AbstractRecommneder): The number of topical categories. num_sent_classes: The number of sentiment classes. + save_recs: + Whether to save the recommendations (i.e., candidates news and corresponding scores) to disk in JSON format. + recs_fpath: + Path where to save the list of recommendations and corresponding scores for users. optimizer: Optimizer used for model training. scheduler: @@ -84,6 +88,8 @@ def __init__( top_k_list: List[int], num_categ_classes: int, num_sent_classes: int, + save_recs: bool, + recs_fpath: Optional[str], optimizer: torch.optim.Optimizer, scheduler: torch.optim.lr_scheduler, ) -> None: @@ -96,6 +102,9 @@ def __init__( self.num_categ_classes = self.hparams.num_categ_classes + 1 self.num_sent_classes = self.hparams.num_sent_classes + 1 + if self.hparams.save_recs: + assert isinstance(self.hparams.recs_fpath, str) + # initialize loss if not self.hparams.dual_loss_training: self.criterion = self._get_loss(self.hparams.loss) @@ -244,6 +253,8 @@ def model_step( torch.Tensor, torch.Tensor, torch.Tensor, + torch.Tensor, + torch.Tensor, ]: scores = self.forward(batch) @@ -317,6 +328,9 @@ def model_step( [torch.where(mask_hist[n])[0].shape[0] for n in range(mask_hist.shape[0])] ) + user_ids = batch["user_ids"] + cand_news_ids = batch["x_cand"]["news_ids"] + return ( loss, preds, @@ -327,10 +341,12 @@ def model_step( target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) def training_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log loss self.train_loss(loss) @@ -364,7 +380,7 @@ def on_train_epoch_end(self) -> None: self._clear_epoch_outputs(self.training_step_outputs) def validation_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log metrics self.val_loss(loss) @@ -418,6 +434,8 @@ def test_step(self, batch: RecommendationBatch, batch_idx: int): target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) = self.model_step(batch) # update and log metrics @@ -453,6 +471,9 @@ def on_test_epoch_end(self) -> None: cand_indexes = torch.arange(cand_news_size.shape[0]).repeat_interleave(cand_news_size) hist_indexes = torch.arange(hist_news_size.shape[0]).repeat_interleave(hist_news_size) + user_ids = self._gather_step_outputs(self.test_step_outputs, "user_ids") + cand_news_ids = self._gather_step_outputs(self.test_step_outputs, "cand_news_ids") + # update metrics self.test_rec_metrics(preds, targets, **{"indexes": cand_indexes}) self.test_categ_div_metrics(preds, 
target_categories, cand_indexes) @@ -481,5 +502,18 @@ def on_test_epoch_end(self) -> None: self.test_sent_pers_metrics, on_step=False, on_epoch=True, prog_bar=True, logger=True ) + # save recommendations + if self.hparams.save_recs: + recommendations_dico = self._get_recommendations( + user_ids=user_ids, + news_ids=cand_news_ids, + scores=preds, + cand_news_size=cand_news_size, + ) + print(recommendations_dico) + self._save_recommendations( + recommendations=recommendations_dico, fpath=self.hparams.recs_fpath + ) + # clear memory for the next epoch self.test_step_outputs = self._clear_epoch_outputs(self.test_step_outputs) diff --git a/newsreclib/models/general_rec/lstur_module.py b/newsreclib/models/general_rec/lstur_module.py index 47b4336..7c902cf 100644 --- a/newsreclib/models/general_rec/lstur_module.py +++ b/newsreclib/models/general_rec/lstur_module.py @@ -77,6 +77,10 @@ class LSTURModule(AbstractRecommneder): The number of topical categories. num_sent_classes: The number of sentiment classes. + save_recs: + Whether to save the recommendations (i.e., candidates news and corresponding scores) to disk in JSON format. + recs_fpath: + Path where to save the list of recommendations and corresponding scores for users. optimizer: Optimizer used for model training. scheduler: @@ -110,6 +114,8 @@ def __init__( top_k_list: List[int], num_categ_classes: int, num_sent_classes: int, + save_recs: bool, + recs_fpath: Optional[str], optimizer: torch.optim.Optimizer, scheduler: torch.optim.lr_scheduler, ) -> None: @@ -123,6 +129,9 @@ def __init__( self.num_sent_classes = self.hparams.num_sent_classes + 1 self.num_users = self.hparams.num_users + 1 + if self.hparams.save_recs: + assert isinstance(self.hparams.recs_fpath, str) + # initialize loss if not self.hparams.dual_loss_training: self.criterion = self._get_loss(self.hparams.loss) @@ -281,7 +290,7 @@ def forward(self, batch: RecommendationBatch) -> torch.Tensor: ) if not self.hparams.late_fusion: # encode user - user_vector = self.user_encoder(batch["users"], hist_news_vector_agg, hist_size) + user_vector = self.user_encoder(batch["user_idx"], hist_news_vector_agg, hist_size) else: # aggregate embeddings of clicked news user_vector = torch.div(hist_news_vector_agg.sum(dim=1), hist_size.unsqueeze(dim=-1)) @@ -308,6 +317,8 @@ def model_step( torch.Tensor, torch.Tensor, torch.Tensor, + torch.Tensor, + torch.Tensor, ]: scores = self.forward(batch) @@ -381,6 +392,9 @@ def model_step( [torch.where(mask_hist[n])[0].shape[0] for n in range(mask_hist.shape[0])] ) + user_ids = batch["user_ids"] + cand_news_ids = batch["x_cand"]["news_ids"] + return ( loss, preds, @@ -391,10 +405,12 @@ def model_step( target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) def training_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log loss self.train_loss(loss) @@ -428,7 +444,7 @@ def on_train_epoch_end(self) -> None: self.training_step_outputs = self._clear_epoch_outputs(self.training_step_outputs) def validation_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log metrics self.val_loss(loss) @@ -482,6 +498,8 @@ def test_step(self, batch: RecommendationBatch, batch_idx: int): 
target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) = self.model_step(batch) # update and log metrics @@ -517,6 +535,9 @@ def on_test_epoch_end(self) -> None: cand_indexes = torch.arange(cand_news_size.shape[0]).repeat_interleave(cand_news_size) hist_indexes = torch.arange(hist_news_size.shape[0]).repeat_interleave(hist_news_size) + user_ids = self._gather_step_outputs(self.test_step_outputs, "user_ids") + cand_news_ids = self._gather_step_outputs(self.test_step_outputs, "cand_news_ids") + # update metrics self.test_rec_metrics(preds, targets, **{"indexes": cand_indexes}) self.test_categ_div_metrics(preds, target_categories, cand_indexes) @@ -545,5 +566,18 @@ def on_test_epoch_end(self) -> None: self.test_sent_pers_metrics, on_step=False, on_epoch=True, prog_bar=True, logger=True ) + # save recommendations + if self.hparams.save_recs: + recommendations_dico = self._get_recommendations( + user_ids=user_ids, + news_ids=cand_news_ids, + scores=preds, + cand_news_size=cand_news_size, + ) + print(recommendations_dico) + self._save_recommendations( + recommendations=recommendations_dico, fpath=self.hparams.recs_fpath + ) + # clear memory for the next epoch self.test_step_outputs = self._clear_epoch_outputs(self.test_step_outputs) diff --git a/newsreclib/models/general_rec/miner_module.py b/newsreclib/models/general_rec/miner_module.py index 1499564..694c556 100644 --- a/newsreclib/models/general_rec/miner_module.py +++ b/newsreclib/models/general_rec/miner_module.py @@ -78,6 +78,10 @@ class MINERModule(AbstractRecommneder): The number of topical categories. num_sent_classes: The number of sentiment classes. + save_recs: + Whether to save the recommendations (i.e., candidates news and corresponding scores) to disk in JSON format. + recs_fpath: + Path where to save the list of recommendations and corresponding scores for users. optimizer: Optimizer used for model training. 
scheduler: @@ -109,6 +113,8 @@ def __init__( top_k_list: List[int], num_categ_classes: int, num_sent_classes: int, + save_recs: bool, + recs_fpath: Optional[str], optimizer: torch.optim.Optimizer, scheduler: torch.optim.lr_scheduler, ) -> None: @@ -121,6 +127,9 @@ def __init__( self.num_categ_classes = self.hparams.num_categ_classes + 1 self.num_sent_classes = self.hparams.num_sent_classes + 1 + if self.hparams.save_recs: + assert isinstance(self.hparams.recs_fpath, str) + # initialize loss if not self.hparams.dual_loss_training: self.criterion = self._get_loss(self.hparams.loss) @@ -328,6 +337,8 @@ def model_step( torch.Tensor, torch.Tensor, torch.Tensor, + torch.Tensor, + torch.Tensor, ]: scores, user_vector = self.forward(batch) @@ -411,6 +422,9 @@ def model_step( [torch.where(mask_hist[n])[0].shape[0] for n in range(mask_hist.shape[0])] ) + user_ids = batch["user_ids"] + cand_news_ids = batch["x_cand"]["news_ids"] + return ( loss, preds, @@ -421,10 +435,12 @@ def model_step( target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) def training_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log loss self.train_loss(loss) @@ -458,7 +474,7 @@ def on_train_epoch_end(self) -> None: self.training_step_outputs = self._clear_epoch_outputs(self.training_step_outputs) def validation_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log metrics self.val_loss(loss) @@ -512,6 +528,8 @@ def test_step(self, batch: RecommendationBatch, batch_idx: int): target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) = self.model_step(batch) # update and log metrics @@ -547,6 +565,9 @@ def on_test_epoch_end(self) -> None: cand_indexes = torch.arange(cand_news_size.shape[0]).repeat_interleave(cand_news_size) hist_indexes = torch.arange(hist_news_size.shape[0]).repeat_interleave(hist_news_size) + user_ids = self._gather_step_outputs(self.test_step_outputs, "user_ids") + cand_news_ids = self._gather_step_outputs(self.test_step_outputs, "cand_news_ids") + # update metrics self.test_rec_metrics(preds, targets, **{"indexes": cand_indexes}) self.test_categ_div_metrics(preds, target_categories, cand_indexes) @@ -575,5 +596,18 @@ def on_test_epoch_end(self) -> None: self.test_sent_pers_metrics, on_step=False, on_epoch=True, prog_bar=True, logger=True ) + # save recommendations + if self.hparams.save_recs: + recommendations_dico = self._get_recommendations( + user_ids=user_ids, + news_ids=cand_news_ids, + scores=preds, + cand_news_size=cand_news_size, + ) + print(recommendations_dico) + self._save_recommendations( + recommendations=recommendations_dico, fpath=self.hparams.recs_fpath + ) + # clear memory for the next epoch self.test_step_outputs = self._clear_epoch_outputs(self.test_step_outputs) diff --git a/newsreclib/models/general_rec/mins_module.py b/newsreclib/models/general_rec/mins_module.py index bd9633d..cd79b6c 100644 --- a/newsreclib/models/general_rec/mins_module.py +++ b/newsreclib/models/general_rec/mins_module.py @@ -69,6 +69,10 @@ class MINSModule(AbstractRecommneder): The number of topical categories. num_sent_classes: The number of sentiment classes. 
+ save_recs: + Whether to save the recommendations (i.e., candidates news and corresponding scores) to disk in JSON format. + recs_fpath: + Path where to save the list of recommendations and corresponding scores for users. optimizer: Optimizer used for model training. scheduler: @@ -99,6 +103,8 @@ def __init__( top_k_list: List[int], num_categ_classes: int, num_sent_classes: int, + save_recs: bool, + recs_fpath: Optional[str], optimizer: torch.optim.Optimizer, scheduler: torch.optim.lr_scheduler, ) -> None: @@ -111,6 +117,9 @@ def __init__( self.num_categ_classes = self.hparams.num_categ_classes + 1 self.num_sent_classes = self.hparams.num_sent_classes + 1 + if self.hparams.save_recs: + assert isinstance(self.hparams.recs_fpath, str) + # initialize loss if not self.hparams.dual_loss_training: self.criterion = self._get_loss(self.hparams.loss) @@ -283,6 +292,8 @@ def model_step( torch.Tensor, torch.Tensor, torch.Tensor, + torch.Tensor, + torch.Tensor, ]: scores = self.forward(batch) @@ -356,6 +367,9 @@ def model_step( [torch.where(mask_hist[n])[0].shape[0] for n in range(mask_hist.shape[0])] ) + user_ids = batch["user_ids"] + cand_news_ids = batch["x_cand"]["news_ids"] + return ( loss, preds, @@ -366,10 +380,12 @@ def model_step( target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) def training_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log loss self.train_loss(loss) @@ -403,7 +419,7 @@ def on_train_epoch_end(self) -> None: self.training_step_outputs = self._clear_epoch_outputs(self.training_step_outputs) def validation_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log metrics self.val_loss(loss) @@ -457,6 +473,8 @@ def test_step(self, batch: RecommendationBatch, batch_idx: int): target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) = self.model_step(batch) # update and log metrics @@ -492,6 +510,9 @@ def on_test_epoch_end(self) -> None: cand_indexes = torch.arange(cand_news_size.shape[0]).repeat_interleave(cand_news_size) hist_indexes = torch.arange(hist_news_size.shape[0]).repeat_interleave(hist_news_size) + user_ids = self._gather_step_outputs(self.test_step_outputs, "user_ids") + cand_news_ids = self._gather_step_outputs(self.test_step_outputs, "cand_news_ids") + # update metrics self.test_rec_metrics(preds, targets, **{"indexes": cand_indexes}) self.test_categ_div_metrics(preds, target_categories, cand_indexes) @@ -520,5 +541,18 @@ def on_test_epoch_end(self) -> None: self.test_sent_pers_metrics, on_step=False, on_epoch=True, prog_bar=True, logger=True ) + # save recommendations + if self.hparams.save_recs: + recommendations_dico = self._get_recommendations( + user_ids=user_ids, + news_ids=cand_news_ids, + scores=preds, + cand_news_size=cand_news_size, + ) + print(recommendations_dico) + self._save_recommendations( + recommendations=recommendations_dico, fpath=self.hparams.recs_fpath + ) + # clear memory for the next epoch self.test_step_outputs = self._clear_epoch_outputs(self.test_step_outputs) diff --git a/newsreclib/models/general_rec/naml_module.py b/newsreclib/models/general_rec/naml_module.py index 2c90b31..c9f4c71 100644 --- 
a/newsreclib/models/general_rec/naml_module.py +++ b/newsreclib/models/general_rec/naml_module.py @@ -69,6 +69,10 @@ class NAMLModule(AbstractRecommneder): The number of topical categories. num_sent_classes: The number of sentiment classes. + save_recs: + Whether to save the recommendations (i.e., candidates news and corresponding scores) to disk in JSON format. + recs_fpath: + Path where to save the list of recommendations and corresponding scores for users. optimizer: Optimizer used for model training. scheduler: @@ -99,6 +103,8 @@ def __init__( top_k_list: List[int], num_categ_classes: int, num_sent_classes: int, + save_recs: bool, + recs_fpath: Optional[str], optimizer: torch.optim.Optimizer, scheduler: torch.optim.lr_scheduler, ) -> None: @@ -111,6 +117,9 @@ def __init__( self.num_categ_classes = self.hparams.num_categ_classes + 1 self.num_sent_classes = self.hparams.num_sent_classes + 1 + if self.hparams.save_recs: + assert isinstance(self.hparams.recs_fpath, str) + # initialize loss if not self.hparams.dual_loss_training: self.criterion = self._get_loss(self.hparams.loss) @@ -291,6 +300,8 @@ def model_step( torch.Tensor, torch.Tensor, torch.Tensor, + torch.Tensor, + torch.Tensor, ]: scores = self.forward(batch) @@ -364,6 +375,9 @@ def model_step( [torch.where(mask_hist[n])[0].shape[0] for n in range(mask_hist.shape[0])] ) + user_ids = batch["user_ids"] + cand_news_ids = batch["x_cand"]["news_ids"] + return ( loss, preds, @@ -374,10 +388,12 @@ def model_step( target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) def training_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log loss self.train_loss(loss) @@ -411,7 +427,7 @@ def on_train_epoch_end(self) -> None: self.training_step_outputs = self._clear_epoch_outputs(self.training_step_outputs) def validation_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log metrics self.val_loss(loss) @@ -465,6 +481,8 @@ def test_step(self, batch: RecommendationBatch, batch_idx: int): target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) = self.model_step(batch) # update and log metrics @@ -500,6 +518,9 @@ def on_test_epoch_end(self) -> None: cand_indexes = torch.arange(cand_news_size.shape[0]).repeat_interleave(cand_news_size) hist_indexes = torch.arange(hist_news_size.shape[0]).repeat_interleave(hist_news_size) + user_ids = self._gather_step_outputs(self.test_step_outputs, "user_ids") + cand_news_ids = self._gather_step_outputs(self.test_step_outputs, "cand_news_ids") + # update metrics self.test_rec_metrics(preds, targets, **{"indexes": cand_indexes}) self.test_categ_div_metrics(preds, target_categories, cand_indexes) @@ -528,5 +549,18 @@ def on_test_epoch_end(self) -> None: self.test_sent_pers_metrics, on_step=False, on_epoch=True, prog_bar=True, logger=True ) + # save recommendations + if self.hparams.save_recs: + recommendations_dico = self._get_recommendations( + user_ids=user_ids, + news_ids=cand_news_ids, + scores=preds, + cand_news_size=cand_news_size, + ) + print(recommendations_dico) + self._save_recommendations( + recommendations=recommendations_dico, fpath=self.hparams.recs_fpath + ) + # clear memory for the next 
epoch self.test_step_outputs = self._clear_epoch_outputs(self.test_step_outputs) diff --git a/newsreclib/models/general_rec/npa_module.py b/newsreclib/models/general_rec/npa_module.py index 6a7e517..fe4ec23 100644 --- a/newsreclib/models/general_rec/npa_module.py +++ b/newsreclib/models/general_rec/npa_module.py @@ -64,6 +64,10 @@ class NPAModule(AbstractRecommneder): The number of topical categories. num_sent_classes: The number of sentiment classes. + save_recs: + Whether to save the recommendations (i.e., candidates news and corresponding scores) to disk in JSON format. + recs_fpath: + Path where to save the list of recommendations and corresponding scores for users. optimizer: Optimizer used for model training. scheduler: @@ -90,6 +94,8 @@ def __init__( top_k_list: List[int], num_categ_classes: int, num_sent_classes: int, + save_recs: bool, + recs_fpath: Optional[str], optimizer: torch.optim.Optimizer, scheduler: torch.optim.lr_scheduler, ) -> None: @@ -102,6 +108,9 @@ def __init__( self.num_categ_classes = self.hparams.num_categ_classes + 1 self.num_sent_classes = self.hparams.num_sent_classes + 1 + if self.hparams.save_recs: + assert isinstance(self.hparams.recs_fpath, str) + # initialize loss if not self.hparams.dual_loss_training: self.criterion = self._get_loss(self.hparams.loss) @@ -212,7 +221,7 @@ def forward(self, batch: RecommendationBatch) -> torch.Tensor: ) # project users - projected_users = self.user_projection(batch["users"]) + projected_users = self.user_projection(batch["user_idx"]) # encode user history hist_news_vector = self.news_encoder(batch["x_hist"]["title"], hist_size, projected_users) @@ -255,6 +264,8 @@ def model_step( torch.Tensor, torch.Tensor, torch.Tensor, + torch.Tensor, + torch.Tensor, ]: scores = self.forward(batch) @@ -328,6 +339,9 @@ def model_step( [torch.where(mask_hist[n])[0].shape[0] for n in range(mask_hist.shape[0])] ) + user_ids = batch["user_ids"] + cand_news_ids = batch["x_cand"]["news_ids"] + return ( loss, preds, @@ -338,10 +352,12 @@ def model_step( target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) def training_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log loss self.train_loss(loss) @@ -375,7 +391,7 @@ def on_train_epoch_end(self) -> None: self.training_step_outputs = self._clear_epoch_outputs(self.training_step_outputs) def validation_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log metrics self.val_loss(loss) @@ -429,6 +445,8 @@ def test_step(self, batch: RecommendationBatch, batch_idx: int): target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) = self.model_step(batch) # update and log metrics @@ -464,6 +482,9 @@ def on_test_epoch_end(self) -> None: cand_indexes = torch.arange(cand_news_size.shape[0]).repeat_interleave(cand_news_size) hist_indexes = torch.arange(hist_news_size.shape[0]).repeat_interleave(hist_news_size) + user_ids = self._gather_step_outputs(self.test_step_outputs, "user_ids") + cand_news_ids = self._gather_step_outputs(self.test_step_outputs, "cand_news_ids") + # update metrics self.test_rec_metrics(preds, targets, **{"indexes": cand_indexes}) self.test_categ_div_metrics(preds, 
target_categories, cand_indexes) @@ -492,5 +513,18 @@ def on_test_epoch_end(self) -> None: self.test_sent_pers_metrics, on_step=False, on_epoch=True, prog_bar=True, logger=True ) + # save recommendations + if self.hparams.save_recs: + recommendations_dico = self._get_recommendations( + user_ids=user_ids, + news_ids=cand_news_ids, + scores=preds, + cand_news_size=cand_news_size, + ) + print(recommendations_dico) + self._save_recommendations( + recommendations=recommendations_dico, fpath=self.hparams.recs_fpath + ) + # clear memory for the next epoch self.test_step_outputs = self._clear_epoch_outputs(self.test_step_outputs) diff --git a/newsreclib/models/general_rec/nrms_module.py b/newsreclib/models/general_rec/nrms_module.py index e8dd2ac..5e03233 100644 --- a/newsreclib/models/general_rec/nrms_module.py +++ b/newsreclib/models/general_rec/nrms_module.py @@ -62,6 +62,10 @@ class NRMSModule(AbstractRecommneder): The number of topical categories. num_sent_classes: The number of sentiment classes. + save_recs: + Whether to save the recommendations (i.e., candidates news and corresponding scores) to disk in JSON format. + recs_fpath: + Path where to save the list of recommendations and corresponding scores for users. optimizer: Optimizer used for model training. scheduler: @@ -89,6 +93,8 @@ def __init__( top_k_list: List[int], num_categ_classes: int, num_sent_classes: int, + save_recs: bool, + recs_fpath: Optional[str], optimizer: torch.optim.Optimizer, scheduler: torch.optim.lr_scheduler, ) -> None: @@ -101,6 +107,9 @@ def __init__( self.num_categ_classes = self.hparams.num_categ_classes + 1 self.num_sent_classes = self.hparams.num_sent_classes + 1 + if self.hparams.save_recs: + assert isinstance(self.hparams.recs_fpath, str) + # initialize loss if not self.hparams.dual_loss_training: self.criterion = self._get_loss(self.hparams.loss) @@ -260,6 +269,8 @@ def model_step( torch.Tensor, torch.Tensor, torch.Tensor, + torch.Tensor, + torch.Tensor, ]: scores = self.forward(batch) @@ -333,6 +344,9 @@ def model_step( [torch.where(mask_hist[n])[0].shape[0] for n in range(mask_hist.shape[0])] ) + user_ids = batch["user_ids"] + cand_news_ids = batch["x_cand"]["news_ids"] + return ( loss, preds, @@ -343,10 +357,12 @@ def model_step( target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) def training_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log loss self.train_loss(loss) @@ -380,7 +396,7 @@ def on_train_epoch_end(self) -> None: self.training_step_outputs = self._clear_epoch_outputs(self.training_step_outputs) def validation_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log metrics self.val_loss(loss) @@ -434,6 +450,8 @@ def test_step(self, batch: RecommendationBatch, batch_idx: int): target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) = self.model_step(batch) # update and log metrics @@ -469,6 +487,9 @@ def on_test_epoch_end(self) -> None: cand_indexes = torch.arange(cand_news_size.shape[0]).repeat_interleave(cand_news_size) hist_indexes = torch.arange(hist_news_size.shape[0]).repeat_interleave(hist_news_size) + user_ids = self._gather_step_outputs(self.test_step_outputs, 
"user_ids") + cand_news_ids = self._gather_step_outputs(self.test_step_outputs, "cand_news_ids") + # update metrics self.test_rec_metrics(preds, targets, **{"indexes": cand_indexes}) self.test_categ_div_metrics(preds, target_categories, cand_indexes) @@ -497,5 +518,18 @@ def on_test_epoch_end(self) -> None: self.test_sent_pers_metrics, on_step=False, on_epoch=True, prog_bar=True, logger=True ) + # save recommendations + if self.hparams.save_recs: + recommendations_dico = self._get_recommendations( + user_ids=user_ids, + news_ids=cand_news_ids, + scores=preds, + cand_news_size=cand_news_size, + ) + print(recommendations_dico) + self._save_recommendations( + recommendations=recommendations_dico, fpath=self.hparams.recs_fpath + ) + # clear memory for the next epoch self.test_step_outputs = self._clear_epoch_outputs(self.test_step_outputs) diff --git a/newsreclib/models/general_rec/tanr_module.py b/newsreclib/models/general_rec/tanr_module.py index 7b0cf6b..46faf74 100644 --- a/newsreclib/models/general_rec/tanr_module.py +++ b/newsreclib/models/general_rec/tanr_module.py @@ -71,6 +71,10 @@ class TANRModule(AbstractRecommneder): The number of topical categories. num_sent_classes: The number of sentiment classes. + save_recs: + Whether to save the recommendations (i.e., candidates news and corresponding scores) to disk in JSON format. + recs_fpath: + Path where to save the list of recommendations and corresponding scores for users. optimizer: Optimizer used for model training. scheduler: @@ -101,6 +105,8 @@ def __init__( top_k_list: List[int], num_categ_classes: int, num_sent_classes: int, + save_recs: bool, + recs_fpath: Optional[str], optimizer: torch.optim.Optimizer, scheduler: torch.optim.lr_scheduler, ) -> None: @@ -113,6 +119,9 @@ def __init__( self.num_categ_classes = self.hparams.num_categ_classes + 1 self.num_sent_classes = self.hparams.num_sent_classes + 1 + if self.hparams.save_recs: + assert isinstance(self.hparams.recs_fpath, str) + # initialize recommendation loss if not self.hparams.dual_loss_training: self.criterion = self._get_loss(self.hparams.loss) @@ -291,6 +300,8 @@ def model_step( torch.Tensor, torch.Tensor, torch.Tensor, + torch.Tensor, + torch.Tensor, ]: scores, topic_scores = self.forward(batch) @@ -372,6 +383,9 @@ def model_step( [torch.where(mask_hist[n])[0].shape[0] for n in range(mask_hist.shape[0])] ) + user_ids = batch["user_ids"] + cand_news_ids = batch["x_cand"]["news_ids"] + return ( loss, preds, @@ -382,10 +396,12 @@ def model_step( target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) def training_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log loss self.train_loss(loss) @@ -419,7 +435,7 @@ def on_train_epoch_end(self) -> None: self.training_step_outputs = self._clear_epoch_outputs(self.training_step_outputs) def validation_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log metrics self.val_loss(loss) @@ -473,6 +489,8 @@ def test_step(self, batch: RecommendationBatch, batch_idx: int): target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) = self.model_step(batch) # update and log metrics @@ -508,6 +526,9 @@ def 
@@ -508,6 +526,9 @@ def on_test_epoch_end(self) -> None:
         cand_indexes = torch.arange(cand_news_size.shape[0]).repeat_interleave(cand_news_size)
         hist_indexes = torch.arange(hist_news_size.shape[0]).repeat_interleave(hist_news_size)

+        user_ids = self._gather_step_outputs(self.test_step_outputs, "user_ids")
+        cand_news_ids = self._gather_step_outputs(self.test_step_outputs, "cand_news_ids")
+
         # update metrics
         self.test_rec_metrics(preds, targets, **{"indexes": cand_indexes})
         self.test_categ_div_metrics(preds, target_categories, cand_indexes)
@@ -536,5 +557,18 @@ def on_test_epoch_end(self) -> None:
             self.test_sent_pers_metrics, on_step=False, on_epoch=True, prog_bar=True, logger=True
         )

+        # save recommendations
+        if self.hparams.save_recs:
+            recommendations_dico = self._get_recommendations(
+                user_ids=user_ids,
+                news_ids=cand_news_ids,
+                scores=preds,
+                cand_news_size=cand_news_size,
+            )
+            print(recommendations_dico)
+            self._save_recommendations(
+                recommendations=recommendations_dico, fpath=self.hparams.recs_fpath
+            )
+
         # clear memory for the next epoch
         self.test_step_outputs = self._clear_epoch_outputs(self.test_step_outputs)

From 1b33d384ea5b3e77d1c8c78e206e9a6129bf768e Mon Sep 17 00:00:00 2001
From: andreeaiana
Date: Tue, 19 Mar 2024 11:57:25 +0100
Subject: [PATCH 5/5] Update model configs with hyperparameters for saving recommendations lists

---
 configs/model/caum.yaml | 4 ++++
 configs/model/cen_news_rec.yaml | 4 ++++
 configs/model/dkn.yaml | 4 ++++
 configs/model/lstur.yaml | 4 ++++
 configs/model/manner_cr_module.yaml | 4 ++++
 configs/model/manner_module.yaml | 4 ++++
 configs/model/miner.yaml | 4 ++++
 configs/model/mins.yaml | 4 ++++
 configs/model/naml.yaml | 4 ++++
 configs/model/npa.yaml | 4 ++++
 configs/model/nrms.yaml | 4 ++++
 configs/model/senti_debias.yaml | 4 ++++
 configs/model/sentirec.yaml | 4 ++++
 configs/model/tanr.yaml | 4 ++++
 14 files changed, 56 insertions(+)

diff --git a/configs/model/caum.yaml b/configs/model/caum.yaml
index f936cda..9e4d361 100644
--- a/configs/model/caum.yaml
+++ b/configs/model/caum.yaml
@@ -47,12 +47,16 @@ outputs:
       "target_sentiments",
       "hist_categories",
       "hist_sentiments",
+      "user_ids",
+      "cand_news_ids",
     ]

 # evaluation
 top_k_list: [5, 10]
 num_categ_classes: 18
 num_sent_classes: 3
+save_recs: False
+recs_fpath: "${paths.output_dir}/recommendations.json"

 optimizer:
   _target_: torch.optim.Adam
diff --git a/configs/model/cen_news_rec.yaml b/configs/model/cen_news_rec.yaml
index aba8165..cf62ca6 100644
--- a/configs/model/cen_news_rec.yaml
+++ b/configs/model/cen_news_rec.yaml
@@ -41,12 +41,16 @@ outputs:
       "target_sentiments",
       "hist_categories",
       "hist_sentiments",
+      "user_ids",
+      "cand_news_ids",
     ]

 # evaluation
 top_k_list: [5, 10]
 num_categ_classes: 18
 num_sent_classes: 3
+save_recs: False
+recs_fpath: "${paths.output_dir}/recommendations.json"

 optimizer:
   _target_: torch.optim.Adam
diff --git a/configs/model/dkn.yaml b/configs/model/dkn.yaml
index e138b4f..11b2289 100644
--- a/configs/model/dkn.yaml
+++ b/configs/model/dkn.yaml
@@ -33,12 +33,16 @@ outputs:
       "target_sentiments",
       "hist_categories",
       "hist_sentiments",
+      "user_ids",
+      "cand_news_ids",
     ]

 # evaluation
 top_k_list: [5, 10]
 num_categ_classes: 18
 num_sent_classes: 3
+save_recs: False
+recs_fpath: "${paths.output_dir}/recommendations.json"

 optimizer:
   _target_: torch.optim.Adam
diff --git a/configs/model/lstur.yaml b/configs/model/lstur.yaml
index 47f7af4..c7e28f3 100644
--- a/configs/model/lstur.yaml
+++ b/configs/model/lstur.yaml
@@ -43,12 +43,16 @@ outputs:
       "target_sentiments",
       "hist_categories",
       "hist_sentiments",
"user_ids", + "cand_news_ids", ] # evaluation top_k_list: [5, 10] num_categ_classes: 18 num_sent_classes: 3 +save_recs: False +recs_fpath: "${paths.output_dir}/recommendations.json" optimizer: _target_: torch.optim.Adam diff --git a/configs/model/manner_cr_module.yaml b/configs/model/manner_cr_module.yaml index 83c4f8a..8a28358 100644 --- a/configs/model/manner_cr_module.yaml +++ b/configs/model/manner_cr_module.yaml @@ -34,12 +34,16 @@ outputs: "target_sentiments", "hist_categories", "hist_sentiments", + "user_ids", + "cand_news_ids", ] # evaluation top_k_list: [5, 10] num_categ_classes: 18 num_sent_classes: 3 +save_recs: False +recs_fpath: "${paths.output_dir}/recommendations.json" optimizer: _target_: torch.optim.Adam diff --git a/configs/model/manner_module.yaml b/configs/model/manner_module.yaml index 1b3adb8..be2c06a 100644 --- a/configs/model/manner_module.yaml +++ b/configs/model/manner_module.yaml @@ -18,12 +18,16 @@ outputs: "target_sentiments", "hist_categories", "hist_sentiments", + "user_ids", + "cand_news_ids", ] # evaluation top_k_list: [5, 10] num_categ_classes: 18 num_sent_classes: 3 +save_recs: False +recs_fpath: "${paths.output_dir}/recommendations.json" optimizer: null scheduler: null diff --git a/configs/model/miner.yaml b/configs/model/miner.yaml index aeefaf4..8aa28de 100644 --- a/configs/model/miner.yaml +++ b/configs/model/miner.yaml @@ -39,12 +39,16 @@ outputs: "target_sentiments", "hist_categories", "hist_sentiments", + "user_ids", + "cand_news_ids", ] # evaluation top_k_list: [5, 10] num_categ_classes: 18 num_sent_classes: 3 +save_recs: False +recs_fpath: "${paths.output_dir}/recommendations.json" optimizer: _target_: torch.optim.Adam diff --git a/configs/model/mins.yaml b/configs/model/mins.yaml index 07aafaf..6514885 100644 --- a/configs/model/mins.yaml +++ b/configs/model/mins.yaml @@ -40,12 +40,16 @@ outputs: "target_sentiments", "hist_categories", "hist_sentiments", + "user_ids", + "cand_news_ids", ] # evaluation top_k_list: [5, 10] num_categ_classes: 18 num_sent_classes: 3 +save_recs: False +recs_fpath: "${paths.output_dir}/recommendations.json" optimizer: _target_: torch.optim.Adam diff --git a/configs/model/naml.yaml b/configs/model/naml.yaml index abf9aea..94a2483 100644 --- a/configs/model/naml.yaml +++ b/configs/model/naml.yaml @@ -38,12 +38,16 @@ outputs: "target_sentiments", "hist_categories", "hist_sentiments", + "user_ids", + "cand_news_ids", ] # evaluation top_k_list: [5, 10] num_categ_classes: 18 num_sent_classes: 3 +save_recs: False +recs_fpath: "${paths.output_dir}/recommendations.json" optimizer: _target_: torch.optim.Adam diff --git a/configs/model/npa.yaml b/configs/model/npa.yaml index 4b2b2c6..40282a3 100644 --- a/configs/model/npa.yaml +++ b/configs/model/npa.yaml @@ -32,12 +32,16 @@ outputs: "target_sentiments", "hist_categories", "hist_sentiments", + "user_ids", + "cand_news_ids", ] # evaluation top_k_list: [5, 10] num_categ_classes: 18 num_sent_classes: 3 +save_recs: False +recs_fpath: "${paths.output_dir}/recommendations.json" optimizer: _target_: torch.optim.Adam diff --git a/configs/model/nrms.yaml b/configs/model/nrms.yaml index b60ed12..13546b9 100644 --- a/configs/model/nrms.yaml +++ b/configs/model/nrms.yaml @@ -35,12 +35,16 @@ outputs: "target_sentiments", "hist_categories", "hist_sentiments", + "user_ids", + "cand_news_ids", ] # evaluation top_k_list: [5, 10] num_categ_classes: 18 num_sent_classes: 3 +save_recs: False +recs_fpath: "${paths.output_dir}/recommendations.json" optimizer: _target_: torch.optim.Adam diff --git 
diff --git a/configs/model/senti_debias.yaml b/configs/model/senti_debias.yaml
index 8ff4251..ad7ca59 100644
--- a/configs/model/senti_debias.yaml
+++ b/configs/model/senti_debias.yaml
@@ -45,6 +45,8 @@ outputs:
       "target_sentiments",
       "hist_categories",
       "hist_sentiments",
+      "user_ids",
+      "cand_news_ids",
     ]

 # loss coefficients
@@ -55,6 +57,8 @@ beta_coefficient: 10
 top_k_list: [5, 10]
 num_categ_classes: 18
 num_sent_classes: 3
+save_recs: False
+recs_fpath: "${paths.output_dir}/recommendations.json"

 optimizer: null

diff --git a/configs/model/sentirec.yaml b/configs/model/sentirec.yaml
index def2ea3..8b9ade1 100644
--- a/configs/model/sentirec.yaml
+++ b/configs/model/sentirec.yaml
@@ -41,12 +41,16 @@ outputs:
       "target_sentiments",
       "hist_categories",
       "hist_sentiments",
+      "user_ids",
+      "cand_news_ids",
     ]

 # evaluation
 top_k_list: [5, 10]
 num_categ_classes: 18
 num_sent_classes: 3
+save_recs: False
+recs_fpath: "${paths.output_dir}/recommendations.json"

 optimizer:
   _target_: torch.optim.Adam
diff --git a/configs/model/tanr.yaml b/configs/model/tanr.yaml
index 18bfe2a..41765b3 100644
--- a/configs/model/tanr.yaml
+++ b/configs/model/tanr.yaml
@@ -40,12 +40,16 @@ outputs:
       "target_sentiments",
       "hist_categories",
       "hist_sentiments",
+      "user_ids",
+      "cand_news_ids",
     ]

 # evaluation
 top_k_list: [5, 10]
 num_categ_classes: 18
 num_sent_classes: 3
+save_recs: False
+recs_fpath: "${paths.output_dir}/recommendations.json"

 optimizer:
   _target_: torch.optim.Adam
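Note: the test loops above delegate the actual writing to self._save_recommendations(...), whose implementation is not part of this patch excerpt. The sketch below is an illustration only, assuming the dictionary returned by _get_recommendations is JSON-serializable and keyed by user ID; the function name, the directory handling, and the inner dictionary keys mentioned in the comments are assumptions, not the patch's actual code.

import json
import os
from typing import Any, Dict, List


def save_recommendations(recommendations: Dict[int, Dict[str, List[Any]]], fpath: str) -> None:
    """Writes the per-user recommendation lists and scores to disk as JSON.

    Illustrative sketch mirroring the _save_recommendations call in the test
    loops; recs_fpath defaults to "${paths.output_dir}/recommendations.json"
    in the model configs above.
    """
    # Make sure the target directory exists before writing the file.
    os.makedirs(os.path.dirname(fpath) or ".", exist_ok=True)

    with open(fpath, "w") as f:
        # Keys are user IDs; each value pairs the candidate news IDs with their
        # predicted scores (the exact inner keys depend on _get_recommendations).
        json.dump(recommendations, f)

With the configs above, saving stays disabled by default (save_recs: False) and can be switched on per model, for example through a Hydra override such as model.save_recs=True, optionally together with model.recs_fpath pointing at a custom output location.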