From 5ccc9f83e07529e5ce43295b026d3f7c0991506e Mon Sep 17 00:00:00 2001
From: andreeaiana
Date: Tue, 19 Mar 2024 11:50:59 +0100
Subject: [PATCH 1/5] Streamline user IDs and indices across datasets

---
 newsreclib/data/components/adressa_dataframe.py | 10 ++++++----
 newsreclib/data/components/batch.py             |  9 ++++++---
 newsreclib/data/components/mind_dataframe.py    |  2 +-
 tests/test_datamodules.py                       |  4 ++--
 4 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/newsreclib/data/components/adressa_dataframe.py b/newsreclib/data/components/adressa_dataframe.py
index a943a15..336524b 100644
--- a/newsreclib/data/components/adressa_dataframe.py
+++ b/newsreclib/data/components/adressa_dataframe.py
@@ -548,7 +548,7 @@ def _load_behaviors(self) -> pd.DataFrame:
         log.info("Mapping uid to index.")
         behaviors["user"] = behaviors["uid"].apply(lambda x: uid2index.get(x, 0))

-        behaviors = behaviors[["user", "history", "candidates", "labels"]]
+        behaviors = behaviors[["uid", "user", "history", "candidates", "labels"]]

         # cache processed data
         log.info(
@@ -560,7 +560,7 @@ def _load_behaviors(self) -> pd.DataFrame:

     def _process_news_files(
         self, filepath
-    ) -> Tuple[Dict[str, str], Dict[str, str], Dict[str, str], Dict[str, int]]:
+    ) -> Tuple[Dict[str, str], Dict[str, str], Dict[str, str], Dict[str, str]]:
         """Processes the news data.

         Adapted from
@@ -604,7 +604,9 @@ def _process_news_files(
                 == event_dict["category1"].split("|")[-1]
             )

-        nid2index = {k: v for k, v in zip(news_title.keys(), range(1, len(news_title) + 1))}
+        nid2index = {
+            k: "N" + str(v) for k, v in zip(news_title.keys(), range(1, len(news_title) + 1))
+        }

         return news_title, news_category, news_subcategory, nid2index

@@ -664,7 +666,7 @@ def _process_users(
                 and event_dict["id"] in nid2index
             ):
                 nindex = nid2index[event_dict["id"]]
-                uid = event_dict["userId"]
+                uid = "U" + str(event_dict["userId"])

                 if uid not in uid2index:
                     uid2index[uid] = len(uid2index)
diff --git a/newsreclib/data/components/batch.py b/newsreclib/data/components/batch.py
index 7fcae72..e38846a 100644
--- a/newsreclib/data/components/batch.py
+++ b/newsreclib/data/components/batch.py
@@ -17,8 +17,10 @@ class RecommendationBatch(TypedDict):
             Dictionary of news from the users' candidates, mapping news features to values.
         labels:
             Ground truth specifying whether the news is relevant to the user.
-        users:
-            Users included in the batch.
+        user_ids:
+            Original user IDs of the users included in the batch.
+        user_idx:
+            Indices of users included in the batch (e.g., for creating embedding matrix).
""" batch_hist: torch.Tensor @@ -26,7 +28,8 @@ class RecommendationBatch(TypedDict): x_hist: Dict[str, Any] x_cand: Dict[str, Any] labels: torch.Tensor - users: torch.Tensor + user_ids: torch.Tensor + user_idx: torch.Tensor class NewsBatch(TypedDict): diff --git a/newsreclib/data/components/mind_dataframe.py b/newsreclib/data/components/mind_dataframe.py index 7c17ec1..6ba05fb 100644 --- a/newsreclib/data/components/mind_dataframe.py +++ b/newsreclib/data/components/mind_dataframe.py @@ -590,7 +590,7 @@ def _load_behaviors(self) -> pd.DataFrame: # cache parsed behaviors log.info(f"Caching parsed behaviors of size {len(behaviors)} to {parsed_bhv_file}.") - behaviors = behaviors[["user", "history", "candidates", "labels"]] + behaviors = behaviors[["uid", "user", "history", "candidates", "labels"]] file_utils.to_tsv(behaviors, parsed_bhv_file) return behaviors diff --git a/tests/test_datamodules.py b/tests/test_datamodules.py index 3711b2d..04e1e15 100644 --- a/tests/test_datamodules.py +++ b/tests/test_datamodules.py @@ -116,7 +116,7 @@ def test_mind_rec_small_datamodule(batch_size): batch = next(iter(dm.train_dataloader())) - assert len(batch["users"]) == batch_size + assert len(batch["user_idx"]) == batch_size @pytest.mark.parametrize("batch_size", [8, 64]) @@ -223,4 +223,4 @@ def test_adressa_rec_small_datamodule(batch_size): batch = next(iter(dm.train_dataloader())) - assert len(batch["users"]) == batch_size + assert len(batch["user_idx"]) == batch_size From bbf494bbd06bc98acda9124ccc58526bc760b75c Mon Sep 17 00:00:00 2001 From: andreeaiana Date: Tue, 19 Mar 2024 11:53:25 +0100 Subject: [PATCH 2/5] Feat: add functionality to save recommendation lists and scores --- newsreclib/models/abstract_recommender.py | 37 +++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/newsreclib/models/abstract_recommender.py b/newsreclib/models/abstract_recommender.py index d62a827..f5c876f 100644 --- a/newsreclib/models/abstract_recommender.py +++ b/newsreclib/models/abstract_recommender.py @@ -1,3 +1,4 @@ +import json from typing import Any, Callable, Dict, List, Optional, Tuple, Union import numpy as np @@ -154,3 +155,39 @@ def _clear_epoch_outputs( outputs_dict[key].clear() return outputs_dict + + def _get_recommendations( + self, + user_ids: torch.Tensor, + news_ids: torch.Tensor, + scores: torch.Tensor, + cand_news_size: torch.Tensor, + ) -> Dict[int, Dict[str, List[Any]]]: + """Returns the recommendations and corresponding scores for the given users. + + Attributes: + user_ids (torch.Tensor): IDs of users. + news_ids (torch.Tensor): IDs of the candidates news. + scores (torch.Tensor): Predicted scores for the candidate news. + cand_news_size (torch.Tensor): Number of candidate news for each user. + + Returns: + Dict[int, Dict[str, List[Any]]]: A dictionary with user IDs as keys and an inner dictionary of recommendations and corresponding scores as values. 
+ """ + users = torch.repeat_interleave(user_ids.detach().cpu(), cand_news_size).tolist() + users = ["U" + str(uid) for uid in users] + news = ["N" + str(nid) for nid in news_ids.detach().cpu().tolist()] + scores = scores.detach().cpu().tolist() + + # dictionary of recommendations and scores for each user + recommendations_dico = {} + for user, news, score in zip(users, news, scores): + if user not in recommendations_dico: + recommendations_dico[user] = {} + recommendations_dico[user][news] = score + + return recommendations_dico + + def _save_recommendations(self, recommendations: Dict[int, Dict[str, List[Any]]], fpath: str): + with open(fpath, "w") as f: + json.dump(recommendations, f) From e0a1baab85930f3aa067544de51d54d7c05d0ae2 Mon Sep 17 00:00:00 2001 From: andreeaiana Date: Tue, 19 Mar 2024 11:55:29 +0100 Subject: [PATCH 3/5] Streamline user IDs and indices; add news_ids to batch --- newsreclib/data/components/news_dataset.py | 4 +++ newsreclib/data/components/rec_dataset.py | 30 +++++++++++++++------- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/newsreclib/data/components/news_dataset.py b/newsreclib/data/components/news_dataset.py index 35d36ef..7ee6f11 100644 --- a/newsreclib/data/components/news_dataset.py +++ b/newsreclib/data/components/news_dataset.py @@ -101,6 +101,10 @@ def _tokenize_plm(self, text: List[str]): def _tokenize_df(self, df: pd.DataFrame) -> Dict[str, Any]: batch_out = {} + # news IDs (i.e., keep only numeric part of unique NID) + nids = np.array([int(nid.split("N")[-1]) for nid in df.index.values]) + batch_out["news_ids"] = torch.from_numpy(nids).long() + if not self.concatenate_inputs: # prepare text if not self.use_plm: diff --git a/newsreclib/data/components/rec_dataset.py b/newsreclib/data/components/rec_dataset.py index 93ace66..c45dec9 100644 --- a/newsreclib/data/components/rec_dataset.py +++ b/newsreclib/data/components/rec_dataset.py @@ -36,10 +36,13 @@ def __init__( self.max_history_len = max_history_len self.neg_sampling_ratio = neg_sampling_ratio - def __getitem__(self, index: Any) -> Tuple[np.ndarray, pd.DataFrame, pd.DataFrame, np.ndarray]: + def __getitem__( + self, index: Any + ) -> Tuple[np.ndarray, np.ndarray, pd.DataFrame, pd.DataFrame, np.ndarray]: bhv = self.behaviors.iloc[index] - user = np.array([int(bhv["user"])]) + user_id = np.array([int(bhv["uid"].split("U")[-1])]) + user_idx = np.array([int(bhv["user"])]) history = np.array(bhv["history"])[: self.max_history_len] candidates = np.array(bhv["candidates"]) labels = np.array(bhv["labels"]) @@ -49,7 +52,7 @@ def __getitem__(self, index: Any) -> Tuple[np.ndarray, pd.DataFrame, pd.DataFram history = self.news.loc[history] candidates = self.news.loc[candidates] - return user, history, candidates, labels + return user_id, user_idx, history, candidates, labels def __len__(self) -> int: return len(self.behaviors) @@ -98,10 +101,13 @@ def __init__(self, news: pd.DataFrame, behaviors: pd.DataFrame, max_history_len: self.behaviors = behaviors self.max_history_len = max_history_len - def __getitem__(self, idx: Any) -> Tuple[np.ndarray, pd.DataFrame, pd.DataFrame, np.ndarray]: + def __getitem__( + self, idx: Any + ) -> Tuple[np.ndarray, np.ndarray, pd.DataFrame, pd.DataFrame, np.ndarray]: bhv = self.behaviors.iloc[idx] - user = np.array([int(bhv["user"])]) + user_id = np.array([int(bhv["uid"].split("U")[-1])]) + user_idx = np.array([int(bhv["user"])]) history = np.array(bhv["history"])[: self.max_history_len] candidates = np.array(bhv["candidates"]) labels = 
np.array(bhv["labels"]) @@ -109,7 +115,7 @@ def __getitem__(self, idx: Any) -> Tuple[np.ndarray, pd.DataFrame, pd.DataFrame, history = self.news.loc[history] candidates = self.news.loc[candidates] - return user, history, candidates, labels + return user_id, user_idx, history, candidates, labels def __len__(self) -> int: return len(self.behaviors) @@ -140,7 +146,7 @@ def __init__( self.concatenate_inputs = concatenate_inputs def __call__(self, batch) -> RecommendationBatch: - users, histories, candidates, labels = zip(*batch) + user_ids, user_idx, histories, candidates, labels = zip(*batch) batch_hist = self._make_batch_asignees(histories) batch_cand = self._make_batch_asignees(candidates) @@ -148,7 +154,8 @@ def __call__(self, batch) -> RecommendationBatch: x_hist = self._tokenize_df(pd.concat(histories)) x_cand = self._tokenize_df(pd.concat(candidates)) labels = torch.from_numpy(np.concatenate(labels)).float() - users = torch.from_numpy(np.concatenate(users)).long() + user_ids = torch.from_numpy(np.concatenate(user_ids)).long() + user_idx = torch.from_numpy(np.concatenate(user_idx)).long() return RecommendationBatch( batch_hist=batch_hist, @@ -156,7 +163,8 @@ def __call__(self, batch) -> RecommendationBatch: x_hist=x_hist, x_cand=x_cand, labels=labels, - users=users, + user_ids=user_ids, + user_idx=user_idx, ) def _tokenize_embeddings(self, text: List[List[int]], max_len: Optional[int]) -> torch.Tensor: @@ -177,6 +185,10 @@ def _tokenize_plm(self, text: List[str]): def _tokenize_df(self, df: pd.DataFrame) -> Dict[str, Any]: batch_out = {} + # news IDs (i.e., keep only numeric part of unique NID) + nids = np.array([int(nid.split("N")[-1]) for nid in df.index.values]) + batch_out["news_ids"] = torch.from_numpy(nids).long() + if not self.concatenate_inputs: # prepare text if not self.use_plm: From eca9f54f6c7c9dee72868ba7c84b0bdc64464ddc Mon Sep 17 00:00:00 2001 From: andreeaiana Date: Tue, 19 Mar 2024 11:56:32 +0100 Subject: [PATCH 4/5] Add optional saving of recommendation list in on_test_epoch_end --- .../models/fair_rec/manner_cr_module.py | 38 +++++++++++++++++- newsreclib/models/fair_rec/manner_module.py | 34 ++++++++++++++++ .../models/fair_rec/senti_debias_module.py | 36 ++++++++++++++++- newsreclib/models/fair_rec/sentirec_module.py | 38 +++++++++++++++++- newsreclib/models/general_rec/caum_module.py | 38 +++++++++++++++++- .../models/general_rec/cen_news_rec_module.py | 38 +++++++++++++++++- newsreclib/models/general_rec/dkn_module.py | 38 +++++++++++++++++- newsreclib/models/general_rec/lstur_module.py | 40 +++++++++++++++++-- newsreclib/models/general_rec/miner_module.py | 38 +++++++++++++++++- newsreclib/models/general_rec/mins_module.py | 38 +++++++++++++++++- newsreclib/models/general_rec/naml_module.py | 38 +++++++++++++++++- newsreclib/models/general_rec/npa_module.py | 40 +++++++++++++++++-- newsreclib/models/general_rec/nrms_module.py | 38 +++++++++++++++++- newsreclib/models/general_rec/tanr_module.py | 38 +++++++++++++++++- 14 files changed, 503 insertions(+), 27 deletions(-) diff --git a/newsreclib/models/fair_rec/manner_cr_module.py b/newsreclib/models/fair_rec/manner_cr_module.py index 45a5250..071ab67 100644 --- a/newsreclib/models/fair_rec/manner_cr_module.py +++ b/newsreclib/models/fair_rec/manner_cr_module.py @@ -60,6 +60,10 @@ class CRModule(AbstractRecommneder): The number of topical categories. num_sent_classes: The number of sentiment classes. 
+ save_recs: + Whether to save the recommendations (i.e., candidates news and corresponding scores) to disk in JSON format. + recs_fpath: + Path where to save the list of recommendations and corresponding scores for users. optimizer: Optimizer used for model training. scheduler: @@ -86,6 +90,8 @@ def __init__( top_k_list: List[int], num_categ_classes: int, num_sent_classes: int, + save_recs: bool, + recs_fpath: Optional[str], optimizer: torch.optim.Optimizer, scheduler: torch.optim.lr_scheduler, ) -> None: @@ -98,6 +104,9 @@ def __init__( self.num_categ_classes = self.hparams.num_categ_classes + 1 self.num_sent_classes = self.hparams.num_sent_classes + 1 + if self.hparams.save_recs: + assert isinstance(self.hparams.recs_fpath, str) + # initialize loss self.criterion = self._get_loss(self.hparams.loss) @@ -259,6 +268,8 @@ def model_step( torch.Tensor, torch.Tensor, torch.Tensor, + torch.Tensor, + torch.Tensor, ]: scores = self.forward(batch) @@ -319,6 +330,9 @@ def model_step( [torch.where(mask_hist[n])[0].shape[0] for n in range(mask_hist.shape[0])] ) + user_ids = batch["user_ids"] + cand_news_ids = batch["x_cand"]["news_ids"] + return ( loss, preds, @@ -329,10 +343,12 @@ def model_step( target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) def training_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log loss self.train_loss(loss) @@ -366,7 +382,7 @@ def on_train_epoch_end(self) -> None: self.training_step_outputs = self._clear_epoch_outputs(self.training_step_outputs) def validation_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log metrics self.val_loss(loss) @@ -420,6 +436,8 @@ def test_step(self, batch: RecommendationBatch, batch_idx: int): target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) = self.model_step(batch) # update and log metrics @@ -455,6 +473,9 @@ def on_test_epoch_end(self) -> None: cand_indexes = torch.arange(cand_news_size.shape[0]).repeat_interleave(cand_news_size) hist_indexes = torch.arange(hist_news_size.shape[0]).repeat_interleave(hist_news_size) + user_ids = self._gather_step_outputs(self.test_step_outputs, "user_ids") + cand_news_ids = self._gather_step_outputs(self.test_step_outputs, "cand_news_ids") + # update metrics self.test_rec_metrics(preds, targets, **{"indexes": cand_indexes}) self.test_categ_div_metrics(preds, target_categories, cand_indexes) @@ -483,5 +504,18 @@ def on_test_epoch_end(self) -> None: self.test_sent_pers_metrics, on_step=False, on_epoch=True, prog_bar=True, logger=True ) + # save recommendations + if self.hparams.save_recs: + recommendations_dico = self._get_recommendations( + user_ids=user_ids, + news_ids=cand_news_ids, + scores=preds, + cand_news_size=cand_news_size, + ) + print(recommendations_dico) + self._save_recommendations( + recommendations=recommendations_dico, fpath=self.hparams.recs_fpath + ) + # clear memory for the next epoch self.test_step_outputs = self._clear_epoch_outputs(self.test_step_outputs) diff --git a/newsreclib/models/fair_rec/manner_module.py b/newsreclib/models/fair_rec/manner_module.py index 5e4d3b9..723a3d8 100644 --- a/newsreclib/models/fair_rec/manner_module.py +++ 
b/newsreclib/models/fair_rec/manner_module.py @@ -41,6 +41,10 @@ class MANNERModule(AbstractRecommneder): The number of topical categories. num_sent_classes: The number of sentiment classes. + save_recs: + Whether to save the recommendations (i.e., candidates news and corresponding scores) to disk in JSON format. + recs_fpath: + Path where to save the list of recommendations and corresponding scores for users. optimizer: Optimizer used for model training. scheduler: @@ -58,6 +62,8 @@ def __init__( top_k_list: List[int], num_categ_classes: int, num_sent_classes: int, + save_recs: bool, + recs_fpath: Optional[str], optimizer: torch.optim.Optimizer, scheduler: torch.optim.lr_scheduler, ) -> None: @@ -70,6 +76,9 @@ def __init__( self.num_categ_classes = self.hparams.num_categ_classes + 1 self.num_sent_classes = self.hparams.num_sent_classes + 1 + if self.hparams.save_recs: + assert isinstance(self.hparams.recs_fpath, str) + # load ensemble components self.cr_module = CRModule.load_from_checkpoint( checkpoint_path=self.hparams.cr_module_module_ckpt @@ -208,6 +217,8 @@ def model_step( torch.Tensor, torch.Tensor, torch.Tensor, + torch.Tensor, + torch.Tensor, ]: scores = self.forward(batch) @@ -237,6 +248,9 @@ def model_step( [torch.where(mask_hist[n])[0].shape[0] for n in range(mask_hist.shape[0])] ) + user_ids = batch["user_ids"] + cand_news_ids = batch["x_cand"]["news_ids"] + return ( preds, targets, @@ -246,6 +260,8 @@ def model_step( target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) def training_step(self, batch: RecommendationBatch, batch_idx: int): @@ -264,6 +280,8 @@ def test_step(self, batch: RecommendationBatch, batch_idx: int): target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) = self.model_step(batch) # collect step outputs for metric computation @@ -287,6 +305,9 @@ def on_test_epoch_end(self) -> None: cand_indexes = torch.arange(cand_news_size.shape[0]).repeat_interleave(cand_news_size) hist_indexes = torch.arange(hist_news_size.shape[0]).repeat_interleave(hist_news_size) + user_ids = self._gather_step_outputs(self.test_step_outputs, "user_ids") + cand_news_ids = self._gather_step_outputs(self.test_step_outputs, "cand_news_ids") + # update metrics self.test_rec_metrics(preds, targets, **{"indexes": cand_indexes}) self.test_categ_div_metrics(preds, target_categories, cand_indexes) @@ -315,5 +336,18 @@ def on_test_epoch_end(self) -> None: self.test_sent_pers_metrics, on_step=False, on_epoch=True, prog_bar=True, logger=True ) + # save recommendations + if self.hparams.save_recs: + recommendations_dico = self._get_recommendations( + user_ids=user_ids, + news_ids=cand_news_ids, + scores=preds, + cand_news_size=cand_news_size, + ) + print(recommendations_dico) + self._save_recommendations( + recommendations=recommendations_dico, fpath=self.hparams.recs_fpath + ) + # clear memory for the next epoch self.test_step_outputs = self._clear_epoch_outputs(self.test_step_outputs) diff --git a/newsreclib/models/fair_rec/senti_debias_module.py b/newsreclib/models/fair_rec/senti_debias_module.py index 2402553..288241d 100644 --- a/newsreclib/models/fair_rec/senti_debias_module.py +++ b/newsreclib/models/fair_rec/senti_debias_module.py @@ -283,6 +283,10 @@ class SentiDebiasModule(AbstractRecommneder): The number of topical categories. num_sent_classes: The number of sentiment classes. + save_recs: + Whether to save the recommendations (i.e., candidates news and corresponding scores) to disk in JSON format. 
+ recs_fpath: + Path where to save the list of recommendations and corresponding scores for users. optimizer: Optimizer used for model training. alpha_coefficient: @@ -305,6 +309,8 @@ def __init__( top_k_list: List[int], num_categ_classes: int, num_sent_classes: int, + save_recs: bool, + recs_fpath: Optional[str], optimizer: torch.optim.Optimizer, alpha_coefficient: float, beta_coefficient: float, @@ -323,6 +329,9 @@ def __init__( self.num_categ_classes = self.hparams.num_categ_classes + 1 self.num_sent_classes = self.hparams.num_sent_classes + 1 + if self.hparams.save_recs: + assert isinstance(self.hparams.recs_fpath, str) + # initialize loss self.rec_loss = self._get_loss("cross_entropy_loss") self.a_loss = self._get_loss("cross_entropy_loss") @@ -416,6 +425,8 @@ def model_step( torch.Tensor, torch.Tensor, torch.Tensor, + torch.Tensor, + torch.Tensor, ]: _, bias_free_scores, _, _, _ = self.forward(batch) @@ -445,6 +456,9 @@ def model_step( [torch.where(mask_hist[n])[0].shape[0] for n in range(mask_hist.shape[0])] ) + user_ids = batch["user_ids"] + cand_news_ids = batch["x_cand"]["news_ids"] + return ( preds, targets, @@ -454,6 +468,8 @@ def model_step( target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) def training_step(self, batch: RecommendationBatch, batch_idx: int): @@ -532,7 +548,7 @@ def on_train_epoch_end(self) -> None: self.training_step_outputs = self._clear_epoch_outputs(self.training_step_outputs) def validation_step(self, batch: RecommendationBatch, batch_idx: int): - preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # collect step outputs for metric computation self.val_step_outputs = self._collect_step_outputs( @@ -581,6 +597,8 @@ def test_step(self, batch: RecommendationBatch, batch_idx: int): target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) = self.model_step(batch) # collect step outputs for metric computation @@ -604,6 +622,9 @@ def on_test_epoch_end(self) -> None: cand_indexes = torch.arange(cand_news_size.shape[0]).repeat_interleave(cand_news_size) hist_indexes = torch.arange(hist_news_size.shape[0]).repeat_interleave(hist_news_size) + user_ids = self._gather_step_outputs(self.test_step_outputs, "user_ids") + cand_news_ids = self._gather_step_outputs(self.test_step_outputs, "cand_news_ids") + # update metrics self.test_rec_metrics(preds, targets, **{"indexes": cand_indexes}) self.test_categ_div_metrics(preds, target_categories, cand_indexes) @@ -632,6 +653,19 @@ def on_test_epoch_end(self) -> None: self.test_sent_pers_metrics, on_step=False, on_epoch=True, prog_bar=True, logger=True ) + # save recommendations + if self.hparams.save_recs: + recommendations_dico = self._get_recommendations( + user_ids=user_ids, + news_ids=cand_news_ids, + scores=preds, + cand_news_size=cand_news_size, + ) + print(recommendations_dico) + self._save_recommendations( + recommendations=recommendations_dico, fpath=self.hparams.recs_fpath + ) + # clear memory for the next epoch self.test_step_outputs = self._clear_epoch_outputs(self.test_step_outputs) diff --git a/newsreclib/models/fair_rec/sentirec_module.py b/newsreclib/models/fair_rec/sentirec_module.py index 17dbdaa..2cb347f 100644 --- a/newsreclib/models/fair_rec/sentirec_module.py +++ b/newsreclib/models/fair_rec/sentirec_module.py @@ -68,6 +68,10 @@ class SentiRecModule(AbstractRecommneder): The number of topical categories. 
num_sent_classes: The number of sentiment classes. + save_recs: + Whether to save the recommendations (i.e., candidates news and corresponding scores) to disk in JSON format. + recs_fpath: + Path where to save the list of recommendations and corresponding scores for users. optimizer: Optimizer used for model training. scheduler: @@ -97,6 +101,8 @@ def __init__( top_k_list: List[int], num_categ_classes: int, num_sent_classes: int, + save_recs: bool, + recs_fpath: Optional[str], optimizer: torch.optim.Optimizer, scheduler: torch.optim.lr_scheduler, ) -> None: @@ -109,6 +115,9 @@ def __init__( self.num_categ_classes = self.hparams.num_categ_classes + 1 self.num_sent_classes = self.hparams.num_sent_classes + 1 + if self.hparams.save_recs: + assert isinstance(self.hparams.recs_fpath, str) + # initialize loss if not self.hparams.dual_loss_training: self.criterion = self._get_loss(self.hparams.loss) @@ -278,6 +287,8 @@ def model_step( torch.Tensor, torch.Tensor, torch.Tensor, + torch.Tensor, + torch.Tensor, ]: scores, sent_scores = self.forward(batch) @@ -366,6 +377,9 @@ def model_step( [torch.where(mask_cand[n])[0].shape[0] for n in range(mask_cand.shape[0])] ) + user_ids = batch["user_ids"] + cand_news_ids = batch["x_cand"]["news_ids"] + return ( loss, preds, @@ -376,10 +390,12 @@ def model_step( target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) def training_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log loss self.train_loss(loss) @@ -413,7 +429,7 @@ def on_train_epoch_end(self) -> None: self.training_step_outputs = self._clear_epoch_outputs(self.training_step_outputs) def validation_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log metrics self.val_loss(loss) @@ -467,6 +483,8 @@ def test_step(self, batch: RecommendationBatch, batch_idx: int): target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) = self.model_step(batch) # update and log metrics @@ -502,6 +520,9 @@ def on_test_epoch_end(self) -> None: cand_indexes = torch.arange(cand_news_size.shape[0]).repeat_interleave(cand_news_size) hist_indexes = torch.arange(hist_news_size.shape[0]).repeat_interleave(hist_news_size) + user_ids = self._gather_step_outputs(self.test_step_outputs, "user_ids") + cand_news_ids = self._gather_step_outputs(self.test_step_outputs, "cand_news_ids") + # update metrics self.test_rec_metrics(preds, targets, **{"indexes": cand_indexes}) self.test_categ_div_metrics(preds, target_categories, cand_indexes) @@ -530,6 +551,19 @@ def on_test_epoch_end(self) -> None: self.test_sent_pers_metrics, on_step=False, on_epoch=True, prog_bar=True, logger=True ) + # save recommendations + if self.hparams.save_recs: + recommendations_dico = self._get_recommendations( + user_ids=user_ids, + news_ids=cand_news_ids, + scores=preds, + cand_news_size=cand_news_size, + ) + print(recommendations_dico) + self._save_recommendations( + recommendations=recommendations_dico, fpath=self.hparams.recs_fpath + ) + # clear memory for the next epoch self.test_step_outputs = self.test_step_outputs = self._clear_epoch_outputs( self.test_step_outputs diff --git a/newsreclib/models/general_rec/caum_module.py 
b/newsreclib/models/general_rec/caum_module.py index e4019c9..f1aff46 100644 --- a/newsreclib/models/general_rec/caum_module.py +++ b/newsreclib/models/general_rec/caum_module.py @@ -83,6 +83,10 @@ class CAUMModule(AbstractRecommneder): The number of topical categories. num_sent_classes: The number of sentiment classes. + save_recs: + Whether to save the recommendations (i.e., candidates news and corresponding scores) to disk in JSON format. + recs_fpath: + Path where to save the list of recommendations and corresponding scores for users. optimizer: Optimizer used for model training. scheduler: @@ -120,6 +124,8 @@ def __init__( top_k_list: List[int], num_categ_classes: int, num_sent_classes: int, + save_recs: bool, + recs_fpath: Optional[str], optimizer: torch.optim.Optimizer, scheduler: torch.optim.lr_scheduler, ) -> None: @@ -132,6 +138,9 @@ def __init__( self.num_categ_classes = self.hparams.num_categ_classes + 1 self.num_sent_classes = self.hparams.num_sent_classes + 1 + if self.hparams.save_recs: + assert isinstance(self.hparams.recs_fpath, str) + # initialize loss if not self.hparams.dual_loss_training: self.criterion = self._get_loss(self.hparams.loss) @@ -365,6 +374,8 @@ def model_step( torch.Tensor, torch.Tensor, torch.Tensor, + torch.Tensor, + torch.Tensor, ]: scores = self.forward(batch) @@ -438,6 +449,9 @@ def model_step( [torch.where(mask_hist[n])[0].shape[0] for n in range(mask_hist.shape[0])] ) + user_ids = batch["user_ids"] + cand_news_ids = batch["x_cand"]["news_ids"] + return ( loss, preds, @@ -448,10 +462,12 @@ def model_step( target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) def training_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log loss self.train_loss(loss) @@ -485,7 +501,7 @@ def on_train_epoch_end(self) -> None: self.training_step_outputs = self._clear_epoch_outputs(self.training_step_outputs) def validation_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log metrics self.val_loss(loss) @@ -539,6 +555,8 @@ def test_step(self, batch: RecommendationBatch, batch_idx: int): target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) = self.model_step(batch) # update and log metrics @@ -574,6 +592,9 @@ def on_test_epoch_end(self) -> None: cand_indexes = torch.arange(cand_news_size.shape[0]).repeat_interleave(cand_news_size) hist_indexes = torch.arange(hist_news_size.shape[0]).repeat_interleave(hist_news_size) + user_ids = self._gather_step_outputs(self.test_step_outputs, "user_ids") + cand_news_ids = self._gather_step_outputs(self.test_step_outputs, "cand_news_ids") + # update metrics self.test_rec_metrics(preds, targets, **{"indexes": cand_indexes}) self.test_categ_div_metrics(preds, target_categories, cand_indexes) @@ -602,5 +623,18 @@ def on_test_epoch_end(self) -> None: self.test_sent_pers_metrics, on_step=False, on_epoch=True, prog_bar=True, logger=True ) + # save recommendations + if self.hparams.save_recs: + recommendations_dico = self._get_recommendations( + user_ids=user_ids, + news_ids=cand_news_ids, + scores=preds, + cand_news_size=cand_news_size, + ) + print(recommendations_dico) + self._save_recommendations( + 
recommendations=recommendations_dico, fpath=self.hparams.recs_fpath + ) + # clear memory for the next epoch self.test_step_outputs = self._clear_epoch_outputs(self.test_step_outputs) diff --git a/newsreclib/models/general_rec/cen_news_rec_module.py b/newsreclib/models/general_rec/cen_news_rec_module.py index 9525981..42359a7 100644 --- a/newsreclib/models/general_rec/cen_news_rec_module.py +++ b/newsreclib/models/general_rec/cen_news_rec_module.py @@ -70,6 +70,10 @@ class CenNewsRecModule(AbstractRecommneder): The number of topical categories. num_sent_classes: The number of sentiment classes. + save_recs: + Whether to save the recommendations (i.e., candidates news and corresponding scores) to disk in JSON format. + recs_fpath: + Path where to save the list of recommendations and corresponding scores for users. optimizer: Optimizer used for model training. scheduler: @@ -101,6 +105,8 @@ def __init__( top_k_list: List[int], num_categ_classes: int, num_sent_classes: int, + save_recs: bool, + recs_fpath: Optional[str], optimizer: torch.optim.Optimizer, scheduler: torch.optim.lr_scheduler, ) -> None: @@ -113,6 +119,9 @@ def __init__( self.num_categ_classes = self.hparams.num_categ_classes + 1 self.num_sent_classes = self.hparams.num_sent_classes + 1 + if self.hparams.save_recs: + assert isinstance(self.hparams.recs_fpath, str) + # initialize loss if not self.hparams.dual_loss_training: self.criterion = self._get_loss(self.hparams.loss) @@ -278,6 +287,8 @@ def model_step( torch.Tensor, torch.Tensor, torch.Tensor, + torch.Tensor, + torch.Tensor, ]: scores = self.forward(batch) @@ -351,6 +362,9 @@ def model_step( [torch.where(mask_hist[n])[0].shape[0] for n in range(mask_hist.shape[0])] ) + user_ids = batch["user_ids"] + cand_news_ids = batch["x_cand"]["news_ids"] + return ( loss, preds, @@ -361,10 +375,12 @@ def model_step( target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) def training_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log loss self.train_loss(loss) @@ -398,7 +414,7 @@ def on_train_epoch_end(self) -> None: self.training_step_outputs = self._clear_epoch_outputs(self.training_step_outputs) def validation_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log metrics self.val_loss(loss) @@ -452,6 +468,8 @@ def test_step(self, batch: RecommendationBatch, batch_idx: int): target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) = self.model_step(batch) # update and log metrics @@ -487,6 +505,9 @@ def on_test_epoch_end(self) -> None: cand_indexes = torch.arange(cand_news_size.shape[0]).repeat_interleave(cand_news_size) hist_indexes = torch.arange(hist_news_size.shape[0]).repeat_interleave(hist_news_size) + user_ids = self._gather_step_outputs(self.test_step_outputs, "user_ids") + cand_news_ids = self._gather_step_outputs(self.test_step_outputs, "cand_news_ids") + # update metrics self.test_rec_metrics(preds, targets, **{"indexes": cand_indexes}) self.test_categ_div_metrics(preds, target_categories, cand_indexes) @@ -515,5 +536,18 @@ def on_test_epoch_end(self) -> None: self.test_sent_pers_metrics, on_step=False, on_epoch=True, prog_bar=True, logger=True ) + # save 
recommendations + if self.hparams.save_recs: + recommendations_dico = self._get_recommendations( + user_ids=user_ids, + news_ids=cand_news_ids, + scores=preds, + cand_news_size=cand_news_size, + ) + print(recommendations_dico) + self._save_recommendations( + recommendations=recommendations_dico, fpath=self.hparams.recs_fpath + ) + # clear memory for the next epoch self.test_step_outputs = self._clear_epoch_outputs(self.test_step_outputs) diff --git a/newsreclib/models/general_rec/dkn_module.py b/newsreclib/models/general_rec/dkn_module.py index 9925d70..a290376 100644 --- a/newsreclib/models/general_rec/dkn_module.py +++ b/newsreclib/models/general_rec/dkn_module.py @@ -59,6 +59,10 @@ class DKNModule(AbstractRecommneder): The number of topical categories. num_sent_classes: The number of sentiment classes. + save_recs: + Whether to save the recommendations (i.e., candidates news and corresponding scores) to disk in JSON format. + recs_fpath: + Path where to save the list of recommendations and corresponding scores for users. optimizer: Optimizer used for model training. scheduler: @@ -84,6 +88,8 @@ def __init__( top_k_list: List[int], num_categ_classes: int, num_sent_classes: int, + save_recs: bool, + recs_fpath: Optional[str], optimizer: torch.optim.Optimizer, scheduler: torch.optim.lr_scheduler, ) -> None: @@ -96,6 +102,9 @@ def __init__( self.num_categ_classes = self.hparams.num_categ_classes + 1 self.num_sent_classes = self.hparams.num_sent_classes + 1 + if self.hparams.save_recs: + assert isinstance(self.hparams.recs_fpath, str) + # initialize loss if not self.hparams.dual_loss_training: self.criterion = self._get_loss(self.hparams.loss) @@ -244,6 +253,8 @@ def model_step( torch.Tensor, torch.Tensor, torch.Tensor, + torch.Tensor, + torch.Tensor, ]: scores = self.forward(batch) @@ -317,6 +328,9 @@ def model_step( [torch.where(mask_hist[n])[0].shape[0] for n in range(mask_hist.shape[0])] ) + user_ids = batch["user_ids"] + cand_news_ids = batch["x_cand"]["news_ids"] + return ( loss, preds, @@ -327,10 +341,12 @@ def model_step( target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) def training_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log loss self.train_loss(loss) @@ -364,7 +380,7 @@ def on_train_epoch_end(self) -> None: self._clear_epoch_outputs(self.training_step_outputs) def validation_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log metrics self.val_loss(loss) @@ -418,6 +434,8 @@ def test_step(self, batch: RecommendationBatch, batch_idx: int): target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) = self.model_step(batch) # update and log metrics @@ -453,6 +471,9 @@ def on_test_epoch_end(self) -> None: cand_indexes = torch.arange(cand_news_size.shape[0]).repeat_interleave(cand_news_size) hist_indexes = torch.arange(hist_news_size.shape[0]).repeat_interleave(hist_news_size) + user_ids = self._gather_step_outputs(self.test_step_outputs, "user_ids") + cand_news_ids = self._gather_step_outputs(self.test_step_outputs, "cand_news_ids") + # update metrics self.test_rec_metrics(preds, targets, **{"indexes": cand_indexes}) self.test_categ_div_metrics(preds, 
target_categories, cand_indexes) @@ -481,5 +502,18 @@ def on_test_epoch_end(self) -> None: self.test_sent_pers_metrics, on_step=False, on_epoch=True, prog_bar=True, logger=True ) + # save recommendations + if self.hparams.save_recs: + recommendations_dico = self._get_recommendations( + user_ids=user_ids, + news_ids=cand_news_ids, + scores=preds, + cand_news_size=cand_news_size, + ) + print(recommendations_dico) + self._save_recommendations( + recommendations=recommendations_dico, fpath=self.hparams.recs_fpath + ) + # clear memory for the next epoch self.test_step_outputs = self._clear_epoch_outputs(self.test_step_outputs) diff --git a/newsreclib/models/general_rec/lstur_module.py b/newsreclib/models/general_rec/lstur_module.py index 47b4336..7c902cf 100644 --- a/newsreclib/models/general_rec/lstur_module.py +++ b/newsreclib/models/general_rec/lstur_module.py @@ -77,6 +77,10 @@ class LSTURModule(AbstractRecommneder): The number of topical categories. num_sent_classes: The number of sentiment classes. + save_recs: + Whether to save the recommendations (i.e., candidates news and corresponding scores) to disk in JSON format. + recs_fpath: + Path where to save the list of recommendations and corresponding scores for users. optimizer: Optimizer used for model training. scheduler: @@ -110,6 +114,8 @@ def __init__( top_k_list: List[int], num_categ_classes: int, num_sent_classes: int, + save_recs: bool, + recs_fpath: Optional[str], optimizer: torch.optim.Optimizer, scheduler: torch.optim.lr_scheduler, ) -> None: @@ -123,6 +129,9 @@ def __init__( self.num_sent_classes = self.hparams.num_sent_classes + 1 self.num_users = self.hparams.num_users + 1 + if self.hparams.save_recs: + assert isinstance(self.hparams.recs_fpath, str) + # initialize loss if not self.hparams.dual_loss_training: self.criterion = self._get_loss(self.hparams.loss) @@ -281,7 +290,7 @@ def forward(self, batch: RecommendationBatch) -> torch.Tensor: ) if not self.hparams.late_fusion: # encode user - user_vector = self.user_encoder(batch["users"], hist_news_vector_agg, hist_size) + user_vector = self.user_encoder(batch["user_idx"], hist_news_vector_agg, hist_size) else: # aggregate embeddings of clicked news user_vector = torch.div(hist_news_vector_agg.sum(dim=1), hist_size.unsqueeze(dim=-1)) @@ -308,6 +317,8 @@ def model_step( torch.Tensor, torch.Tensor, torch.Tensor, + torch.Tensor, + torch.Tensor, ]: scores = self.forward(batch) @@ -381,6 +392,9 @@ def model_step( [torch.where(mask_hist[n])[0].shape[0] for n in range(mask_hist.shape[0])] ) + user_ids = batch["user_ids"] + cand_news_ids = batch["x_cand"]["news_ids"] + return ( loss, preds, @@ -391,10 +405,12 @@ def model_step( target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) def training_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log loss self.train_loss(loss) @@ -428,7 +444,7 @@ def on_train_epoch_end(self) -> None: self.training_step_outputs = self._clear_epoch_outputs(self.training_step_outputs) def validation_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log metrics self.val_loss(loss) @@ -482,6 +498,8 @@ def test_step(self, batch: RecommendationBatch, batch_idx: int): 
target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) = self.model_step(batch) # update and log metrics @@ -517,6 +535,9 @@ def on_test_epoch_end(self) -> None: cand_indexes = torch.arange(cand_news_size.shape[0]).repeat_interleave(cand_news_size) hist_indexes = torch.arange(hist_news_size.shape[0]).repeat_interleave(hist_news_size) + user_ids = self._gather_step_outputs(self.test_step_outputs, "user_ids") + cand_news_ids = self._gather_step_outputs(self.test_step_outputs, "cand_news_ids") + # update metrics self.test_rec_metrics(preds, targets, **{"indexes": cand_indexes}) self.test_categ_div_metrics(preds, target_categories, cand_indexes) @@ -545,5 +566,18 @@ def on_test_epoch_end(self) -> None: self.test_sent_pers_metrics, on_step=False, on_epoch=True, prog_bar=True, logger=True ) + # save recommendations + if self.hparams.save_recs: + recommendations_dico = self._get_recommendations( + user_ids=user_ids, + news_ids=cand_news_ids, + scores=preds, + cand_news_size=cand_news_size, + ) + print(recommendations_dico) + self._save_recommendations( + recommendations=recommendations_dico, fpath=self.hparams.recs_fpath + ) + # clear memory for the next epoch self.test_step_outputs = self._clear_epoch_outputs(self.test_step_outputs) diff --git a/newsreclib/models/general_rec/miner_module.py b/newsreclib/models/general_rec/miner_module.py index 1499564..694c556 100644 --- a/newsreclib/models/general_rec/miner_module.py +++ b/newsreclib/models/general_rec/miner_module.py @@ -78,6 +78,10 @@ class MINERModule(AbstractRecommneder): The number of topical categories. num_sent_classes: The number of sentiment classes. + save_recs: + Whether to save the recommendations (i.e., candidates news and corresponding scores) to disk in JSON format. + recs_fpath: + Path where to save the list of recommendations and corresponding scores for users. optimizer: Optimizer used for model training. 
scheduler: @@ -109,6 +113,8 @@ def __init__( top_k_list: List[int], num_categ_classes: int, num_sent_classes: int, + save_recs: bool, + recs_fpath: Optional[str], optimizer: torch.optim.Optimizer, scheduler: torch.optim.lr_scheduler, ) -> None: @@ -121,6 +127,9 @@ def __init__( self.num_categ_classes = self.hparams.num_categ_classes + 1 self.num_sent_classes = self.hparams.num_sent_classes + 1 + if self.hparams.save_recs: + assert isinstance(self.hparams.recs_fpath, str) + # initialize loss if not self.hparams.dual_loss_training: self.criterion = self._get_loss(self.hparams.loss) @@ -328,6 +337,8 @@ def model_step( torch.Tensor, torch.Tensor, torch.Tensor, + torch.Tensor, + torch.Tensor, ]: scores, user_vector = self.forward(batch) @@ -411,6 +422,9 @@ def model_step( [torch.where(mask_hist[n])[0].shape[0] for n in range(mask_hist.shape[0])] ) + user_ids = batch["user_ids"] + cand_news_ids = batch["x_cand"]["news_ids"] + return ( loss, preds, @@ -421,10 +435,12 @@ def model_step( target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) def training_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log loss self.train_loss(loss) @@ -458,7 +474,7 @@ def on_train_epoch_end(self) -> None: self.training_step_outputs = self._clear_epoch_outputs(self.training_step_outputs) def validation_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log metrics self.val_loss(loss) @@ -512,6 +528,8 @@ def test_step(self, batch: RecommendationBatch, batch_idx: int): target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) = self.model_step(batch) # update and log metrics @@ -547,6 +565,9 @@ def on_test_epoch_end(self) -> None: cand_indexes = torch.arange(cand_news_size.shape[0]).repeat_interleave(cand_news_size) hist_indexes = torch.arange(hist_news_size.shape[0]).repeat_interleave(hist_news_size) + user_ids = self._gather_step_outputs(self.test_step_outputs, "user_ids") + cand_news_ids = self._gather_step_outputs(self.test_step_outputs, "cand_news_ids") + # update metrics self.test_rec_metrics(preds, targets, **{"indexes": cand_indexes}) self.test_categ_div_metrics(preds, target_categories, cand_indexes) @@ -575,5 +596,18 @@ def on_test_epoch_end(self) -> None: self.test_sent_pers_metrics, on_step=False, on_epoch=True, prog_bar=True, logger=True ) + # save recommendations + if self.hparams.save_recs: + recommendations_dico = self._get_recommendations( + user_ids=user_ids, + news_ids=cand_news_ids, + scores=preds, + cand_news_size=cand_news_size, + ) + print(recommendations_dico) + self._save_recommendations( + recommendations=recommendations_dico, fpath=self.hparams.recs_fpath + ) + # clear memory for the next epoch self.test_step_outputs = self._clear_epoch_outputs(self.test_step_outputs) diff --git a/newsreclib/models/general_rec/mins_module.py b/newsreclib/models/general_rec/mins_module.py index bd9633d..cd79b6c 100644 --- a/newsreclib/models/general_rec/mins_module.py +++ b/newsreclib/models/general_rec/mins_module.py @@ -69,6 +69,10 @@ class MINSModule(AbstractRecommneder): The number of topical categories. num_sent_classes: The number of sentiment classes. 
+ save_recs: + Whether to save the recommendations (i.e., candidates news and corresponding scores) to disk in JSON format. + recs_fpath: + Path where to save the list of recommendations and corresponding scores for users. optimizer: Optimizer used for model training. scheduler: @@ -99,6 +103,8 @@ def __init__( top_k_list: List[int], num_categ_classes: int, num_sent_classes: int, + save_recs: bool, + recs_fpath: Optional[str], optimizer: torch.optim.Optimizer, scheduler: torch.optim.lr_scheduler, ) -> None: @@ -111,6 +117,9 @@ def __init__( self.num_categ_classes = self.hparams.num_categ_classes + 1 self.num_sent_classes = self.hparams.num_sent_classes + 1 + if self.hparams.save_recs: + assert isinstance(self.hparams.recs_fpath, str) + # initialize loss if not self.hparams.dual_loss_training: self.criterion = self._get_loss(self.hparams.loss) @@ -283,6 +292,8 @@ def model_step( torch.Tensor, torch.Tensor, torch.Tensor, + torch.Tensor, + torch.Tensor, ]: scores = self.forward(batch) @@ -356,6 +367,9 @@ def model_step( [torch.where(mask_hist[n])[0].shape[0] for n in range(mask_hist.shape[0])] ) + user_ids = batch["user_ids"] + cand_news_ids = batch["x_cand"]["news_ids"] + return ( loss, preds, @@ -366,10 +380,12 @@ def model_step( target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) def training_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log loss self.train_loss(loss) @@ -403,7 +419,7 @@ def on_train_epoch_end(self) -> None: self.training_step_outputs = self._clear_epoch_outputs(self.training_step_outputs) def validation_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log metrics self.val_loss(loss) @@ -457,6 +473,8 @@ def test_step(self, batch: RecommendationBatch, batch_idx: int): target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) = self.model_step(batch) # update and log metrics @@ -492,6 +510,9 @@ def on_test_epoch_end(self) -> None: cand_indexes = torch.arange(cand_news_size.shape[0]).repeat_interleave(cand_news_size) hist_indexes = torch.arange(hist_news_size.shape[0]).repeat_interleave(hist_news_size) + user_ids = self._gather_step_outputs(self.test_step_outputs, "user_ids") + cand_news_ids = self._gather_step_outputs(self.test_step_outputs, "cand_news_ids") + # update metrics self.test_rec_metrics(preds, targets, **{"indexes": cand_indexes}) self.test_categ_div_metrics(preds, target_categories, cand_indexes) @@ -520,5 +541,18 @@ def on_test_epoch_end(self) -> None: self.test_sent_pers_metrics, on_step=False, on_epoch=True, prog_bar=True, logger=True ) + # save recommendations + if self.hparams.save_recs: + recommendations_dico = self._get_recommendations( + user_ids=user_ids, + news_ids=cand_news_ids, + scores=preds, + cand_news_size=cand_news_size, + ) + print(recommendations_dico) + self._save_recommendations( + recommendations=recommendations_dico, fpath=self.hparams.recs_fpath + ) + # clear memory for the next epoch self.test_step_outputs = self._clear_epoch_outputs(self.test_step_outputs) diff --git a/newsreclib/models/general_rec/naml_module.py b/newsreclib/models/general_rec/naml_module.py index 2c90b31..c9f4c71 100644 --- 
a/newsreclib/models/general_rec/naml_module.py +++ b/newsreclib/models/general_rec/naml_module.py @@ -69,6 +69,10 @@ class NAMLModule(AbstractRecommneder): The number of topical categories. num_sent_classes: The number of sentiment classes. + save_recs: + Whether to save the recommendations (i.e., candidates news and corresponding scores) to disk in JSON format. + recs_fpath: + Path where to save the list of recommendations and corresponding scores for users. optimizer: Optimizer used for model training. scheduler: @@ -99,6 +103,8 @@ def __init__( top_k_list: List[int], num_categ_classes: int, num_sent_classes: int, + save_recs: bool, + recs_fpath: Optional[str], optimizer: torch.optim.Optimizer, scheduler: torch.optim.lr_scheduler, ) -> None: @@ -111,6 +117,9 @@ def __init__( self.num_categ_classes = self.hparams.num_categ_classes + 1 self.num_sent_classes = self.hparams.num_sent_classes + 1 + if self.hparams.save_recs: + assert isinstance(self.hparams.recs_fpath, str) + # initialize loss if not self.hparams.dual_loss_training: self.criterion = self._get_loss(self.hparams.loss) @@ -291,6 +300,8 @@ def model_step( torch.Tensor, torch.Tensor, torch.Tensor, + torch.Tensor, + torch.Tensor, ]: scores = self.forward(batch) @@ -364,6 +375,9 @@ def model_step( [torch.where(mask_hist[n])[0].shape[0] for n in range(mask_hist.shape[0])] ) + user_ids = batch["user_ids"] + cand_news_ids = batch["x_cand"]["news_ids"] + return ( loss, preds, @@ -374,10 +388,12 @@ def model_step( target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) def training_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log loss self.train_loss(loss) @@ -411,7 +427,7 @@ def on_train_epoch_end(self) -> None: self.training_step_outputs = self._clear_epoch_outputs(self.training_step_outputs) def validation_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log metrics self.val_loss(loss) @@ -465,6 +481,8 @@ def test_step(self, batch: RecommendationBatch, batch_idx: int): target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) = self.model_step(batch) # update and log metrics @@ -500,6 +518,9 @@ def on_test_epoch_end(self) -> None: cand_indexes = torch.arange(cand_news_size.shape[0]).repeat_interleave(cand_news_size) hist_indexes = torch.arange(hist_news_size.shape[0]).repeat_interleave(hist_news_size) + user_ids = self._gather_step_outputs(self.test_step_outputs, "user_ids") + cand_news_ids = self._gather_step_outputs(self.test_step_outputs, "cand_news_ids") + # update metrics self.test_rec_metrics(preds, targets, **{"indexes": cand_indexes}) self.test_categ_div_metrics(preds, target_categories, cand_indexes) @@ -528,5 +549,18 @@ def on_test_epoch_end(self) -> None: self.test_sent_pers_metrics, on_step=False, on_epoch=True, prog_bar=True, logger=True ) + # save recommendations + if self.hparams.save_recs: + recommendations_dico = self._get_recommendations( + user_ids=user_ids, + news_ids=cand_news_ids, + scores=preds, + cand_news_size=cand_news_size, + ) + print(recommendations_dico) + self._save_recommendations( + recommendations=recommendations_dico, fpath=self.hparams.recs_fpath + ) + # clear memory for the next 
epoch self.test_step_outputs = self._clear_epoch_outputs(self.test_step_outputs) diff --git a/newsreclib/models/general_rec/npa_module.py b/newsreclib/models/general_rec/npa_module.py index 6a7e517..fe4ec23 100644 --- a/newsreclib/models/general_rec/npa_module.py +++ b/newsreclib/models/general_rec/npa_module.py @@ -64,6 +64,10 @@ class NPAModule(AbstractRecommneder): The number of topical categories. num_sent_classes: The number of sentiment classes. + save_recs: + Whether to save the recommendations (i.e., candidates news and corresponding scores) to disk in JSON format. + recs_fpath: + Path where to save the list of recommendations and corresponding scores for users. optimizer: Optimizer used for model training. scheduler: @@ -90,6 +94,8 @@ def __init__( top_k_list: List[int], num_categ_classes: int, num_sent_classes: int, + save_recs: bool, + recs_fpath: Optional[str], optimizer: torch.optim.Optimizer, scheduler: torch.optim.lr_scheduler, ) -> None: @@ -102,6 +108,9 @@ def __init__( self.num_categ_classes = self.hparams.num_categ_classes + 1 self.num_sent_classes = self.hparams.num_sent_classes + 1 + if self.hparams.save_recs: + assert isinstance(self.hparams.recs_fpath, str) + # initialize loss if not self.hparams.dual_loss_training: self.criterion = self._get_loss(self.hparams.loss) @@ -212,7 +221,7 @@ def forward(self, batch: RecommendationBatch) -> torch.Tensor: ) # project users - projected_users = self.user_projection(batch["users"]) + projected_users = self.user_projection(batch["user_idx"]) # encode user history hist_news_vector = self.news_encoder(batch["x_hist"]["title"], hist_size, projected_users) @@ -255,6 +264,8 @@ def model_step( torch.Tensor, torch.Tensor, torch.Tensor, + torch.Tensor, + torch.Tensor, ]: scores = self.forward(batch) @@ -328,6 +339,9 @@ def model_step( [torch.where(mask_hist[n])[0].shape[0] for n in range(mask_hist.shape[0])] ) + user_ids = batch["user_ids"] + cand_news_ids = batch["x_cand"]["news_ids"] + return ( loss, preds, @@ -338,10 +352,12 @@ def model_step( target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) def training_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log loss self.train_loss(loss) @@ -375,7 +391,7 @@ def on_train_epoch_end(self) -> None: self.training_step_outputs = self._clear_epoch_outputs(self.training_step_outputs) def validation_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log metrics self.val_loss(loss) @@ -429,6 +445,8 @@ def test_step(self, batch: RecommendationBatch, batch_idx: int): target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) = self.model_step(batch) # update and log metrics @@ -464,6 +482,9 @@ def on_test_epoch_end(self) -> None: cand_indexes = torch.arange(cand_news_size.shape[0]).repeat_interleave(cand_news_size) hist_indexes = torch.arange(hist_news_size.shape[0]).repeat_interleave(hist_news_size) + user_ids = self._gather_step_outputs(self.test_step_outputs, "user_ids") + cand_news_ids = self._gather_step_outputs(self.test_step_outputs, "cand_news_ids") + # update metrics self.test_rec_metrics(preds, targets, **{"indexes": cand_indexes}) self.test_categ_div_metrics(preds, 
target_categories, cand_indexes) @@ -492,5 +513,18 @@ def on_test_epoch_end(self) -> None: self.test_sent_pers_metrics, on_step=False, on_epoch=True, prog_bar=True, logger=True ) + # save recommendations + if self.hparams.save_recs: + recommendations_dico = self._get_recommendations( + user_ids=user_ids, + news_ids=cand_news_ids, + scores=preds, + cand_news_size=cand_news_size, + ) + print(recommendations_dico) + self._save_recommendations( + recommendations=recommendations_dico, fpath=self.hparams.recs_fpath + ) + # clear memory for the next epoch self.test_step_outputs = self._clear_epoch_outputs(self.test_step_outputs) diff --git a/newsreclib/models/general_rec/nrms_module.py b/newsreclib/models/general_rec/nrms_module.py index e8dd2ac..5e03233 100644 --- a/newsreclib/models/general_rec/nrms_module.py +++ b/newsreclib/models/general_rec/nrms_module.py @@ -62,6 +62,10 @@ class NRMSModule(AbstractRecommneder): The number of topical categories. num_sent_classes: The number of sentiment classes. + save_recs: + Whether to save the recommendations (i.e., candidates news and corresponding scores) to disk in JSON format. + recs_fpath: + Path where to save the list of recommendations and corresponding scores for users. optimizer: Optimizer used for model training. scheduler: @@ -89,6 +93,8 @@ def __init__( top_k_list: List[int], num_categ_classes: int, num_sent_classes: int, + save_recs: bool, + recs_fpath: Optional[str], optimizer: torch.optim.Optimizer, scheduler: torch.optim.lr_scheduler, ) -> None: @@ -101,6 +107,9 @@ def __init__( self.num_categ_classes = self.hparams.num_categ_classes + 1 self.num_sent_classes = self.hparams.num_sent_classes + 1 + if self.hparams.save_recs: + assert isinstance(self.hparams.recs_fpath, str) + # initialize loss if not self.hparams.dual_loss_training: self.criterion = self._get_loss(self.hparams.loss) @@ -260,6 +269,8 @@ def model_step( torch.Tensor, torch.Tensor, torch.Tensor, + torch.Tensor, + torch.Tensor, ]: scores = self.forward(batch) @@ -333,6 +344,9 @@ def model_step( [torch.where(mask_hist[n])[0].shape[0] for n in range(mask_hist.shape[0])] ) + user_ids = batch["user_ids"] + cand_news_ids = batch["x_cand"]["news_ids"] + return ( loss, preds, @@ -343,10 +357,12 @@ def model_step( target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) def training_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log loss self.train_loss(loss) @@ -380,7 +396,7 @@ def on_train_epoch_end(self) -> None: self.training_step_outputs = self._clear_epoch_outputs(self.training_step_outputs) def validation_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log metrics self.val_loss(loss) @@ -434,6 +450,8 @@ def test_step(self, batch: RecommendationBatch, batch_idx: int): target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) = self.model_step(batch) # update and log metrics @@ -469,6 +487,9 @@ def on_test_epoch_end(self) -> None: cand_indexes = torch.arange(cand_news_size.shape[0]).repeat_interleave(cand_news_size) hist_indexes = torch.arange(hist_news_size.shape[0]).repeat_interleave(hist_news_size) + user_ids = self._gather_step_outputs(self.test_step_outputs, 
"user_ids") + cand_news_ids = self._gather_step_outputs(self.test_step_outputs, "cand_news_ids") + # update metrics self.test_rec_metrics(preds, targets, **{"indexes": cand_indexes}) self.test_categ_div_metrics(preds, target_categories, cand_indexes) @@ -497,5 +518,18 @@ def on_test_epoch_end(self) -> None: self.test_sent_pers_metrics, on_step=False, on_epoch=True, prog_bar=True, logger=True ) + # save recommendations + if self.hparams.save_recs: + recommendations_dico = self._get_recommendations( + user_ids=user_ids, + news_ids=cand_news_ids, + scores=preds, + cand_news_size=cand_news_size, + ) + print(recommendations_dico) + self._save_recommendations( + recommendations=recommendations_dico, fpath=self.hparams.recs_fpath + ) + # clear memory for the next epoch self.test_step_outputs = self._clear_epoch_outputs(self.test_step_outputs) diff --git a/newsreclib/models/general_rec/tanr_module.py b/newsreclib/models/general_rec/tanr_module.py index 7b0cf6b..46faf74 100644 --- a/newsreclib/models/general_rec/tanr_module.py +++ b/newsreclib/models/general_rec/tanr_module.py @@ -71,6 +71,10 @@ class TANRModule(AbstractRecommneder): The number of topical categories. num_sent_classes: The number of sentiment classes. + save_recs: + Whether to save the recommendations (i.e., candidates news and corresponding scores) to disk in JSON format. + recs_fpath: + Path where to save the list of recommendations and corresponding scores for users. optimizer: Optimizer used for model training. scheduler: @@ -101,6 +105,8 @@ def __init__( top_k_list: List[int], num_categ_classes: int, num_sent_classes: int, + save_recs: bool, + recs_fpath: Optional[str], optimizer: torch.optim.Optimizer, scheduler: torch.optim.lr_scheduler, ) -> None: @@ -113,6 +119,9 @@ def __init__( self.num_categ_classes = self.hparams.num_categ_classes + 1 self.num_sent_classes = self.hparams.num_sent_classes + 1 + if self.hparams.save_recs: + assert isinstance(self.hparams.recs_fpath, str) + # initialize recommendation loss if not self.hparams.dual_loss_training: self.criterion = self._get_loss(self.hparams.loss) @@ -291,6 +300,8 @@ def model_step( torch.Tensor, torch.Tensor, torch.Tensor, + torch.Tensor, + torch.Tensor, ]: scores, topic_scores = self.forward(batch) @@ -372,6 +383,9 @@ def model_step( [torch.where(mask_hist[n])[0].shape[0] for n in range(mask_hist.shape[0])] ) + user_ids = batch["user_ids"] + cand_news_ids = batch["x_cand"]["news_ids"] + return ( loss, preds, @@ -382,10 +396,12 @@ def model_step( target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) def training_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log loss self.train_loss(loss) @@ -419,7 +435,7 @@ def on_train_epoch_end(self) -> None: self.training_step_outputs = self._clear_epoch_outputs(self.training_step_outputs) def validation_step(self, batch: RecommendationBatch, batch_idx: int): - loss, preds, targets, cand_news_size, _, _, _, _, _ = self.model_step(batch) + loss, preds, targets, cand_news_size, _, _, _, _, _, _, _ = self.model_step(batch) # update and log metrics self.val_loss(loss) @@ -473,6 +489,8 @@ def test_step(self, batch: RecommendationBatch, batch_idx: int): target_sentiments, hist_categories, hist_sentiments, + user_ids, + cand_news_ids, ) = self.model_step(batch) # update and log metrics @@ -508,6 +526,9 @@ def 
@@ -508,6 +526,9 @@ def on_test_epoch_end(self) -> None:
         cand_indexes = torch.arange(cand_news_size.shape[0]).repeat_interleave(cand_news_size)
         hist_indexes = torch.arange(hist_news_size.shape[0]).repeat_interleave(hist_news_size)

+        user_ids = self._gather_step_outputs(self.test_step_outputs, "user_ids")
+        cand_news_ids = self._gather_step_outputs(self.test_step_outputs, "cand_news_ids")
+
         # update metrics
         self.test_rec_metrics(preds, targets, **{"indexes": cand_indexes})
         self.test_categ_div_metrics(preds, target_categories, cand_indexes)
@@ -536,5 +557,18 @@ def on_test_epoch_end(self) -> None:
             self.test_sent_pers_metrics, on_step=False, on_epoch=True, prog_bar=True, logger=True
         )

+        # save recommendations
+        if self.hparams.save_recs:
+            recommendations_dico = self._get_recommendations(
+                user_ids=user_ids,
+                news_ids=cand_news_ids,
+                scores=preds,
+                cand_news_size=cand_news_size,
+            )
+            print(recommendations_dico)
+            self._save_recommendations(
+                recommendations=recommendations_dico, fpath=self.hparams.recs_fpath
+            )
+
         # clear memory for the next epoch
         self.test_step_outputs = self._clear_epoch_outputs(self.test_step_outputs)

From 1b33d384ea5b3e77d1c8c78e206e9a6129bf768e Mon Sep 17 00:00:00 2001
From: andreeaiana
Date: Tue, 19 Mar 2024 11:57:25 +0100
Subject: [PATCH 5/5] Update model configs with hyperparameters for saving recommendations lists

---
 configs/model/caum.yaml | 4 ++++
 configs/model/cen_news_rec.yaml | 4 ++++
 configs/model/dkn.yaml | 4 ++++
 configs/model/lstur.yaml | 4 ++++
 configs/model/manner_cr_module.yaml | 4 ++++
 configs/model/manner_module.yaml | 4 ++++
 configs/model/miner.yaml | 4 ++++
 configs/model/mins.yaml | 4 ++++
 configs/model/naml.yaml | 4 ++++
 configs/model/npa.yaml | 4 ++++
 configs/model/nrms.yaml | 4 ++++
 configs/model/senti_debias.yaml | 4 ++++
 configs/model/sentirec.yaml | 4 ++++
 configs/model/tanr.yaml | 4 ++++
 14 files changed, 56 insertions(+)

diff --git a/configs/model/caum.yaml b/configs/model/caum.yaml
index f936cda..9e4d361 100644
--- a/configs/model/caum.yaml
+++ b/configs/model/caum.yaml
@@ -47,12 +47,16 @@ outputs:
       "target_sentiments",
       "hist_categories",
       "hist_sentiments",
+      "user_ids",
+      "cand_news_ids",
     ]

 # evaluation
 top_k_list: [5, 10]
 num_categ_classes: 18
 num_sent_classes: 3
+save_recs: False
+recs_fpath: "${paths.output_dir}/recommendations.json"

 optimizer:
   _target_: torch.optim.Adam
diff --git a/configs/model/cen_news_rec.yaml b/configs/model/cen_news_rec.yaml
index aba8165..cf62ca6 100644
--- a/configs/model/cen_news_rec.yaml
+++ b/configs/model/cen_news_rec.yaml
@@ -41,12 +41,16 @@ outputs:
       "target_sentiments",
       "hist_categories",
       "hist_sentiments",
+      "user_ids",
+      "cand_news_ids",
     ]

 # evaluation
 top_k_list: [5, 10]
 num_categ_classes: 18
 num_sent_classes: 3
+save_recs: False
+recs_fpath: "${paths.output_dir}/recommendations.json"

 optimizer:
   _target_: torch.optim.Adam
diff --git a/configs/model/dkn.yaml b/configs/model/dkn.yaml
index e138b4f..11b2289 100644
--- a/configs/model/dkn.yaml
+++ b/configs/model/dkn.yaml
@@ -33,12 +33,16 @@ outputs:
       "target_sentiments",
       "hist_categories",
       "hist_sentiments",
+      "user_ids",
+      "cand_news_ids",
     ]

 # evaluation
 top_k_list: [5, 10]
 num_categ_classes: 18
 num_sent_classes: 3
+save_recs: False
+recs_fpath: "${paths.output_dir}/recommendations.json"

 optimizer:
   _target_: torch.optim.Adam
diff --git a/configs/model/lstur.yaml b/configs/model/lstur.yaml
index 47f7af4..c7e28f3 100644
--- a/configs/model/lstur.yaml
+++ b/configs/model/lstur.yaml
@@ -43,12 +43,16 @@ outputs:
       "target_sentiments",
       "hist_categories",
       "hist_sentiments",
"user_ids", + "cand_news_ids", ] # evaluation top_k_list: [5, 10] num_categ_classes: 18 num_sent_classes: 3 +save_recs: False +recs_fpath: "${paths.output_dir}/recommendations.json" optimizer: _target_: torch.optim.Adam diff --git a/configs/model/manner_cr_module.yaml b/configs/model/manner_cr_module.yaml index 83c4f8a..8a28358 100644 --- a/configs/model/manner_cr_module.yaml +++ b/configs/model/manner_cr_module.yaml @@ -34,12 +34,16 @@ outputs: "target_sentiments", "hist_categories", "hist_sentiments", + "user_ids", + "cand_news_ids", ] # evaluation top_k_list: [5, 10] num_categ_classes: 18 num_sent_classes: 3 +save_recs: False +recs_fpath: "${paths.output_dir}/recommendations.json" optimizer: _target_: torch.optim.Adam diff --git a/configs/model/manner_module.yaml b/configs/model/manner_module.yaml index 1b3adb8..be2c06a 100644 --- a/configs/model/manner_module.yaml +++ b/configs/model/manner_module.yaml @@ -18,12 +18,16 @@ outputs: "target_sentiments", "hist_categories", "hist_sentiments", + "user_ids", + "cand_news_ids", ] # evaluation top_k_list: [5, 10] num_categ_classes: 18 num_sent_classes: 3 +save_recs: False +recs_fpath: "${paths.output_dir}/recommendations.json" optimizer: null scheduler: null diff --git a/configs/model/miner.yaml b/configs/model/miner.yaml index aeefaf4..8aa28de 100644 --- a/configs/model/miner.yaml +++ b/configs/model/miner.yaml @@ -39,12 +39,16 @@ outputs: "target_sentiments", "hist_categories", "hist_sentiments", + "user_ids", + "cand_news_ids", ] # evaluation top_k_list: [5, 10] num_categ_classes: 18 num_sent_classes: 3 +save_recs: False +recs_fpath: "${paths.output_dir}/recommendations.json" optimizer: _target_: torch.optim.Adam diff --git a/configs/model/mins.yaml b/configs/model/mins.yaml index 07aafaf..6514885 100644 --- a/configs/model/mins.yaml +++ b/configs/model/mins.yaml @@ -40,12 +40,16 @@ outputs: "target_sentiments", "hist_categories", "hist_sentiments", + "user_ids", + "cand_news_ids", ] # evaluation top_k_list: [5, 10] num_categ_classes: 18 num_sent_classes: 3 +save_recs: False +recs_fpath: "${paths.output_dir}/recommendations.json" optimizer: _target_: torch.optim.Adam diff --git a/configs/model/naml.yaml b/configs/model/naml.yaml index abf9aea..94a2483 100644 --- a/configs/model/naml.yaml +++ b/configs/model/naml.yaml @@ -38,12 +38,16 @@ outputs: "target_sentiments", "hist_categories", "hist_sentiments", + "user_ids", + "cand_news_ids", ] # evaluation top_k_list: [5, 10] num_categ_classes: 18 num_sent_classes: 3 +save_recs: False +recs_fpath: "${paths.output_dir}/recommendations.json" optimizer: _target_: torch.optim.Adam diff --git a/configs/model/npa.yaml b/configs/model/npa.yaml index 4b2b2c6..40282a3 100644 --- a/configs/model/npa.yaml +++ b/configs/model/npa.yaml @@ -32,12 +32,16 @@ outputs: "target_sentiments", "hist_categories", "hist_sentiments", + "user_ids", + "cand_news_ids", ] # evaluation top_k_list: [5, 10] num_categ_classes: 18 num_sent_classes: 3 +save_recs: False +recs_fpath: "${paths.output_dir}/recommendations.json" optimizer: _target_: torch.optim.Adam diff --git a/configs/model/nrms.yaml b/configs/model/nrms.yaml index b60ed12..13546b9 100644 --- a/configs/model/nrms.yaml +++ b/configs/model/nrms.yaml @@ -35,12 +35,16 @@ outputs: "target_sentiments", "hist_categories", "hist_sentiments", + "user_ids", + "cand_news_ids", ] # evaluation top_k_list: [5, 10] num_categ_classes: 18 num_sent_classes: 3 +save_recs: False +recs_fpath: "${paths.output_dir}/recommendations.json" optimizer: _target_: torch.optim.Adam diff --git 
diff --git a/configs/model/senti_debias.yaml b/configs/model/senti_debias.yaml
index 8ff4251..ad7ca59 100644
--- a/configs/model/senti_debias.yaml
+++ b/configs/model/senti_debias.yaml
@@ -45,6 +45,8 @@ outputs:
       "target_sentiments",
       "hist_categories",
       "hist_sentiments",
+      "user_ids",
+      "cand_news_ids",
     ]

 # loss coefficients
@@ -55,6 +57,8 @@ beta_coefficient: 10
 top_k_list: [5, 10]
 num_categ_classes: 18
 num_sent_classes: 3
+save_recs: False
+recs_fpath: "${paths.output_dir}/recommendations.json"

 optimizer: null

diff --git a/configs/model/sentirec.yaml b/configs/model/sentirec.yaml
index def2ea3..8b9ade1 100644
--- a/configs/model/sentirec.yaml
+++ b/configs/model/sentirec.yaml
@@ -41,12 +41,16 @@ outputs:
       "target_sentiments",
       "hist_categories",
       "hist_sentiments",
+      "user_ids",
+      "cand_news_ids",
     ]

 # evaluation
 top_k_list: [5, 10]
 num_categ_classes: 18
 num_sent_classes: 3
+save_recs: False
+recs_fpath: "${paths.output_dir}/recommendations.json"

 optimizer:
   _target_: torch.optim.Adam
diff --git a/configs/model/tanr.yaml b/configs/model/tanr.yaml
index 18bfe2a..41765b3 100644
--- a/configs/model/tanr.yaml
+++ b/configs/model/tanr.yaml
@@ -40,12 +40,16 @@ outputs:
       "target_sentiments",
       "hist_categories",
       "hist_sentiments",
+      "user_ids",
+      "cand_news_ids",
     ]

 # evaluation
 top_k_list: [5, 10]
 num_categ_classes: 18
 num_sent_classes: 3
+save_recs: False
+recs_fpath: "${paths.output_dir}/recommendations.json"

 optimizer:
   _target_: torch.optim.Adam
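Note: the test loops above delegate the actual writing to self._save_recommendations(...), whose implementation is not part of this patch excerpt. The sketch below is an illustration only, assuming the dictionary returned by _get_recommendations is JSON-serializable and keyed by user ID; the function name, the directory handling, and the inner dictionary keys mentioned in the comments are assumptions, not the patch's actual code.

import json
import os
from typing import Any, Dict, List


def save_recommendations(recommendations: Dict[int, Dict[str, List[Any]]], fpath: str) -> None:
    """Writes the per-user recommendation lists and scores to disk as JSON.

    Illustrative sketch mirroring the _save_recommendations call in the test
    loops; recs_fpath defaults to "${paths.output_dir}/recommendations.json"
    in the model configs above.
    """
    # Make sure the target directory exists before writing the file.
    os.makedirs(os.path.dirname(fpath) or ".", exist_ok=True)

    with open(fpath, "w") as f:
        # Keys are user IDs; each value pairs the candidate news IDs with their
        # predicted scores (the exact inner keys depend on _get_recommendations).
        json.dump(recommendations, f)

With the configs above, saving stays disabled by default (save_recs: False) and can be switched on per model, for example through a Hydra override such as model.save_recs=True, optionally together with model.recs_fpath pointing at a custom output location.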