Skip to content

Commit

Permalink
Merge pull request #10 from andreeaiana/feat_save_recommendations
Browse files Browse the repository at this point in the history
Feat: save recommendations list
  • Loading branch information
andreeaiana authored Mar 19, 2024
2 parents 3fce487 + 1b33d38 commit 1ac532e
Show file tree
Hide file tree
Showing 35 changed files with 636 additions and 46 deletions.
4 changes: 4 additions & 0 deletions configs/model/caum.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,16 @@ outputs:
"target_sentiments",
"hist_categories",
"hist_sentiments",
"user_ids",
"cand_news_ids",
]

# evaluation
top_k_list: [5, 10]
num_categ_classes: 18
num_sent_classes: 3
save_recs: False
recs_fpath: "${paths.output_dir}/recommendations.json"

optimizer:
_target_: torch.optim.Adam
Expand Down
4 changes: 4 additions & 0 deletions configs/model/cen_news_rec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,16 @@ outputs:
"target_sentiments",
"hist_categories",
"hist_sentiments",
"user_ids",
"cand_news_ids",
]

# evaluation
top_k_list: [5, 10]
num_categ_classes: 18
num_sent_classes: 3
save_recs: False
recs_fpath: "${paths.output_dir}/recommendations.json"

optimizer:
_target_: torch.optim.Adam
Expand Down
4 changes: 4 additions & 0 deletions configs/model/dkn.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,16 @@ outputs:
"target_sentiments",
"hist_categories",
"hist_sentiments",
"user_ids",
"cand_news_ids",
]

# evaluation
top_k_list: [5, 10]
num_categ_classes: 18
num_sent_classes: 3
save_recs: False
recs_fpath: "${paths.output_dir}/recommendations.json"

optimizer:
_target_: torch.optim.Adam
Expand Down
4 changes: 4 additions & 0 deletions configs/model/lstur.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,16 @@ outputs:
"target_sentiments",
"hist_categories",
"hist_sentiments",
"user_ids",
"cand_news_ids",
]

# evaluation
top_k_list: [5, 10]
num_categ_classes: 18
num_sent_classes: 3
save_recs: False
recs_fpath: "${paths.output_dir}/recommendations.json"

optimizer:
_target_: torch.optim.Adam
Expand Down
4 changes: 4 additions & 0 deletions configs/model/manner_cr_module.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,16 @@ outputs:
"target_sentiments",
"hist_categories",
"hist_sentiments",
"user_ids",
"cand_news_ids",
]

# evaluation
top_k_list: [5, 10]
num_categ_classes: 18
num_sent_classes: 3
save_recs: False
recs_fpath: "${paths.output_dir}/recommendations.json"

optimizer:
_target_: torch.optim.Adam
Expand Down
4 changes: 4 additions & 0 deletions configs/model/manner_module.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,16 @@ outputs:
"target_sentiments",
"hist_categories",
"hist_sentiments",
"user_ids",
"cand_news_ids",
]

# evaluation
top_k_list: [5, 10]
num_categ_classes: 18
num_sent_classes: 3
save_recs: False
recs_fpath: "${paths.output_dir}/recommendations.json"

optimizer: null
scheduler: null
4 changes: 4 additions & 0 deletions configs/model/miner.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,16 @@ outputs:
"target_sentiments",
"hist_categories",
"hist_sentiments",
"user_ids",
"cand_news_ids",
]

# evaluation
top_k_list: [5, 10]
num_categ_classes: 18
num_sent_classes: 3
save_recs: False
recs_fpath: "${paths.output_dir}/recommendations.json"

optimizer:
_target_: torch.optim.Adam
Expand Down
4 changes: 4 additions & 0 deletions configs/model/mins.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,16 @@ outputs:
"target_sentiments",
"hist_categories",
"hist_sentiments",
"user_ids",
"cand_news_ids",
]

# evaluation
top_k_list: [5, 10]
num_categ_classes: 18
num_sent_classes: 3
save_recs: False
recs_fpath: "${paths.output_dir}/recommendations.json"

optimizer:
_target_: torch.optim.Adam
Expand Down
4 changes: 4 additions & 0 deletions configs/model/naml.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,16 @@ outputs:
"target_sentiments",
"hist_categories",
"hist_sentiments",
"user_ids",
"cand_news_ids",
]

# evaluation
top_k_list: [5, 10]
num_categ_classes: 18
num_sent_classes: 3
save_recs: False
recs_fpath: "${paths.output_dir}/recommendations.json"

optimizer:
_target_: torch.optim.Adam
Expand Down
4 changes: 4 additions & 0 deletions configs/model/npa.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,16 @@ outputs:
"target_sentiments",
"hist_categories",
"hist_sentiments",
"user_ids",
"cand_news_ids",
]

# evaluation
top_k_list: [5, 10]
num_categ_classes: 18
num_sent_classes: 3
save_recs: False
recs_fpath: "${paths.output_dir}/recommendations.json"

optimizer:
_target_: torch.optim.Adam
Expand Down
4 changes: 4 additions & 0 deletions configs/model/nrms.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,16 @@ outputs:
"target_sentiments",
"hist_categories",
"hist_sentiments",
"user_ids",
"cand_news_ids",
]

# evaluation
top_k_list: [5, 10]
num_categ_classes: 18
num_sent_classes: 3
save_recs: False
recs_fpath: "${paths.output_dir}/recommendations.json"

optimizer:
_target_: torch.optim.Adam
Expand Down
4 changes: 4 additions & 0 deletions configs/model/senti_debias.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ outputs:
"target_sentiments",
"hist_categories",
"hist_sentiments",
"user_ids",
"cand_news_ids",
]

# loss coefficients
Expand All @@ -55,6 +57,8 @@ beta_coefficient: 10
top_k_list: [5, 10]
num_categ_classes: 18
num_sent_classes: 3
save_recs: False
recs_fpath: "${paths.output_dir}/recommendations.json"

optimizer: null

Expand Down
4 changes: 4 additions & 0 deletions configs/model/sentirec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,16 @@ outputs:
"target_sentiments",
"hist_categories",
"hist_sentiments",
"user_ids",
"cand_news_ids",
]

# evaluation
top_k_list: [5, 10]
num_categ_classes: 18
num_sent_classes: 3
save_recs: False
recs_fpath: "${paths.output_dir}/recommendations.json"

optimizer:
_target_: torch.optim.Adam
Expand Down
4 changes: 4 additions & 0 deletions configs/model/tanr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,16 @@ outputs:
"target_sentiments",
"hist_categories",
"hist_sentiments",
"user_ids",
"cand_news_ids",
]

# evaluation
top_k_list: [5, 10]
num_categ_classes: 18
num_sent_classes: 3
save_recs: False
recs_fpath: "${paths.output_dir}/recommendations.json"

optimizer:
_target_: torch.optim.Adam
Expand Down
10 changes: 6 additions & 4 deletions newsreclib/data/components/adressa_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -548,7 +548,7 @@ def _load_behaviors(self) -> pd.DataFrame:
log.info("Mapping uid to index.")
behaviors["user"] = behaviors["uid"].apply(lambda x: uid2index.get(x, 0))

behaviors = behaviors[["user", "history", "candidates", "labels"]]
behaviors = behaviors[["uid", "user", "history", "candidates", "labels"]]

# cache processed data
log.info(
Expand All @@ -560,7 +560,7 @@ def _load_behaviors(self) -> pd.DataFrame:

def _process_news_files(
self, filepath
) -> Tuple[Dict[str, str], Dict[str, str], Dict[str, str], Dict[str, int]]:
) -> Tuple[Dict[str, str], Dict[str, str], Dict[str, str], Dict[str, str]]:
"""Processes the news data.
Adapted from
Expand Down Expand Up @@ -604,7 +604,9 @@ def _process_news_files(
== event_dict["category1"].split("|")[-1]
)

nid2index = {k: v for k, v in zip(news_title.keys(), range(1, len(news_title) + 1))}
nid2index = {
k: "N" + str(v) for k, v in zip(news_title.keys(), range(1, len(news_title) + 1))
}

return news_title, news_category, news_subcategory, nid2index

Expand Down Expand Up @@ -664,7 +666,7 @@ def _process_users(
and event_dict["id"] in nid2index
):
nindex = nid2index[event_dict["id"]]
uid = event_dict["userId"]
uid = "U" + str(event_dict["userId"])

if uid not in uid2index:
uid2index[uid] = len(uid2index)
Expand Down
9 changes: 6 additions & 3 deletions newsreclib/data/components/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,19 @@ class RecommendationBatch(TypedDict):
Dictionary of news from the users' candidates, mapping news features to values.
labels:
Ground truth specifying whether the news is relevant to the user.
users:
Users included in the batch.
user_ids:
Original user IDs of the users included in the batch.
user_idx:
Indices of users included in the batch (e.g., for creating embedding matrix).
"""

batch_hist: torch.Tensor
batch_cand: torch.Tensor
x_hist: Dict[str, Any]
x_cand: Dict[str, Any]
labels: torch.Tensor
users: torch.Tensor
user_ids: torch.Tensor
user_idx: torch.Tensor


class NewsBatch(TypedDict):
Expand Down
2 changes: 1 addition & 1 deletion newsreclib/data/components/mind_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -590,7 +590,7 @@ def _load_behaviors(self) -> pd.DataFrame:

# cache parsed behaviors
log.info(f"Caching parsed behaviors of size {len(behaviors)} to {parsed_bhv_file}.")
behaviors = behaviors[["user", "history", "candidates", "labels"]]
behaviors = behaviors[["uid", "user", "history", "candidates", "labels"]]
file_utils.to_tsv(behaviors, parsed_bhv_file)

return behaviors
Expand Down
4 changes: 4 additions & 0 deletions newsreclib/data/components/news_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ def _tokenize_plm(self, text: List[str]):
def _tokenize_df(self, df: pd.DataFrame) -> Dict[str, Any]:
batch_out = {}

# news IDs (i.e., keep only numeric part of unique NID)
nids = np.array([int(nid.split("N")[-1]) for nid in df.index.values])
batch_out["news_ids"] = torch.from_numpy(nids).long()

if not self.concatenate_inputs:
# prepare text
if not self.use_plm:
Expand Down
Loading

0 comments on commit 1ac532e

Please sign in to comment.