Skip to content

Commit

Permalink
Merge branch 'main' into feat-#12/CV
Browse files Browse the repository at this point in the history
  • Loading branch information
HeewonKwak authored May 31, 2023
2 parents d3ccdd6 + 388736d commit 624d9cf
Show file tree
Hide file tree
Showing 70 changed files with 45,223 additions and 270 deletions.
8 changes: 6 additions & 2 deletions DKT/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ input/
saved/
datasets/
submission/
output/

# editor, os cache directory
.vscode/
Expand All @@ -119,6 +120,9 @@ asset/

# model
save_pic/
*.txt
*.png
*.pickle
*.pkl
lgbm_model.txt
95 changes: 95 additions & 0 deletions DKT/args_LQ.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import argparse


def _str2bool(value):
    """Parse a command-line boolean string.

    argparse's ``type=bool`` treats every non-empty string as True
    (``bool("False") is True``), so boolean flags could never be
    disabled from the CLI. This helper accepts the usual spellings.

    Raises:
        argparse.ArgumentTypeError: if the value is not a recognizable boolean.
    """
    if isinstance(value, bool):
        return value
    lowered = value.lower()
    if lowered in ("true", "t", "yes", "y", "1"):
        return True
    if lowered in ("false", "f", "no", "n", "0"):
        return False
    raise argparse.ArgumentTypeError(f"boolean value expected, got {value!r}")


def parse_args(mode="train"):
    """Build and parse command-line arguments for LastQuery training/inference.

    Args:
        mode: kept for interface compatibility with callers; not used here.

    Returns:
        argparse.Namespace with all training, model and data options.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("--seed", default=42, type=int, help="seed")
    parser.add_argument("--device", default="cpu", type=str, help="cpu or gpu")

    # -- Data paths and file names
    parser.add_argument(
        "--data_dir",
        default="/opt/ml/input/data/",
        type=str,
        help="data directory",
    )
    parser.add_argument(
        "--asset_dir", default="asset/", type=str, help="data directory"
    )
    parser.add_argument(
        "--file_name", default="train_data.csv", type=str, help="train file name"
    )

    # -- Model path/name and output location
    parser.add_argument(
        "--model_dir", default="models/", type=str, help="model directory"
    )
    parser.add_argument(
        "--model_name", default="model.pt", type=str, help="model file name"
    )
    parser.add_argument(
        "--output_dir", default="output/", type=str, help="output directory"
    )
    parser.add_argument(
        "--test_file_name", default="test_data.csv", type=str, help="test file name"
    )

    parser.add_argument(
        "--max_seq_len", default=30, type=int, help="max sequence length"
    )
    parser.add_argument("--num_workers", default=4, type=int, help="number of workers")

    # Model hyperparameters
    parser.add_argument(
        "--hidden_dim", default=300, type=int, help="hidden dimension size"
    )
    parser.add_argument("--n_layers", default=2, type=int, help="number of layers")
    parser.add_argument("--n_heads", default=4, type=int, help="number of heads")
    parser.add_argument("--drop_out", default=0.2, type=float, help="drop out rate")

    # Training hyperparameters
    parser.add_argument("--n_epochs", default=30, type=int, help="number of epochs")
    parser.add_argument("--batch_size", default=64, type=int, help="batch size")
    parser.add_argument("--lr", default=0.009668, type=float, help="learning rate")
    parser.add_argument("--clip_grad", default=10, type=int, help="clip grad")
    parser.add_argument("--patience", default=10, type=int, help="for early stopping")

    parser.add_argument(
        "--log_steps", default=50, type=int, help="print log per n steps"
    )

    ### Important ###
    parser.add_argument("--model", default="LastQuery", type=str, help="model type")
    parser.add_argument("--optimizer", default="adam", type=str, help="optimizer type")
    parser.add_argument(
        "--scheduler", default="plateau", type=str, help="scheduler type"
    )

    # -- Data split methods: default(user), k-fold, ...
    parser.add_argument(
        "--split_method", default="k-fold", type=str, help="data split strategy"
    )
    # NOTE: was type=str, which made any CLI override a string and broke
    # downstream k-fold code expecting an int — fixed to type=int.
    parser.add_argument(
        "--n_splits", default=5, type=int, help="number of k-fold splits"
    )

    ### Augmentation options ###
    # NOTE: these were type=bool, under which any non-empty string parses
    # as True — replaced with _str2bool so "--window False" works.
    parser.add_argument(
        "--window", default=True, type=_str2bool,
        help="augmentation with stride window",
    )
    parser.add_argument(
        "--shuffle", default=False, type=_str2bool, help="data shuffle option"
    )
    parser.add_argument("--stride", default=80, type=int)
    parser.add_argument("--shuffle_n", default=2, type=int)

    ### T-Fixup options ###
    parser.add_argument("--Tfixup", default=False, type=_str2bool, help="Tfixup")

    args = parser.parse_args()

    # args.stride = args.max_seq_len

    return args
21 changes: 18 additions & 3 deletions DKT/base/base_data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,17 @@
from torch.utils.data import DataLoader
from torch.utils.data.dataloader import default_collate
from torch.utils.data.sampler import SubsetRandomSampler
from sklearn.model_selection import KFold


class BaseDataLoader(DataLoader):
"""
Base class for all data loaders
"""
def __init__(self, dataset, batch_size, shuffle, validation_split, num_workers, collate_fn=default_collate):
def __init__(self, dataset, batch_size, shuffle, validation_split, num_workers, fold, collate_fn=default_collate):
self.validation_split = validation_split
self.shuffle = shuffle
self.fold = fold

self.batch_idx = 0
self.n_samples = len(dataset)
Expand Down Expand Up @@ -42,8 +44,21 @@ def _split_sampler(self, split):
else:
len_valid = int(self.n_samples * split)

valid_idx = idx_full[0:len_valid]
train_idx = np.delete(idx_full, np.arange(0, len_valid))
if self.fold == 0:
valid_idx = idx_full[0:len_valid]
train_idx = np.delete(idx_full, np.arange(0, len_valid))
elif self.fold == 1:
valid_idx = idx_full[len_valid:2*len_valid]
train_idx = np.delete(idx_full, np.arange(0, len_valid))
elif self.fold == 2:
valid_idx = idx_full[2*len_valid:3*len_valid]
train_idx = np.delete(idx_full, np.arange(0, len_valid))
elif self.fold == 3:
valid_idx = idx_full[3*len_valid:4*len_valid]
train_idx = np.delete(idx_full, np.arange(0, len_valid))
else:
valid_idx = idx_full[4*len_valid:]
train_idx = np.delete(idx_full, np.arange(0, len_valid))

train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)
Expand Down
7 changes: 4 additions & 3 deletions DKT/base/base_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,15 @@ class BaseTrainer:
"""
Base class for all trainers
"""
def __init__(self, model, criterion, metric_ftns, optimizer, config):
def __init__(self, model, criterion, metric_ftns, optimizer, config, fold):
self.config = config
self.logger = config.get_logger('trainer', config['trainer']['verbosity'])

self.model = model
self.criterion = criterion
self.metric_ftns = metric_ftns
self.optimizer = optimizer
self.fold = fold

cfg_trainer = config['trainer']
self.epochs = cfg_trainer['epochs']
Expand Down Expand Up @@ -119,9 +120,9 @@ def _save_checkpoint(self, epoch, save_best=False):
torch.save(state, filename)
self.logger.info("Saving checkpoint: {} ...".format(filename))
if save_best:
best_path = str(self.checkpoint_dir / 'model_best.pth')
best_path = str(self.checkpoint_dir / 'model_best{}.pth'.format(self.fold))
torch.save(state, best_path)
self.logger.info("Saving current best: model_best.pth ...")
self.logger.info("Saving current best: model_best{}.pth ...".format(self.fold))

def _resume_checkpoint(self, resume_path):
"""
Expand Down
50 changes: 0 additions & 50 deletions DKT/config.json

This file was deleted.

79 changes: 79 additions & 0 deletions DKT/config/config_HM.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
{
"name": "HybridModel",
"n_gpu": 1,

"arch": {
"type": "HMModel_lstm",
"args": {
"n_test": 1537,
"n_tag": 912,
"gamma": 1e-4,
"lambda": 0.8,
"hidden_dim": 256,
"n_layers": 3,
"n_heads": 4,
"drop_out": 0.4,
"model_dir": "/opt/ml/level2_dkt-recsys-09/DKT/saved/models/UltraGCN/0524_043901/model_best.pth",
"ultragcn": {
"user_num": 7442,
"item_num": 9454,
"embedding_dim": 64,
"gamma": 1e-4,
"lambda": 0.8
}
}
},
"data_loader": {
"type": "HMDataLoader",
"args":{
"data_dir": "/opt/ml/input/data",
"asset_dir": "./asset",
"batch_size": 512,
"shuffle": true,
"num_workers": 2,
"max_seq_len": 200,
"validation_split": 0.2,
"stride": 10,
"shuffle_n": 2,
"shuffle_aug": false
}
},
"optimizer": {
"type": "Adam",
"args":{
"lr": 0.001,
"weight_decay": 0,
"amsgrad": true
}
},
"loss": "BCE_loss",
"metrics": [
"accuracy", "auc"
],
"lr_scheduler": {
"type": "StepLR",
"args": {
"step_size": 50,
"gamma": 0.1
}
},
"trainer": {
"epochs": 100,

"save_dir": "saved/",
"save_period": 1,
"verbosity": 2,

"monitor": "min val_loss",
"early_stop": 10,

"tensorboard": true
},
"test": {
"data_dir": "~/input/data/test_data_modify.csv",
"model_dir": "/opt/ml/level2_dkt-recsys-09/DKT/saved/models/HybridModel/0524_162035/model_best.pth",
"submission_dir": "~/level2_dkt-recsys-09/DKT/submission/UltraGCN_HM_aug_lstm.csv",
"sample_submission_dir": "~/input/data/sample_submission.csv",
"batch_size": 128
}
}
55 changes: 55 additions & 0 deletions DKT/config/config_LGBM.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
{
"name": "LGBM",
"n_gpu": 1,
"seed":42,
"pic_dir": "save_pic/",
"output_dir": "output/",

"arch": {
"type": "",
"args": {}
},
"data_loader": {
"data_dir": "/opt/ml/input/data/",
"df_train": "train_data.csv",
"df_test": "test_data.csv",
"feature_engineering": false,
"fe_train": "train_featured.csv",
"fe_test": "test_featured.csv",
"asset_dir": "asset/",
"batch_size": 128,
"shuffle": true,
"split_ratio": 0.8,
"num_workers": 2
},
"optimizer": {
"type": "Adam",
"args":{
"lr": 0.001,
"weight_decay": 0,
"amsgrad": true
}
},
"loss": "nll_loss",
"metrics": [
"accuracy", "top_k_acc"
],
"lr_scheduler": {
"type": "StepLR",
"args": {
"step_size": 50,
"gamma": 0.1
}
},
"trainer": {
"num_boost_round": 2500,

"model_dir": "model/",
"verbos_eval": 100,
"threshold": 0.5,

"early_stopping_rounds": 100,

"tuning": false
}
}
Loading

0 comments on commit 624d9cf

Please sign in to comment.