refactor: refactor for lgcnlstmattn
asdftyui committed Jun 1, 2023
1 parent cf5c721 commit 35d6adb
Showing 9 changed files with 96 additions and 246 deletions.
56 changes: 25 additions & 31 deletions DKT/config/config_lgcnlstmattn.json
@@ -3,16 +3,6 @@
     "n_gpu": 1,
     "seed": 42,

-    "arch": {
-        "type": "lgcnLSTMattn",
-        "args": {
-            "user_num": 7442,
-            "item_num": 9454,
-            "embedding_dim": 64,
-            "gamma": 1e-4,
-            "lambda": 0.8
-        }
-    },
     "data_loader": {
         "type": "lgcnLSTMattnDataLoader",
         "args":{
@@ -21,29 +11,30 @@
             "shuffle": true,
             "num_workers": 2,
             "validation_split": 0.2,
-            "asset_dir": "/opt/ml/level2_dkt-recsys-09/DKT/asset"
+            "asset_dir": "/opt/ml/level2_dkt-recsys-09/DKT/asset",
+            "max_seq_len": 200
         }
     },

     "optimizer": {
-        "type": "Adam",
-        "args":{
-            "lr": 0.001,
-            "weight_decay": 0,
-            "amsgrad": true
-        }
+        "name": "adam",
+        "lr": 0.001,
+        "weight_decay": 0.01,
+        "amsgrad": true
     },

     "loss": "lgcnLSTMattn_loss",
     "metrics": [
         "accuracy",
         "auc"
     ],
-    "lr_scheduler": {
-        "type": "StepLR",
-        "args": {
-            "step_size": 50,
-            "gamma": 0.1
-        }
+
+    "scheduler": {
+        "name": "plateau",
+        "warmup_steps": 0,
+        "total_steps": 0
     },

     "model": {
         "name": "geslstmattn",
         "max_seq_len": 200,
@@ -53,8 +44,14 @@
         "drop_out": 0.4,
         "gcn_n_layes": 2,
         "alpha": 1.0,
-        "beta": 1.0
+        "beta": 1.0,
+        "n_questions": 0,
+        "n_test": 0,
+        "n_tag": 0,
+        "gcn_n_items": 0,
+        "device": "cuda"
     },

     "trainer": {
         "n_epochs": 60,
         "batch_size": 70,
@@ -68,15 +65,12 @@
         "verbosity": 2,

         "monitor": "min val_loss",
-        "early_stop": 10,
-
-        "tensorboard": false
+        "early_stop": 10
     },

     "test": {
         "data_dir": "~/input/data/test_data_modify.csv",
-        "model_dir": "./saved/models/LGCNtrans/0518_033541/model_best.pth",
-        "submission_dir": "~/level2_dkt-recsys-09/DKT/submission/lgcnLSTMattn_submission.csv",
-        "sample_submission_dir": "~/input/data/sample_submission.csv",
+        "model_dir": "/opt/ml/level2_dkt-recsys-09/DKT/saved/lgcnLSTMattn/model.pt",
+        "submission_dir": "/opt/ml/level2_dkt-recsys-09/DKT/submission/",
         "batch_size": 512
     }
 }
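For context, a minimal sketch of how the flattened config might be consumed after this commit. The json import is standard library; the helper import paths mirror the file names in this commit but are assumptions, and the optimizer/scheduler calls are commented out because model construction happens elsewhere:

    import json

    # Assumed module paths, based on the file names changed in this commit.
    from model.optimizer_lgcnlstmattn import get_optimizer
    from model.scheduler_lgcnlstmattn import get_scheduler

    # Path relative to the DKT/ directory.
    with open("config/config_lgcnlstmattn.json") as f:
        config = json.load(f)

    # Optimizer/scheduler settings are now flat dicts keyed by "name" rather
    # than nested {"type": ..., "args": {...}} blocks, so they can be splatted
    # directly into the helpers:
    # optimizer = get_optimizer(model, **config["optimizer"])
    # scheduler = get_scheduler(optimizer, **config["scheduler"])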
25 changes: 12 additions & 13 deletions DKT/data_loader/dataloader_lgcnlstmattn.py
@@ -15,7 +15,7 @@


 class Preprocess:
-    def __init__(self, **args):
+    def __init__(self, args):
         self.args = args
         self.train_data = None
         self.test_data = None
@@ -42,15 +42,15 @@ def split_data(self, data, ratio=0.8, shuffle=True, seed=0):
         return data_1, data_2

     def __save_labels(self, encoder, name):
-        le_path = os.path.join(self.args['asset_dir'], name + "_classes.npy")
+        le_path = os.path.join(self.args['data_loader']['args']['asset_dir'], name + "_classes.npy")
         np.save(le_path, encoder.classes_)

     def __preprocessing(self, df, is_train=True):
         cate_cols = ["assessmentItemID", "testId", "KnowledgeTag"]


-        if not os.path.exists(self.args['asset_dir']):
-            os.makedirs(self.args['asset_dir'])
+        if not os.path.exists(self.args['data_loader']['args']['asset_dir']):
+            os.makedirs(self.args['data_loader']['args']['asset_dir'])


         for col in cate_cols:
@@ -62,7 +62,7 @@ def __preprocessing(self, df, is_train=True):
                 le.fit(a)
                 self.__save_labels(le, col)
             else:
-                label_path = os.path.join(self.args['asset_dir'], col + "_classes.npy")
+                label_path = os.path.join(self.args['data_loader']['args']['asset_dir'], col + "_classes.npy")
                 le.classes_ = np.load(label_path)

             df[col] = df[col].apply(
@@ -97,21 +97,20 @@ def __feature_engineering(self, df, is_train):
         return df

     def load_data_from_file(self, file_name, is_train=True):
-        csv_file_path = os.path.join(self.args['data_dir'], file_name)
+        csv_file_path = os.path.join(self.args['data_loader']['args']['data_dir'], file_name)
         df = pd.read_csv(csv_file_path, parse_dates=['Timestamp'])  # , nrows=100000)
         df = self.__feature_engineering(df, is_train)
         df = self.__preprocessing(df, is_train)

         # Used later to determine the input size of the embedding_layer when embedding features

-        self.args['n_questions'] = len(
-            np.load(os.path.join(self.args['asset_dir'], "assessmentItemID_classes.npy"))
+        self.args['model']['n_questions'] = len(
+            np.load(os.path.join(self.args['data_loader']['args']['asset_dir'], "assessmentItemID_classes.npy"))
         )
-        self.args['n_test'] = len(
-            np.load(os.path.join(self.args['asset_dir'], "testId_classes.npy"))
+        self.args['model']['n_test'] = len(
+            np.load(os.path.join(self.args['data_loader']['args']['asset_dir'], "testId_classes.npy"))
         )
-        self.args['n_tag'] = len(
-            np.load(os.path.join(self.args['asset_dir'], "KnowledgeTag_classes.npy"))
+        self.args['model']['n_tag'] = len(
+            np.load(os.path.join(self.args['data_loader']['args']['asset_dir'], "KnowledgeTag_classes.npy"))
         )

         df = df.sort_values(by=["userID", "Timestamp"], axis=0)
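A short usage sketch of the changed constructor, assuming `config` is the dict loaded from config_lgcnlstmattn.json; the CSV file name here is illustrative, not taken from the diff:

    # Preprocess now takes the full config dict instead of **kwargs and reads
    # nested keys like config['data_loader']['args']['asset_dir'] itself.
    preprocess = Preprocess(config)
    train_data = preprocess.load_data_from_file("train_data.csv", is_train=True)

    # Side effect: load_data_from_file writes the label-encoder vocabulary
    # sizes back into config['model'] (n_questions, n_test, n_tag), replacing
    # the placeholder zeros in the JSON.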
145 changes: 0 additions & 145 deletions DKT/data_loader/preprocess_lgcntrans.py

This file was deleted.

11 changes: 8 additions & 3 deletions DKT/model/model_lgcnlstmattn.py
@@ -13,12 +13,17 @@
 gc.collect()
 torch.cuda.empty_cache()

+try:
+    from transformers.modeling_bert import BertConfig, BertEncoder, BertModel
+except:
+    from transformers.models.bert.modeling_bert import BertConfig, BertEncoder, BertModel
+

 class GESLSTMATTN(nn.Module):
     def __init__(self, adj_matrix, **args):
         super(GESLSTMATTN, self).__init__()
         self.args = args
-        self.device = self.args.device
+        self.device = self.args['device']

         # Set Parameter
         self.CONTISIZE = 6
@@ -141,7 +146,7 @@ def get_embedding(self, edge_index: Adj, edge_weight) -> Tensor:
         out = out + x
         out = out / (self.gcn_n_layes + 1)

-        padding = torch.tensor([[0] * (self.hidden_dim // 3)]).to(self.args.device)
+        padding = torch.tensor([[0] * (self.hidden_dim // 3)]).to(self.device)
         out = torch.cat((padding, out))

         return out
@@ -159,7 +164,7 @@ def get_GES_embedding(self):
         out = torch.stack(embeddings_list, dim=1)
         out = torch.mean(out, dim=1)

-        padding = torch.tensor([[0] * (self.hidden_dim // 3)]).to(self.args.device)
+        padding = torch.tensor([[0] * (self.hidden_dim // 3)]).to(self.device)
         out = torch.cat((padding, out))
         return out
     # ========================================================================================
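Since the model now reads its device via dict access (self.args['device']), it can be built straight from the config's model sub-dict. A hedged sketch only; adj_matrix is produced by a graph-construction step outside this diff and is assumed here:

    # Hypothetical instantiation; **config["model"] supplies max_seq_len,
    # drop_out, device, and the n_* sizes filled in by Preprocess.
    model = GESLSTMATTN(adj_matrix, **config["model"])
    model.to(config["model"]["device"])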
10 changes: 5 additions & 5 deletions DKT/model/optimizer_lgcnlstmattn.py
@@ -1,11 +1,11 @@
 from torch.optim import Adam, AdamW


-def get_optimizer(model, args):
-    if args.optimizer == "adam":
-        optimizer = Adam(model.parameters(), lr=args.lr, weight_decay=0.01)
-    if args.optimizer == "adamW":
-        optimizer = AdamW(model.parameters(), lr=args.lr, weight_decay=0.01)
+def get_optimizer(model, **args):
+    if args['name'] == "adam":
+        optimizer = Adam(model.parameters(), lr=args['lr'], weight_decay=args['weight_decay'])
+    if args['name'] == "adamW":
+        optimizer = AdamW(model.parameters(), lr=args['lr'], weight_decay=args['weight_decay'])

     # Initialize all parameters' grad values to 0
     optimizer.zero_grad()
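With the refactored signature, the call site passes the config's optimizer dict as keyword arguments; a minimal sketch assuming `model` is an instantiated nn.Module:

    optimizer = get_optimizer(model, **config["optimizer"])
    # Equivalent to: get_optimizer(model, name="adam", lr=0.001,
    # weight_decay=0.01, amsgrad=True). Note that amsgrad is accepted via
    # **args but not forwarded to Adam in the code above.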
10 changes: 5 additions & 5 deletions DKT/model/scheduler_lgcnlstmattn.py
@@ -2,15 +2,15 @@
 from transformers import get_linear_schedule_with_warmup


-def get_scheduler(optimizer, args):
-    if args.scheduler == "plateau":
+def get_scheduler(optimizer, **args):
+    if args['name'] == "plateau":
         scheduler = ReduceLROnPlateau(
             optimizer, patience=10, factor=0.5, mode="max", verbose=True
         )
-    elif args.scheduler == "linear_warmup":
+    elif args['name'] == "linear_warmup":
         scheduler = get_linear_schedule_with_warmup(
             optimizer,
-            num_warmup_steps=args.warmup_steps,
-            num_training_steps=args.total_steps,
+            num_warmup_steps=args['warmup_steps'],
+            num_training_steps=args['total_steps'],
         )
     return scheduler
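And likewise for the scheduler, a sketch using the "plateau" entry from the config above:

    scheduler = get_scheduler(optimizer, **config["scheduler"])
    # With name="plateau", warmup_steps and total_steps are accepted but only
    # used by the "linear_warmup" branch; ReduceLROnPlateau (mode="max") steps
    # on a monitored metric, e.g. scheduler.step(val_auc).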