From 7e8bc2e2658ac4f77bba741c697ae1033dc56920 Mon Sep 17 00:00:00 2001
From: zhengdao-chen
Date: Sat, 4 May 2019 16:55:37 -0400
Subject: [PATCH] deleted logger

---
 src/Logger.py           | 254 ----------------------------------------
 src/main_gnn.py         |  29 ++---
 src/main_lgnn.py        |  28 ++---
 src/script_5SBM_gnn.sh  |   1 -
 src/script_5SBM_lgnn.sh |   1 -
 5 files changed, 23 insertions(+), 290 deletions(-)
 delete mode 100644 src/Logger.py

diff --git a/src/Logger.py b/src/Logger.py
deleted file mode 100644
index 22f3846..0000000
--- a/src/Logger.py
+++ /dev/null
@@ -1,254 +0,0 @@
-import numpy as np
-import os
-import matplotlib
-matplotlib.use('Agg')
-import matplotlib.pyplot as plt
-import matplotlib.cm as cm
-from scipy.spatial import ConvexHull
-
-import torch
-import torch.nn as nn
-from torch.autograd import Variable
-from torch import optim
-import torch.nn.functional as F
-
-from losses import compute_loss_multiclass, compute_accuracy_multiclass
-
-if torch.cuda.is_available():
-    dtype = torch.cuda.FloatTensor
-    dtype_l = torch.cuda.LongTensor
-    # torch.cuda.manual_seed(0)
-else:
-    dtype = torch.FloatTensor
-    dtype_l = torch.LongTensor
-    # torch.manual_seed(0)
-
-def compute_recovery_rate(pred, labels):
-    pred = pred.max(2)[1]
-    error = 1 - torch.eq(pred, labels).type(dtype)#.squeeze(2)
-    frob_norm = error.mean(1)#.squeeze(1)
-    accuracy = 1 - frob_norm
-    accuracy = accuracy.mean(0).squeeze()
-    return accuracy.data.cpu().numpy()#[0]
-
-def from_scores_to_labels(pred):
-    pred = pred.squeeze(0)
-    labels_pred = (pred[:, 0] > pred[:, 1]).type(dtype) * 2.0 - 1
-    return labels_pred
-
-def compute_accuracy_bcd(labels_pred, labels):
-    labels = labels.squeeze(0)
-    # print (pred)
-    # print (labels_pred)
-    # print (labels)
-    dot_product = labels_pred.dot(labels.type(dtype)).data
-    if torch.cuda.is_available():
-        dot_product = dot_product.cpu()
-    acc = abs(dot_product.numpy() / labels.data.shape[0])
-    return acc
-
-def from_scores_to_labels_batch(pred):
-    # pred = pred.squeeze(0)
-    labels_pred = (pred[:, :, 0] > pred[:, :, 1]).type(dtype) * 2.0 - 1
-    return labels_pred.unsqueeze(1)
-
-def from_scores_to_labels_mcd_batch(pred):
-    # pred = pred.squeeze(0)
-    # labels_pred = (pred[:, :, 0] > pred[:, :, 1]).type(dtype) * 2.0 - 1
-    labels_pred = np.argmax(pred, axis = 2).astype(int)
-    return labels_pred
-
-def from_scores_to_labels_batch_2(pred):
-    # pred = pred.squeeze(0)
-    pred_sorted, indices = pred[:, :, 0].sort(1)
-    labels_pred = torch.zeros(pred.data.shape[0], 1, pred.data.shape[1]).type(dtype)
-    for i in range(pred.data.shape[0]):
-        mid_pt = pred.data.shape[1] // 2
-        if torch.cuda.is_available():
-            indices = indices.cpu()
-        indices_for_plus1 = indices.data.numpy()[i, :pred.data.shape[1] // 2].tolist()
-        indices_for_neg1 = indices.data.numpy()[i, pred.data.shape[1] // 2 :].tolist()
-        labels_pred_ith = labels_pred[i, :, :]
-        labels_pred_ith[:, indices_for_plus1] = 1
-        labels_pred_ith[:, indices_for_neg1] = -1
-        labels_pred[i, :, :] = labels_pred_ith
-        #
-        # labels_pred[i, :, indices.data.numpy()[i, :pred.shape[1] // 2].tolist()] = 1
-        # labels_pred[i, :, indices.data.numpy()[i, pred.shape[1] // 2:].tolist()] = -1
-    # labels_pred = (pred[:, :, 0] > pred[:, :, 1]).type(dtype) * 2.0 - 1
-    return Variable(labels_pred)
-
-def compute_accuracy_bcd_batch(labels_pred, labels):
-    # labels = labels.squeeze(0)
-    # print (pred)
-    # print (labels_pred)
-    # print (labels)
-    # dot_product = labels_pred.dot(labels.type(dtype)).data
-
-    # print (labels.data.shape)
-    labels = labels.unsqueeze(2)
-    # print (labels_pred.data.shape)
-    # print (labels.data.shape)
-    # labels.unsqueeze(2)
-    # print (labels.data.shape)
-    # print ('labels_pred', labels_pred)
-    # print ('labels', labels)
-    dot_product = torch.bmm(labels_pred, labels.type(dtype)).data
-    # print ('dot product', dot_product)
-    # for i in range(labels.data.shape[0]):
-    #     print ('labels_pred', labels_pred[i].squeeze(0))
-    #     print ('labels_true', labels[i].squeeze(1))
-    if torch.cuda.is_available():
-        dot_product = dot_product.cpu()
-    acc = np.mean(abs(dot_product.numpy()) / labels.data.shape[1])
-    return acc
-
-def compute_accuracy_mcd_batch(labels_pred, labels):
-    overlap = (labels_pred == labels).astype(int)
-    acc = np.mean(labels_pred == labels)
-    return acc
-
-class Logger(object):
-    def __init__(self, path_logger):
-        directory = os.path.join(path_logger, 'plots/')
-        self.path = path_logger
-        self.path_dir = directory
-        # Create directory if necessary
-        try:
-            os.stat(directory)
-        except:
-            os.mkdir(directory)
-        self.loss_train = []
-        self.loss_test = []
-        self.accuracy_train = []
-        self.accuracy_test = []
-        self.args = None
-
-    def write_settings(self, args):
-        self.args = {}
-        # write info
-        path = os.path.join(self.path, 'experiment.txt')
-        with open(path, 'w') as file:
-            for arg in vars(args):
-                file.write(str(arg) + ' : ' + str(getattr(args, arg)) + '\n')
-                self.args[str(arg)] = getattr(args, arg)
-
-    def save_model(self, model):
-        save_dir = os.path.join(self.path, 'parameters/')
-        # Create directory if necessary
-        try:
-            os.stat(save_dir)
-        except:
-            os.mkdir(save_dir)
-        path = os.path.join(save_dir, 'gnn.pt')
-        torch.save(model, path)
-        print('Model Saved.')
-
-    def load_model(self):
-        load_dir = os.path.join(self.path, 'parameters/')
-        # check if any training has been done before.
-        try:
-            os.stat(load_dir)
-        except:
-            print("Training has not been done before testing. This session will be terminated.")
-            sys.exit()
-        path = os.path.join(load_dir, 'gnn.pt')
-        print('Loading the most recent model...')
-        siamese_gnn = torch.load(path)
-        return siamese_gnn
-
-    def add_train_loss(self, loss):
-        self.loss_train.append(loss.data.cpu().numpy())
-
-    def add_test_loss(self, loss):
-        self.loss_test.append(loss.data.cpu().numpy())
-
-    def add_train_accuracy(self, pred, labels):
-        accuracy = compute_recovery_rate(pred, labels)
-        self.accuracy_train.append(accuracy)
-
-    def add_train_accuracy_bcd(self, pred, labels):
-        labels_pred = from_scores_to_labels_batch(pred)
-        accuracy = compute_accuracy_bcd_batch(labels_pred, labels)
-        self.accuracy_train.append(accuracy)
-
-    def add_train_accuracy_mcd(self, pred, labels, n_classes):
-        # pred = pred.data.numpy()
-        # labels = labels.data.numpy()
-        # labels_pred = from_scores_to_labels_mcd_batch(pred)
-        # accuracy = compute_accuracy_mcd_batch(labels_pred, labels)
-        # self.accuracy_train.append(accuracy)
-
-        accuracy = compute_accuracy_multiclass(pred, labels, n_classes)
-        self.accuracy_train.append(accuracy)
-
-    def add_test_accuracy(self, pred, labels):
-        accuracy = compute_recovery_rate(pred, labels)
-        self.accuracy_test.append(accuracy)
-
-    def add_test_accuracy_bcd(self, pred, labels):
-        # labels_pred = from_scores_to_labels_batch_2(pred)
-        labels_pred = from_scores_to_labels_batch(pred)
-        accuracy = compute_accuracy_bcd_batch(labels_pred, labels)
-        self.accuracy_test.append(accuracy)
-
-    def add_test_accuracy_mcd(self, pred, labels, n_classes):
-        accuracy = compute_accuracy_multiclass(pred, labels, n_classes)
-        self.accuracy_test.append(accuracy)
-
-
-    def plot_train_loss(self):
-        plt.figure(0)
-        plt.clf()
-        iters = range(len(self.loss_train))
-        plt.semilogy(iters, self.loss_train, 'b')
-        plt.xlabel('iterations')
-        plt.ylabel('Cross Entropy Loss')
-        plt.title('Training Loss: p={}, p_e={}'
-                  .format(self.args['edge_density'], self.args['noise']))
-        path = os.path.join(self.path_dir, 'training_loss.png')
-        plt.savefig(path)
-
-    def plot_test_loss(self):
-        plt.figure(1)
-        plt.clf()
-        test_freq = self.args['test_freq']
-        iters = test_freq * np.arange(len(self.loss_test))
-        plt.semilogy(iters.tolist(), self.loss_test, 'b')
-        plt.xlabel('iterations')
-        plt.ylabel('Cross Entropy Loss')
-        plt.title('Testing Loss: p={}, p_e={}'
-                  .format(self.args['edge_density'], self.args['noise']))
-        path = os.path.join(self.path_dir, 'testing_loss.png')
-        plt.savefig(path)
-
-    def plot_train_accuracy(self):
-        plt.figure(0)
-        plt.clf()
-        iters = range(len(self.accuracy_train))
-        plt.plot(iters, self.accuracy_train, 'b')
-        plt.xlabel('iterations')
-        plt.ylabel('Accuracy')
-        plt.title('Training Accuracy: p={}, p_e={}'
-                  .format(self.args['edge_density'], self.args['noise']))
-        path = os.path.join(self.path_dir, 'training_accuracy.png')
-        plt.savefig(path)
-
-    def plot_test_accuracy(self):
-        plt.figure(1)
-        plt.clf()
-        test_freq = self.args['test_freq']
-        iters = test_freq * np.arange(len(self.accuracy_test))
-        plt.plot(iters.tolist(), self.accuracy_test, 'b')
-        plt.xlabel('iterations')
-        plt.ylabel('Accuracy')
-        plt.title('Testing Accuracy: p={}, p_e={}'
-                  .format(self.args['edge_density'], self.args['noise']))
-        path = os.path.join(self.path_dir, 'testing_accuracy.png')
-        plt.savefig(path)
-
-    def save_results(self):
-        path = os.path.join(self.path, 'results.npz')
-        np.savez(path, accuracy_train=np.array(self.accuracy_train),
-                 accuracy_test=np.array(self.accuracy_test),
-                 loss_train=self.loss_train, loss_test=self.loss_test)
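Note on the deletion above: Logger.save_results was the only code path that persisted the accuracy and loss curves to disk (results.npz); after this patch the training and test loops only print summary statistics. If an on-disk record is still wanted, a minimal stand-alone sketch with numpy follows; the helper name save_run_results and the suggested call site are hypothetical, not part of this repository:

    import numpy as np

    def save_run_results(path, loss_lst, acc_lst):
        # Same idea as the deleted Logger.save_results: bundle the metric
        # arrays into a single .npz archive, recoverable via np.load(path).
        np.savez(path,
                 loss_train=np.asarray(loss_lst),
                 accuracy_train=np.asarray(acc_lst))

    # e.g. at the end of train(): save_run_results('results.npz', loss_lst, acc_lst)
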
diff --git a/src/main_gnn.py b/src/main_gnn.py
index 2d575dc..0e4a211 100644
--- a/src/main_gnn.py
+++ b/src/main_gnn.py
@@ -7,7 +7,6 @@
 from data_generator import Generator
 from load import get_lg_inputs, get_gnn_inputs
 from models import GNN_bcd, GNN_multiclass
-from Logger import Logger
 import time
 import matplotlib
 matplotlib.use('Agg')
@@ -52,7 +51,6 @@
                     default='ErdosRenyi')
 parser.add_argument('--batch_size', nargs='?', const=1, type=int, default=1)
 parser.add_argument('--mode', nargs='?', const=1, type=str, default='train')
-parser.add_argument('--path_logger', nargs='?', const=1, type=str, default='')
 parser.add_argument('--path_gnn', nargs='?', const=1, type=str, default='')
 parser.add_argument('--filename_existing_gnn', nargs='?', const=1, type=str, default='')
 parser.add_argument('--print_freq', nargs='?', const=1, type=int, default=100)
@@ -96,7 +94,7 @@
 template3 = '{:<10} {:<10} {:<10} '
 template4 = '{:<10} {:<10.5f} {:<10.5f} \n'
 
-def train_mcd_single(gnn, optimizer, logger, gen, n_classes, it):
+def train_mcd_single(gnn, optimizer, gen, n_classes, it):
     start = time.time()
     W, labels = gen.sample_otf_single(is_training=True, cuda=torch.cuda.is_available())
     labels = labels.type(dtype_l)
@@ -132,7 +130,7 @@ def train_mcd_single(gnn, optimizer, logger, gen, n_classes, it):
     else:
         loss_value = float(loss.data.numpy())
 
-    info = ['epoch', 'avg loss', 'avg acc', 'edge_density',
+    info = ['iter', 'avg loss', 'avg acc', 'edge_density',
             'noise', 'model', 'elapsed']
     out = [it, loss_value, acc, args.edge_density,
            args.noise, 'GNN', elapsed]
@@ -144,13 +142,13 @@ def train_mcd_single(gnn, optimizer, logger, gen, n_classes, it):
     return loss_value, acc
 
-def train(gnn, logger, gen, n_classes=args.n_classes, iters=args.num_examples_train):
+def train(gnn, gen, n_classes=args.n_classes, iters=args.num_examples_train):
     gnn.train()
     optimizer = torch.optim.Adamax(gnn.parameters(), lr=args.lr)
     loss_lst = np.zeros([iters])
     acc_lst = np.zeros([iters])
     for it in range(iters):
-        loss_single, acc_single = train_mcd_single(gnn, optimizer, logger, gen, n_classes, it)
+        loss_single, acc_single = train_mcd_single(gnn, optimizer, gen, n_classes, it)
         loss_lst[it] = loss_single
         acc_lst[it] = acc_single
         torch.cuda.empty_cache()
@@ -158,7 +156,7 @@ def train(gnn, logger, gen, n_classes=args.n_classes, iters=args.num_examples_tr
     print ('Avg train acc', np.mean(acc_lst))
     print ('Std train acc', np.std(acc_lst))
 
-def test_mcd_single(gnn, logger, gen, n_classes, iter):
+def test_mcd_single(gnn, gen, n_classes, it):
     start = time.time()
     W, labels = gen.sample_otf_single(is_training=False, cuda=torch.cuda.is_available())
@@ -196,9 +194,9 @@ def test_mcd_single(gnn, logger, gen, n_classes, iter):
     else:
         loss_value = float(loss_test.data.numpy())
 
-    info = ['epoch', 'avg loss', 'avg acc', 'edge_density',
+    info = ['iter', 'avg loss', 'avg acc', 'edge_density',
             'noise', 'model', 'elapsed']
-    out = [iter, loss_value, acc_test, args.edge_density,
+    out = [it, loss_value, acc_test, args.edge_density,
            args.noise, 'GNN', elapsed]
     print(template1.format(*info))
     print(template2.format(*out))
@@ -208,13 +206,13 @@ def test_mcd_single(gnn, logger, gen, n_classes, iter):
     return loss_value, acc_test
 
-def test(gnn, logger, gen, n_classes, iters=args.num_examples_test):
+def test(gnn, gen, n_classes, iters=args.num_examples_test):
     gnn.train()
     loss_lst = np.zeros([iters])
     acc_lst = np.zeros([iters])
     for it in range(iters):
         # inputs, labels, W = gen.sample_single(it, cuda=torch.cuda.is_available(), is_training=False)
-        loss_single, acc_single = test_mcd_single(gnn, logger, gen, n_classes, it)
+        loss_single, acc_single = test_mcd_single(gnn, gen, n_classes, it)
         loss_lst[it] = loss_single
         acc_lst[it] = acc_single
         torch.cuda.empty_cache()
@@ -230,9 +228,6 @@ def count_parameters(model):
 
 if __name__ == '__main__':
     # print (args.eval_vs_train)
-    logger = Logger(args.path_logger)
-    logger.write_settings(args)
-
     ## One fixed generator
     gen = Generator()
     ## generator setup
@@ -295,9 +290,9 @@ def count_parameters(model):
         gnn.cuda()
     print ('Training begins')
     if (args.generative_model == 'SBM'):
-        train(gnn, logger, gen, 2)
+        train(gnn, gen, 2)
     elif (args.generative_model == 'SBM_multiclass'):
-        train(gnn, logger, gen, args.n_classes)
+        train(gnn, gen, args.n_classes)
     print ('Saving gnn ' + filename)
     if torch.cuda.is_available():
         torch.save(gnn.cpu(), path_plus_name)
@@ -314,7 +309,7 @@ def count_parameters(model):
         print ('model status: train')
         gnn.train()
 
-    test(gnn, logger, gen, args.n_classes)
+    test(gnn, gen, args.n_classes)
 
     print ('total num of params:', count_parameters(gnn))
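With the hunks above applied, logger threading disappears from main_gnn.py's call chain: train and test now take only the model, the generator, and the class count. A sketch of the resulting __main__ flow under the SBM_multiclass branch (the GNN constructor arguments are elided as '...' because this patch does not touch them):

    from data_generator import Generator
    from models import GNN_multiclass

    gen = Generator()                # the one fixed generator, as in __main__
    gnn = GNN_multiclass(...)        # constructor args unchanged by this patch
    train(gnn, gen, args.n_classes)  # was: train(gnn, logger, gen, args.n_classes)
    test(gnn, gen, args.n_classes)   # was: test(gnn, logger, gen, args.n_classes)
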
diff --git a/src/main_lgnn.py b/src/main_lgnn.py
index 16d4a1e..f45fe24 100644
--- a/src/main_lgnn.py
+++ b/src/main_lgnn.py
@@ -7,7 +7,6 @@
 from data_generator import Generator
 from load import get_lg_inputs
 from models import lGNN_multiclass
-from Logger import Logger
 import time
 import matplotlib
 matplotlib.use('Agg')
@@ -51,7 +50,6 @@
 parser.add_argument('--batch_size', nargs='?', const=1, type=int, default=1)
 parser.add_argument('--mode', nargs='?', const=1, type=str, default='train')
 parser.add_argument('--path_dataset', nargs='?', const=1, type=str, default='')
-parser.add_argument('--path_logger', nargs='?', const=1, type=str, default='')
 parser.add_argument('--path_gnn', nargs='?', const=1, type=str, default='')
 parser.add_argument('--filename_existing_gnn', nargs='?', const=1, type=str, default='')
 parser.add_argument('--print_freq', nargs='?', const=1, type=int, default=100)
@@ -95,7 +93,7 @@
 template3 = '{:<10} {:<10} {:<10} '
 template4 = '{:<10} {:<10.5f} {:<10.5f} \n'
 
-def train_mcd_single(gnn, optimizer, logger, gen, n_classes, it):
+def train_mcd_single(gnn, optimizer, gen, n_classes, it):
     start = time.time()
     W, labels = gen.sample_otf_single(is_training=True, cuda=torch.cuda.is_available())
     labels = labels.type(dtype_l)
@@ -134,7 +132,7 @@ def train_mcd_single(gnn, optimizer, logger, gen, n_classes, it):
     else:
         loss_value = float(loss.data.numpy())
 
-    info = ['epoch', 'avg loss', 'avg acc', 'edge_density',
+    info = ['iter', 'avg loss', 'avg acc', 'edge_density',
             'noise', 'model', 'elapsed']
     out = [it, loss_value, acc, args.edge_density,
            args.noise, 'LGNN', elapsed]
@@ -149,7 +147,7 @@ def train_mcd_single(gnn, optimizer, logger, gen, n_classes, it):
     return loss_value, acc
 
-def train(gnn, logger, gen, n_classes=args.n_classes, iters=args.num_examples_train):
+def train(gnn, gen, n_classes=args.n_classes, iters=args.num_examples_train):
     gnn.train()
     optimizer = torch.optim.Adamax(gnn.parameters(), lr=args.lr)
     loss_lst = np.zeros([iters])
@@ -158,7 +156,7 @@ def train(gnn, logger, gen, n_classes=args.n_classes, iters=args.num_examples_tr
         # W, labels = gen.sample_otf_single(is_training=True, cuda=torch.cuda.is_available())
         # WW, x, WW_lg, y, P = get_lg_inputs(W, args.J)
         # print ("Num of edges: ", np.sum(W))
-        loss_single, acc_single = train_mcd_single(gnn, optimizer, logger, gen, n_classes, it)
+        loss_single, acc_single = train_mcd_single(gnn, optimizer, gen, n_classes, it)
         loss_lst[it] = loss_single
         acc_lst[it] = acc_single
         torch.cuda.empty_cache()
@@ -166,7 +164,6 @@ def train(gnn, logger, gen, n_classes=args.n_classes, iters=args.num_examples_tr
         if (it % 100 == 0) and (it >= 100):
             # print ('Testing at check_pt begins')
             print ('Check_pt at iteration ' + str(it) + ' :')
-            # test(gnn, logger, gen, args.n_classes, iters = 20)
             print ('Avg train loss', np.mean(loss_lst[it-100:it]))
             print ('Avg train acc', np.mean(acc_lst[it-100:it]))
             print ('Std train acc', np.std(acc_lst[it-100:it]))
@@ -175,7 +172,7 @@ def train(gnn, logger, gen, n_classes=args.n_classes, iters=args.num_examples_tr
     print ('Final avg train acc', np.mean(acc_lst))
     print ('Final std train acc', np.std(acc_lst))
 
-def test_mcd_single(gnn, logger, gen, n_classes, it):
+def test_mcd_single(gnn, gen, n_classes, it):
     start = time.time()
     W, labels = gen.sample_otf_single(is_training=False, cuda=torch.cuda.is_available())
@@ -216,7 +213,7 @@ def test_mcd_single(gnn, logger, gen, n_classes, it):
     else:
         loss_value = float(loss_test.data.numpy())
 
-    info = ['epoch', 'avg loss', 'avg acc', 'edge_density',
+    info = ['iter', 'avg loss', 'avg acc', 'edge_density',
             'noise', 'model', 'elapsed']
     out = [it, loss_value, acc_test, args.edge_density,
            args.noise, 'LGNN', elapsed]
@@ -231,13 +228,13 @@ def test_mcd_single(gnn, logger, gen, n_classes, it):
     return loss_value, acc_test
 
-def test(gnn, logger, gen, n_classes, iters=args.num_examples_test):
+def test(gnn, gen, n_classes, iters=args.num_examples_test):
     gnn.train()
     loss_lst = np.zeros([iters])
     acc_lst = np.zeros([iters])
     for it in range(iters):
         # inputs, labels, W = gen.sample_single(it, cuda=torch.cuda.is_available(), is_training=False)
-        loss_single, acc_single = test_mcd_single(gnn, logger, gen, n_classes, it)
+        loss_single, acc_single = test_mcd_single(gnn, gen, n_classes, it)
         loss_lst[it] = loss_single
         acc_lst[it] = acc_single
         torch.cuda.empty_cache()
@@ -252,9 +249,6 @@ def test(gnn, logger, gen, n_classes, iters=args.num_examples_test):
 
     print ('main file starts here')
 
-    logger = Logger(args.path_logger)
-    logger.write_settings(args)
-
     # ## One fixed generator
     gen = Generator()
     # generator setup
@@ -315,9 +309,9 @@ def test(gnn, logger, gen, n_classes, iters=args.num_examples_test):
         gnn.cuda()
     print ('Training begins')
     if (args.generative_model == 'SBM'):
-        train(gnn, logger, gen, 2)
+        train(gnn, gen, 2)
     elif (args.generative_model == 'SBM_multiclass'):
-        train(gnn, logger, gen, args.n_classes)
+        train(gnn, gen, args.n_classes)
     print ('Saving gnn ' + filename)
     if torch.cuda.is_available():
         torch.save(gnn.cpu(), path_plus_name)
@@ -333,4 +327,4 @@ def test(gnn, logger, gen, n_classes, iters=args.num_examples_test):
         print ('model status: train')
         gnn.train()
 
-    test(gnn, logger, gen, args.n_classes)
+    test(gnn, gen, args.n_classes)
diff --git a/src/script_5SBM_gnn.sh b/src/script_5SBM_gnn.sh
index fe86892..9007497 100644
--- a/src/script_5SBM_gnn.sh
+++ b/src/script_5SBM_gnn.sh
@@ -1,5 +1,4 @@
 python3 main_gnn.py \
---path_logger '' \
 --path_gnn '' \
 --filename_existing_gnn '' \
 --num_examples_train 6000 \
diff --git a/src/script_5SBM_lgnn.sh b/src/script_5SBM_lgnn.sh
index b1a3360..cef89b0 100644
--- a/src/script_5SBM_lgnn.sh
+++ b/src/script_5SBM_lgnn.sh
@@ -1,5 +1,4 @@
 python3 main_lgnn.py \
---path_logger '' \
 --path_gnn '' \
 --filename_existing_gnn '' \
 --num_examples_train 6000 \
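
Both scripts drop --path_logger to match the argparse changes above; now that the parser entry is gone, passing the flag would abort at startup with argparse's unrecognized-arguments error. The surviving flags keep the nargs='?', const=1 pattern, whose behavior this self-contained sketch illustrates (standard-library semantics, shown with the --batch_size definition from the hunks above):

    import argparse

    parser = argparse.ArgumentParser()
    # identical to the definition in main_gnn.py / main_lgnn.py
    parser.add_argument('--batch_size', nargs='?', const=1, type=int, default=1)

    print(parser.parse_args([]))                      # Namespace(batch_size=1): default
    print(parser.parse_args(['--batch_size', '8']))   # Namespace(batch_size=8): given value
    print(parser.parse_args(['--batch_size']))        # Namespace(batch_size=1): bare flag uses const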