diff --git a/examples/gender.py b/examples/gender.py
index 4763a5f..2b39a51 100644
--- a/examples/gender.py
+++ b/examples/gender.py
@@ -13,10 +13,8 @@
 trX, teX, trY, teY = load_gender_data(ntrain=10000) # Can increase up to 250K or so
 
 tokenizer = Tokenizer(min_df=10, max_features=50000)
-print trX[1] # see a blog example
 trX = tokenizer.fit_transform(trX)
 teX = tokenizer.transform(teX)
-print tokenizer.n_features
 
 layers = [
     Embedding(size=128, n_features=tokenizer.n_features),
@@ -33,7 +31,6 @@
     tr_acc = metrics.accuracy_score(trY[:len(teY)], tr_preds > 0.5)
     te_acc = metrics.accuracy_score(teY, te_preds > 0.5)
-    print i, tr_acc, te_acc
 
 save(model, 'save_test.pkl') # How to save
@@ -44,5 +41,3 @@
 tr_acc = metrics.accuracy_score(trY[:len(teY)], tr_preds > 0.5)
 te_acc = metrics.accuracy_score(teY, te_preds > 0.5)
-
-print tr_acc, te_acc
\ No newline at end of file
diff --git a/examples/load.py b/examples/load.py
index ff0e5c1..bc8666c 100644
--- a/examples/load.py
+++ b/examples/load.py
@@ -22,14 +22,15 @@ def load_mnist(data_dir=None):
     import gzip
+    import urllib.request
     url = 'http://yann.lecun.com/exdb/mnist/'
     fnames = [
-        'train-images-idx3-ubyte.gz', 
-        'train-labels-idx1-ubyte.gz', 
-        't10k-images-idx3-ubyte.gz', 
+        'train-images-idx3-ubyte.gz',
+        'train-labels-idx1-ubyte.gz',
+        't10k-images-idx3-ubyte.gz',
         't10k-labels-idx1-ubyte.gz'
     ]
     for fname in fnames:
         if not os.path.isfile(fname):
-            print 'data_dir not given and file not local - downloading mnist file:', fname
-            urllib.urlretrieve(url+fname, fname)
+            print("data_dir not given and file not local - downloading mnist file:", fname)
+            urllib.request.urlretrieve(url+fname, fname)
     data_dir = ''
     fd = gzip.open(os.path.join(data_dir,'train-images-idx3-ubyte.gz'))
@@ -53,5 +53,5 @@ def load_mnist(data_dir=None):
     trX = trX.reshape(-1, 28, 28)
     teX = teX.reshape(-1, 28, 28)
-    
+
     return trX, teX, trY, teY
\ No newline at end of file
diff --git a/examples/mnist.py b/examples/mnist.py
index 88e56c2..ed08ce0 100644
--- a/examples/mnist.py
+++ b/examples/mnist.py
@@ -9,7 +9,7 @@
 trX, teX, trY, teY = load_mnist()
 
-#Use generic layer - RNN processes a size 28 vector at a time scanning from left to right 
+#Use generic layer - RNN processes a size 28 vector at a time scanning from left to right
 layers = [
     Generic(size=28),
     GatedRecurrent(size=512, p_drop=0.2),
@@ -28,6 +28,3 @@
 tr_acc = np.mean(trY[:len(teY)] == np.argmax(tr_preds, axis=1))
 te_acc = np.mean(teY == np.argmax(te_preds, axis=1))
-
-# Test accuracy should be between 98.9% and 99.3%
-print 'train accuracy', tr_acc, 'test accuracy', te_acc
\ No newline at end of file
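Note on examples/load.py: fixing the print statement alone would still leave this file broken under Python 3, because urlretrieve moved from the urllib module into urllib.request, so the hunk above also updates the import and the call. A minimal standalone sketch of the Python 3 download idiom, using the URL and filename from the example itself:

    import urllib.request

    url = 'http://yann.lecun.com/exdb/mnist/'
    fname = 'train-images-idx3-ubyte.gz'
    # Python 3: urlretrieve lives in urllib.request, not in urllib itself.
    urllib.request.urlretrieve(url + fname, fname)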
diff --git a/passage/inits.py b/passage/inits.py
index 7d0d6e0..aaa21c6 100644
--- a/passage/inits.py
+++ b/passage/inits.py
@@ -3,7 +3,7 @@
 import theano
 import theano.tensor as T
 
-from theano_utils import sharedX, floatX, intX
+from passage.theano_utils import sharedX, floatX, intX
 
 def uniform(shape, scale=0.05):
     return sharedX(np.random.uniform(low=-scale, high=scale, size=shape))
diff --git a/passage/iterators.py b/passage/iterators.py
index 7c9d2e4..5554952 100644
--- a/passage/iterators.py
+++ b/passage/iterators.py
@@ -1,21 +1,21 @@
 import numpy as np
 
-from utils import shuffle, iter_data
-from theano_utils import floatX, intX
+from passage.utils import shuffle, iter_data
+from passage.theano_utils import floatX, intX
 
 def padded(seqs):
-    lens = map(len, seqs)
+    lens = [len(seq) for seq in seqs]
     max_len = max(lens)
     seqs_padded = []
     for seq, seq_len in zip(seqs, lens):
-        n_pad = max_len - seq_len 
+        n_pad = max_len - seq_len
         seq = [0] * n_pad + seq
         seqs_padded.append(seq)
-    return np.asarray(seqs_padded).transpose(1, 0)
+    return np.atleast_2d(seqs_padded).transpose(1, 0)
 
 class Linear(object):
     """
-    Useful for training on real valued data where first dimension is examples, 
+    Useful for training on real valued data where first dimension is examples,
     second dimension is to be iterated over, and third dimension is data vectors.
 
     size is the number of examples per minibatch
@@ -42,7 +42,7 @@ def iterX(self, X):
             yield xmb
 
     def iterXY(self, X, Y):
-        
+
         if self.shuffle:
             X, Y = shuffle(X, Y)
@@ -70,7 +70,7 @@ def iterX(self, X):
             yield self.x_dtype(xmb)
 
     def iterXY(self, X, Y):
-        
+
         if self.shuffle:
             X, Y = shuffle(X, Y)
@@ -93,10 +93,10 @@ def iterX(self, X):
            chunk_idxs = [chunk_idxs[idx] for idx in sort]
            for xmb, idxmb in iter_data(x_chunk, chunk_idxs, size=self.size):
                xmb = padded(xmb)
-               yield self.x_dtype(xmb), idxmb 
+               yield self.x_dtype(xmb), idxmb
 
     def iterXY(self, X, Y):
-        
+
         if self.shuffle:
             X, Y = shuffle(X, Y)
@@ -108,4 +108,4 @@ def iterXY(self, X, Y):
             mb_chunks = shuffle(mb_chunks)
         for xmb, ymb in mb_chunks:
             xmb = padded(xmb)
-            yield self.x_dtype(xmb), self.y_dtype(ymb)
\ No newline at end of file
+            yield self.x_dtype(xmb), self.y_dtype(ymb)
\ No newline at end of file
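Note on padded() in passage/iterators.py: the map() change is a real bug fix, not style. Under Python 3, map() returns a one-shot iterator, so max(lens) would exhaust it and the later zip(seqs, lens) would yield nothing, silently producing an empty batch; the list comprehension keeps lens reusable. A minimal sketch of the fixed function, runnable on its own with numpy:

    import numpy as np

    def padded(seqs):
        lens = [len(seq) for seq in seqs]  # a list, safe to iterate twice
        max_len = max(lens)
        seqs_padded = [[0] * (max_len - n) + seq for seq, n in zip(seqs, lens)]
        # atleast_2d (vs asarray) guarantees a 2-D result even for degenerate
        # input such as an empty batch, so transpose(1, 0) cannot fail.
        return np.atleast_2d(seqs_padded).transpose(1, 0)

    print(padded([[1, 2, 3], [4, 5]]))  # shape (3, 2): time steps by examples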
diff --git a/passage/layers.py b/passage/layers.py
index f0f7f06..d0bd0e6 100644
--- a/passage/layers.py
+++ b/passage/layers.py
@@ -4,9 +4,9 @@
 from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
 from copy import deepcopy
 
-from theano_utils import shared0s, floatX
-import activations
-import inits
+from passage.theano_utils import shared0s, floatX
+import passage.activations as activations
+import passage.inits as inits
 
 import numpy as np
@@ -24,7 +24,7 @@ def theano_one_hot(idx, n):
 srng = RandomStreams()
 
 class Generic(object):
-    """ 
+    """
     Useful when processing real valued vectors see examples/mnist.py for example usage.
 
     size is input dimensionality
@@ -39,7 +39,7 @@ def __init__(self, size, weights=None):
         self.params = []
 
     def output(self, dropout_active=False):
-        return self.input 
+        return self.input
 
 class Embedding(object):
@@ -100,10 +100,10 @@ def connect(self, l_in):
         self.b_in = shared0s((self.size))
         self.w_rec = self.init((self.size, self.size))
         self.params = [self.h0, self.w_in, self.b_in, self.w_rec]
-        
+
         if self.weights is not None:
             for param, weight in zip(self.params, self.weights):
-                param.set_value(floatX(weight)) 
+                param.set_value(floatX(weight))
 
     def step(self, x_t, h_tm1, w):
         h_t = self.activation(x_t + T.dot(h_tm1, w))
@@ -159,13 +159,13 @@ def connect(self, l_in):
         self.u_o = self.init((self.size, self.size))
         self.u_c = self.init((self.size, self.size))
 
-        self.params = [self.w_i, self.w_f, self.w_o, self.w_c, 
-            self.u_i, self.u_f, self.u_o, self.u_c, 
+        self.params = [self.w_i, self.w_f, self.w_o, self.w_c,
+            self.u_i, self.u_f, self.u_o, self.u_c,
             self.b_i, self.b_f, self.b_o, self.b_c]
 
         if self.weights is not None:
             for param, weight in zip(self.params, self.weights):
-                param.set_value(floatX(weight)) 
+                param.set_value(floatX(weight))
 
     def step(self, xi_t, xf_t, xo_t, xc_t, h_tm1, c_tm1, u_i, u_f, u_o, u_c):
         i_t = self.gate_activation(xi_t + T.dot(h_tm1, u_i))
@@ -183,9 +183,9 @@ def output(self, dropout_active=False):
         x_f = T.dot(X, self.w_f) + self.b_f
         x_o = T.dot(X, self.w_o) + self.b_o
         x_c = T.dot(X, self.w_c) + self.b_c
-        [out, cells], _ = theano.scan(self.step, 
-            sequences=[x_i, x_f, x_o, x_c], 
-            outputs_info=[T.alloc(0., X.shape[1], self.size), T.alloc(0., X.shape[1], self.size)], 
+        [out, cells], _ = theano.scan(self.step,
+            sequences=[x_i, x_f, x_o, x_c],
+            outputs_info=[T.alloc(0., X.shape[1], self.size), T.alloc(0., X.shape[1], self.size)],
             non_sequences=[self.u_i, self.u_f, self.u_o, self.u_c],
             truncate_gradient=self.truncate_gradient
         )
@@ -198,7 +198,7 @@ class GatedRecurrent(object):
     def __init__(self, size=256, activation='tanh', gate_activation='steeper_sigmoid', init='orthogonal', truncate_gradient=-1, seq_output=False, p_drop=0., direction='forward', weights=None):
         self.settings = locals()
-        del self.settings['self'] 
+        del self.settings['self']
         self.activation_str = activation
         self.activation = getattr(activations, activation)
         self.gate_activation = getattr(activations, gate_activation)
@@ -225,19 +225,19 @@ def connect(self, l_in):
         self.b_r = shared0s((self.size))
 
         if 'maxout' in self.activation_str:
-            self.w_h = self.init((self.n_in, self.size*2)) 
+            self.w_h = self.init((self.n_in, self.size*2))
             self.u_h = self.init((self.size, self.size*2))
             self.b_h = shared0s((self.size*2))
         else:
-            self.w_h = self.init((self.n_in, self.size)) 
+            self.w_h = self.init((self.n_in, self.size))
             self.u_h = self.init((self.size, self.size))
-            self.b_h = shared0s((self.size)) 
+            self.b_h = shared0s((self.size))
 
         self.params = [self.h0, self.w_z, self.w_r, self.w_h, self.u_z, self.u_r, self.u_h, self.b_z, self.b_r, self.b_h]
 
         if self.weights is not None:
             for param, weight in zip(self.params, self.weights):
-                param.set_value(floatX(weight)) 
+                param.set_value(floatX(weight))
 
     def step(self, xz_t, xr_t, xh_t, h_tm1, u_z, u_r, u_h):
@@ -256,16 +256,16 @@ def output(self, dropout_active=False):
         x_z = T.dot(X, self.w_z) + self.b_z
         x_r = T.dot(X, self.w_r) + self.b_r
         x_h = T.dot(X, self.w_h) + self.b_h
-        out, _ = theano.scan(self.step, 
-            sequences=[x_z, x_r, x_h], 
-            outputs_info=[repeat(self.h0, x_h.shape[1], axis=0)], 
+        out, _ = theano.scan(self.step,
+            sequences=[x_z, x_r, x_h],
+            outputs_info=[repeat(self.h0, x_h.shape[1], axis=0)],
             non_sequences=[self.u_z, self.u_r, self.u_h],
             truncate_gradient=self.truncate_gradient
         )
         if self.seq_output:
             return out
         else:
-            return out[-1] 
+            return out[-1]
 
 class Dense(object):
     def __init__(self, size=256, activation='rectify', init='orthogonal', p_drop=0., weights=None):
@@ -288,10 +288,10 @@ def connect(self, l_in):
         self.w = self.init((self.n_in, self.size))
         self.b = shared0s((self.size))
         self.params = [self.w, self.b]
-        
+
         if self.weights is not None:
             for param, weight in zip(self.params, self.weights):
-                param.set_value(floatX(weight)) 
+                param.set_value(floatX(weight))
 
     def output(self, pre_act=False, dropout_active=False):
         X = self.l_in.output(dropout_active=dropout_active)
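Note on the import rewrites in passage/layers.py (and the matching ones in inits.py, iterators.py, updates.py, and models.py): Python 3 removed implicit relative imports, so inside the passage package a bare import activations no longer resolves to the sibling module. The patch chooses absolute imports; the explicit relative form of PEP 328 would be an equivalent alternative:

    # Equivalent explicit-relative imports (PEP 328), also Python 3 safe:
    from .theano_utils import shared0s, floatX
    from . import activations
    from . import inits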
diff --git a/passage/models.py b/passage/models.py
index b6a74b7..b3e5945 100644
--- a/passage/models.py
+++ b/passage/models.py
@@ -4,15 +4,22 @@
 import numpy as np
 from time import time
 
-import costs
-import updates
-import iterators
-from utils import case_insensitive_import, save
-from preprocessing import LenFilter, standardize_targets
+import passage.costs as costs
+import passage.updates as updates
+import passage.iterators as iterators
+
+from passage.utils import case_insensitive_import, save
+from passage.preprocessing import LenFilter, standardize_targets
 
 def flatten(l):
     return [item for sublist in l for item in sublist]
 
+try:
+    basestring
+    BaseString = basestring
+except NameError:
+    BaseString = (str, bytes)
+
 class RNN(object):
 
     def __init__(self, layers, cost, updater='Adam', verbose=2, Y=T.matrix(), iterator='SortedPadded'):
@@ -20,17 +27,17 @@ def __init__(self, layers, cost, updater='Adam', verbose=2, Y=T.matrix(), iterat
         del self.settings['self']
         self.layers = layers
 
-        if isinstance(cost, basestring):
+        if isinstance(cost, BaseString):
             self.cost = case_insensitive_import(costs, cost)
         else:
             self.cost = cost
 
-        if isinstance(updater, basestring):
+        if isinstance(updater, BaseString):
             self.updater = case_insensitive_import(updates, updater)()
         else:
             self.updater = updater
 
-        if isinstance(iterator, basestring):
+        if isinstance(iterator, BaseString):
             self.iterator = case_insensitive_import(iterators, iterator)()
         else:
             self.iterator = iterator
@@ -73,7 +80,6 @@ def fit(self, trX, trY, batch_size=64, n_epochs=1, len_filter=LenFilter(), snaps
         trY = standardize_targets(trY, cost=self.cost)
 
         n = 0.
-        stats = []
         t = time()
         costs = []
         for e in range(n_epochs):
@@ -92,11 +98,11 @@ def fit(self, trX, trY, batch_size=64, n_epochs=1, len_filter=LenFilter(), snaps
                 status = "Epoch %d Seen %d samples Avg cost %0.4f Time elapsed %d seconds" % (e, n, np.mean(epoch_costs[-250:]), time() - t)
                 if self.verbose >= 2:
-                    sys.stdout.write("\r"+status) 
+                    sys.stdout.write("\r"+status)
                     sys.stdout.flush()
                     sys.stdout.write("\n")
                 elif self.verbose == 1:
-                    print status
+                    print(status)
             if path and e % snapshot_freq == 0:
                 save(self, "{0}.{1}".format(path, e))
         return costs
diff --git a/passage/preprocessing.py b/passage/preprocessing.py
index c77bcd6..636a9b7 100644
--- a/passage/preprocessing.py
+++ b/passage/preprocessing.py
@@ -1,11 +1,11 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
+import string
+from collections import Counter
 import numpy as np
 import theano
 import theano.tensor as T
-import string
-from collections import Counter
 
 punctuation = set(string.punctuation)
 punctuation.add('\n')
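Note on the BaseString shim in passage/models.py: on Python 2 the bare basestring reference succeeds and is reused, covering both str and unicode; on Python 3 it raises NameError and the tuple (str, bytes) is bound instead, which isinstance accepts directly. A small sketch of the behaviour:

    try:
        basestring                  # defined on Python 2 only
        BaseString = basestring     # covers str and unicode
    except NameError:               # Python 3: basestring is gone
        BaseString = (str, bytes)

    # isinstance takes a type or a tuple of types, so both branches work:
    assert isinstance('Adam', BaseString)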
diff --git a/passage/updates.py b/passage/updates.py
index a232db8..ed772d3 100644
--- a/passage/updates.py
+++ b/passage/updates.py
@@ -2,7 +2,7 @@
 import theano.tensor as T
 import numpy as np
 
-from theano_utils import shared0s, floatX
+from passage.theano_utils import shared0s, floatX
 
 def clip_norm(g, c, n):
     if c > 0:
@@ -176,7 +176,7 @@ def get_updates(self, params, cost):
             p_t = p - (self.lr / T.sqrt(acc_t + self.epsilon)) * g
             p_t = self.regularizer.weight_regularize(p_t)
             updates.append((p, p_t))
-        return updates  
+        return updates
 
 class Adadelta(Update):
diff --git a/passage/utils.py b/passage/utils.py
index 1d2ebac..c67b78b 100644
--- a/passage/utils.py
+++ b/passage/utils.py
@@ -1,13 +1,13 @@
+import pickle
+
+import math
 import numpy as np
 import theano
 import theano.tensor as T
-import cPickle
 
 def iter_data(*data, **kwargs):
     size = kwargs.get('size', 128)
-    batches = len(data[0]) / size
-    if len(data[0]) % size != 0:
-        batches += 1
+    batches = math.ceil(len(data[0]) / size)
 
     for b in range(batches):
         start = b * size
@@ -15,13 +15,11 @@ def iter_data(*data, **kwargs):
         if len(data) == 1:
             yield data[0][start:end]
         else:
-            yield tuple([d[start:end] for d in data]) 
+            yield tuple([d[start:end] for d in data])
 
 def iter_indices(*data, **kwargs):
     size = kwargs.get('size', 128)
-    batches = len(data[0]) / size
-    if len(data[0]) % size != 0:
-        batches += 1
+    batches = math.ceil(len(data[0]) / size)
 
     for b in range(batches):
         yield b
@@ -37,9 +35,10 @@ def case_insensitive_import(module, name):
     return getattr(module, mapping[name.lower()])
 
 def load(path):
-    import layers
-    import models
-    model = cPickle.load(open(path))
+    from passage import layers
+    from passage import models
+    with open(path, 'rb') as f:
+        model = pickle.load(f, encoding="latin-1")
     model_class = getattr(models, model['model'])
     model['config']['layers'] = [getattr(layers, layer['layer'])(**layer['config']) for layer in model['config']['layers']]
     model = model_class(**model['config'])
@@ -55,4 +54,5 @@ def save(model, path):
     layer_configs.append({'layer':layer_name, 'config':layer_config})
     model.settings['layers'] = layer_configs
     serializable_model = {'model':model.__class__.__name__, 'config':model.settings}
-    cPickle.dump(serializable_model, open(path, 'wb'))
\ No newline at end of file
+    with open(path, 'wb') as f:
+        pickle.dump(serializable_model, f)
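Note on the pickle changes in passage/utils.py: cPickle does not exist in Python 3, pickle files must be opened in binary mode, and encoding='latin-1' lets model files written by the old Python 2 code load without a UnicodeDecodeError, since latin-1 maps every byte onto code points 0-255. A minimal round-trip sketch (save_test.pkl is the path used in examples/gender.py; the dict contents here are illustrative, not the real model payload):

    import pickle

    # Saving: plain pickle replaces cPickle; binary mode is required.
    with open('save_test.pkl', 'wb') as f:
        pickle.dump({'model': 'RNN', 'config': {'layers': []}}, f)

    # Loading: encoding='latin-1' also accepts legacy Python 2 pickles,
    # decoding old str objects byte-for-byte instead of raising.
    with open('save_test.pkl', 'rb') as f:
        model = pickle.load(f, encoding='latin-1')

The math.ceil change in the same file is the companion fix: / is true division under Python 3, so the old len(data[0]) / size would hand range() a float; math.ceil returns an int there, preserving the round-up behaviour of the removed modulo check.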