Merge pull request #44 from IndicoDataSolutions/Chris/python3-compat

ADD: python3 compat
IndicoDataSolutions · Sep 10, 2018 · commit af6e100
2 parents: 4b8be6d + 1d5bdf1

Showing 10 changed files with 74 additions and 77 deletions.
diff --git a/examples/gender.py b/examples/gender.py
@@ -13,10 +13,8 @@
 trX, teX, trY, teY = load_gender_data(ntrain=10000) # Can increase up to 250K or so
 
 tokenizer = Tokenizer(min_df=10, max_features=50000)
-print trX[1] # see a blog example
 trX = tokenizer.fit_transform(trX)
 teX = tokenizer.transform(teX)
-print tokenizer.n_features
 
 layers = [
     Embedding(size=128, n_features=tokenizer.n_features),
@@ -33,7 +31,6 @@
     tr_acc = metrics.accuracy_score(trY[:len(teY)], tr_preds > 0.5)
     te_acc = metrics.accuracy_score(teY, te_preds > 0.5)
 
-    print i, tr_acc, te_acc
 
 save(model, 'save_test.pkl') # How to save
 
@@ -44,5 +41,3 @@
 
 tr_acc = metrics.accuracy_score(trY[:len(teY)], tr_preds > 0.5)
 te_acc = metrics.accuracy_score(teY, te_preds > 0.5)
-
-print tr_acc, te_acc
diff --git a/examples/load.py b/examples/load.py
@@ -22,14 +22,14 @@ def load_mnist(data_dir=None):
         import gzip
         url = 'http://yann.lecun.com/exdb/mnist/'
         fnames = [
-            'train-images-idx3-ubyte.gz', 
-            'train-labels-idx1-ubyte.gz', 
-            't10k-images-idx3-ubyte.gz', 
+            'train-images-idx3-ubyte.gz',
+            'train-labels-idx1-ubyte.gz',
+            't10k-images-idx3-ubyte.gz',
             't10k-labels-idx1-ubyte.gz'
         ]
         for fname in fnames:
             if not os.path.isfile(fname):
-                print 'data_dir not given and file not local - downloading mnist file:', fname
+                print("data_dir not given and file not local - downloading mnist file:", fname)
                 urllib.urlretrieve(url+fname, fname)
         data_dir = ''
     fd = gzip.open(os.path.join(data_dir,'train-images-idx3-ubyte.gz'))
@@ -53,5 +53,5 @@ def load_mnist(data_dir=None):
 
     trX = trX.reshape(-1, 28, 28)
     teX = teX.reshape(-1, 28, 28)
-    
+
     return trX, teX, trY, teY
diff --git a/examples/mnist.py b/examples/mnist.py
@@ -9,7 +9,7 @@
 
 trX, teX, trY, teY = load_mnist()
 
-#Use generic layer - RNN processes a size 28 vector at a time scanning from left to right 
+#Use generic layer - RNN processes a size 28 vector at a time scanning from left to right
 layers = [
 	Generic(size=28),
 	GatedRecurrent(size=512, p_drop=0.2),
@@ -28,6 +28,3 @@
 
 tr_acc = np.mean(trY[:len(teY)] == np.argmax(tr_preds, axis=1))
 te_acc = np.mean(teY == np.argmax(te_preds, axis=1))
-
-# Test accuracy should be between 98.9% and 99.3%
-print 'train accuracy', tr_acc, 'test accuracy', te_acc
diff --git a/passage/inits.py b/passage/inits.py
@@ -3,7 +3,7 @@
 import theano
 import theano.tensor as T
 
-from theano_utils import sharedX, floatX, intX
+from passage.theano_utils import sharedX, floatX, intX
 
 def uniform(shape, scale=0.05):
     return sharedX(np.random.uniform(low=-scale, high=scale, size=shape))

diff --git a/passage/iterators.py b/passage/iterators.py
@@ -1,21 +1,21 @@
 import numpy as np
 
-from utils import shuffle, iter_data
-from theano_utils import floatX, intX
+from passage.utils import shuffle, iter_data
+from passage.theano_utils import floatX, intX
 
 def padded(seqs):
-    lens = map(len, seqs)
+    lens = [len(seq) for seq in seqs]
     max_len = max(lens)
     seqs_padded = []
     for seq, seq_len in zip(seqs, lens):
-        n_pad = max_len - seq_len 
+        n_pad = max_len - seq_len
         seq = [0] * n_pad + seq
         seqs_padded.append(seq)
-    return np.asarray(seqs_padded).transpose(1, 0)
+    return np.atleast_2d(seqs_padded).transpose(1, 0)
 
 class Linear(object):
     """
-    Useful for training on real valued data where first dimension is examples, 
+    Useful for training on real valued data where first dimension is examples,
     second dimension is to be iterated over, and third dimension is data vectors.
 
     size is the number of examples per minibatch
@@ -42,7 +42,7 @@ def iterX(self, X):
             yield xmb
 
     def iterXY(self, X, Y):
-        
+
         if self.shuffle:
             X, Y = shuffle(X, Y)
 
@@ -70,7 +70,7 @@ def iterX(self, X):
             yield self.x_dtype(xmb)
 
     def iterXY(self, X, Y):
-        
+
         if self.shuffle:
             X, Y = shuffle(X, Y)
 
@@ -93,10 +93,10 @@ def iterX(self, X):
             chunk_idxs = [chunk_idxs[idx] for idx in sort]
             for xmb, idxmb in iter_data(x_chunk, chunk_idxs, size=self.size):
                 xmb = padded(xmb)
-                yield self.x_dtype(xmb), idxmb   
+                yield self.x_dtype(xmb), idxmb
 
     def iterXY(self, X, Y):
-        
+
         if self.shuffle:
             X, Y = shuffle(X, Y)
 
@@ -108,4 +108,4 @@ def iterXY(self, X, Y):
             mb_chunks = shuffle(mb_chunks)
             for xmb, ymb in mb_chunks:
                 xmb = padded(xmb)
-                yield self.x_dtype(xmb), self.y_dtype(ymb)  
+                yield self.x_dtype(xmb), self.y_dtype(ymb)
diff --git a/passage/layers.py b/passage/layers.py
@@ -4,9 +4,9 @@
 from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
 from copy import deepcopy
 
-from theano_utils import shared0s, floatX
-import activations
-import inits
+from passage.theano_utils import shared0s, floatX
+import passage.activations as activations
+import passage.inits as inits
 
 import numpy as np
 
@@ -24,7 +24,7 @@ def theano_one_hot(idx, n):
 srng = RandomStreams()
 
 class Generic(object):
-    """ 
+    """
     Useful when processing real valued vectors see examples/mnist.py for example usage.
 
     size is input dimensionality
@@ -39,7 +39,7 @@ def __init__(self, size, weights=None):
         self.params = []
 
     def output(self, dropout_active=False):
-        return self.input       
+        return self.input
 
 class Embedding(object):
 
@@ -100,10 +100,10 @@ def connect(self, l_in):
             self.b_in = shared0s((self.size))
             self.w_rec = self.init((self.size, self.size))
         self.params = [self.h0, self.w_in, self.b_in, self.w_rec]
-        
+
         if self.weights is not None:
             for param, weight in zip(self.params, self.weights):
-                param.set_value(floatX(weight))    
+                param.set_value(floatX(weight))
 
     def step(self, x_t, h_tm1, w):
         h_t = self.activation(x_t + T.dot(h_tm1, w))
@@ -159,13 +159,13 @@ def connect(self, l_in):
         self.u_o = self.init((self.size, self.size))
         self.u_c = self.init((self.size, self.size))
 
-        self.params = [self.w_i, self.w_f, self.w_o, self.w_c, 
-            self.u_i, self.u_f, self.u_o, self.u_c,  
+        self.params = [self.w_i, self.w_f, self.w_o, self.w_c,
+            self.u_i, self.u_f, self.u_o, self.u_c,
             self.b_i, self.b_f, self.b_o, self.b_c]
 
         if self.weights is not None:
             for param, weight in zip(self.params, self.weights):
-                param.set_value(floatX(weight))    
+                param.set_value(floatX(weight))
 
     def step(self, xi_t, xf_t, xo_t, xc_t, h_tm1, c_tm1, u_i, u_f, u_o, u_c):
         i_t = self.gate_activation(xi_t + T.dot(h_tm1, u_i))
@@ -183,9 +183,9 @@ def output(self, dropout_active=False):
         x_f = T.dot(X, self.w_f) + self.b_f
         x_o = T.dot(X, self.w_o) + self.b_o
         x_c = T.dot(X, self.w_c) + self.b_c
-        [out, cells], _ = theano.scan(self.step, 
-            sequences=[x_i, x_f, x_o, x_c], 
-            outputs_info=[T.alloc(0., X.shape[1], self.size), T.alloc(0., X.shape[1], self.size)], 
+        [out, cells], _ = theano.scan(self.step,
+            sequences=[x_i, x_f, x_o, x_c],
+            outputs_info=[T.alloc(0., X.shape[1], self.size), T.alloc(0., X.shape[1], self.size)],
             non_sequences=[self.u_i, self.u_f, self.u_o, self.u_c],
             truncate_gradient=self.truncate_gradient
         )
@@ -198,7 +198,7 @@ class GatedRecurrent(object):
 
     def __init__(self, size=256, activation='tanh', gate_activation='steeper_sigmoid', init='orthogonal', truncate_gradient=-1, seq_output=False, p_drop=0., direction='forward', weights=None):
         self.settings = locals()
-        del self.settings['self']   
+        del self.settings['self']
         self.activation_str = activation
         self.activation = getattr(activations, activation)
         self.gate_activation = getattr(activations, gate_activation)
@@ -225,19 +225,19 @@ def connect(self, l_in):
         self.b_r = shared0s((self.size))
 
         if 'maxout' in self.activation_str:
-            self.w_h = self.init((self.n_in, self.size*2)) 
+            self.w_h = self.init((self.n_in, self.size*2))
             self.u_h = self.init((self.size, self.size*2))
             self.b_h = shared0s((self.size*2))
         else:
-            self.w_h = self.init((self.n_in, self.size)) 
+            self.w_h = self.init((self.n_in, self.size))
             self.u_h = self.init((self.size, self.size))
-            self.b_h = shared0s((self.size))   
+            self.b_h = shared0s((self.size))
 
         self.params = [self.h0, self.w_z, self.w_r, self.w_h, self.u_z, self.u_r, self.u_h, self.b_z, self.b_r, self.b_h]
 
         if self.weights is not None:
             for param, weight in zip(self.params, self.weights):
-                param.set_value(floatX(weight))    
+                param.set_value(floatX(weight))
 
 
     def step(self, xz_t, xr_t, xh_t, h_tm1, u_z, u_r, u_h):
@@ -256,16 +256,16 @@ def output(self, dropout_active=False):
         x_z = T.dot(X, self.w_z) + self.b_z
         x_r = T.dot(X, self.w_r) + self.b_r
         x_h = T.dot(X, self.w_h) + self.b_h
-        out, _ = theano.scan(self.step, 
-            sequences=[x_z, x_r, x_h], 
-            outputs_info=[repeat(self.h0, x_h.shape[1], axis=0)], 
+        out, _ = theano.scan(self.step,
+            sequences=[x_z, x_r, x_h],
+            outputs_info=[repeat(self.h0, x_h.shape[1], axis=0)],
             non_sequences=[self.u_z, self.u_r, self.u_h],
             truncate_gradient=self.truncate_gradient
         )
         if self.seq_output:
             return out
         else:
-            return out[-1]  
+            return out[-1]
 
 class Dense(object):
     def __init__(self, size=256, activation='rectify', init='orthogonal', p_drop=0., weights=None):
@@ -288,10 +288,10 @@ def connect(self, l_in):
             self.w = self.init((self.n_in, self.size))
             self.b = shared0s((self.size))
         self.params = [self.w, self.b]
-        
+
         if self.weights is not None:
             for param, weight in zip(self.params, self.weights):
-                param.set_value(floatX(weight))            
+                param.set_value(floatX(weight))
 
     def output(self, pre_act=False, dropout_active=False):
         X = self.l_in.output(dropout_active=dropout_active)

diff --git a/passage/models.py b/passage/models.py
@@ -4,33 +4,40 @@
 import numpy as np
 from time import time
 
-import costs
-import updates
-import iterators 
-from utils import case_insensitive_import, save
-from preprocessing import LenFilter, standardize_targets
+import passage.costs as costs
+import passage.updates as updates
+import passage.iterators as iterators
+
+from passage.utils import case_insensitive_import, save
+from passage.preprocessing import LenFilter, standardize_targets
 
 def flatten(l):
     return [item for sublist in l for item in sublist]
 
+try:
+    basestring
+    BaseString = basestring
+except NameError:
+    BaseString = (str, bytes)
+
 class RNN(object):
 
     def __init__(self, layers, cost, updater='Adam', verbose=2, Y=T.matrix(), iterator='SortedPadded'):
         self.settings = locals()
         del self.settings['self']
         self.layers = layers
 
-        if isinstance(cost, basestring):
+        if isinstance(cost, BaseString):
             self.cost = case_insensitive_import(costs, cost)
         else:
             self.cost = cost
 
-        if isinstance(updater, basestring):
+        if isinstance(updater, BaseString):
             self.updater = case_insensitive_import(updates, updater)()
         else:
             self.updater = updater
 
-        if isinstance(iterator, basestring):
+        if isinstance(iterator, BaseString):
             self.iterator = case_insensitive_import(iterators, iterator)()
         else:
             self.iterator = iterator
@@ -73,7 +80,6 @@ def fit(self, trX, trY, batch_size=64, n_epochs=1, len_filter=LenFilter(), snaps
         trY = standardize_targets(trY, cost=self.cost)
 
         n = 0.
-        stats = []
         t = time()
         costs = []
         for e in range(n_epochs):
@@ -92,11 +98,11 @@ def fit(self, trX, trY, batch_size=64, n_epochs=1, len_filter=LenFilter(), snaps
 
             status = "Epoch %d Seen %d samples Avg cost %0.4f Time elapsed %d seconds" % (e, n, np.mean(epoch_costs[-250:]), time() - t)
             if self.verbose >= 2:
-                sys.stdout.write("\r"+status) 
+                sys.stdout.write("\r"+status)
                 sys.stdout.flush()
                 sys.stdout.write("\n")
             elif self.verbose == 1:
-                print status
+                print(status)
             if path and e % snapshot_freq == 0:
                 save(self, "{0}.{1}".format(path, e))
         return costs

diff --git a/passage/preprocessing.py b/passage/preprocessing.py
@@ -1,11 +1,11 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
+import string
+from collections import Counter
 
 import numpy as np
 import theano
 import theano.tensor as T
-import string
-from collections import Counter
 
 punctuation = set(string.punctuation)
 punctuation.add('\n')

diff --git a/passage/updates.py b/passage/updates.py
@@ -2,7 +2,7 @@
 import theano.tensor as T
 import numpy as np
 
-from theano_utils import shared0s, floatX
+from passage.theano_utils import shared0s, floatX
 
 def clip_norm(g, c, n):
     if c > 0:
@@ -176,7 +176,7 @@ def get_updates(self, params, cost):
             p_t = p - (self.lr / T.sqrt(acc_t + self.epsilon)) * g
             p_t = self.regularizer.weight_regularize(p_t)
             updates.append((p, p_t))
-        return updates  
+        return updates
 
 class Adadelta(Update):