Added experiments 94-99. Added a function to normalise data and a function to
save weights and biases to disk. #18 #44 #15
JackKelly · Feb 24, 2015 · 342f5e9 · 342f5e9
1 parent 968201b
commit 342f5e9
Show file tree

Hide file tree

Showing 9 changed files with 2,352 additions and 13 deletions.
diff --git a/neuralnilm/net.py b/neuralnilm/net.py
@@ -3,6 +3,7 @@
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
+import h5py
 from datetime import datetime, timedelta
 from numpy.random import rand
 from time import time
@@ -35,8 +36,11 @@ class Net(object):
     def __init__(self, source, layers_config, 
                  updates=partial(nesterov_momentum, learning_rate=0.1),
                  experiment_name="", 
-                 validation_interval=10, save_plot_interval=100,
-                 loss_function=lasagne.objectives.mse):
+                 validation_interval=10, 
+                 save_plot_interval=100,
+                 loss_function=lasagne.objectives.mse,
+                 X_processing_func=lambda X: X
+    ):
         """
         Parameters
         ----------
@@ -46,11 +50,12 @@ def __init__(self, source, layers_config,
         """
         print("Initialising network...")
         self.source = source
+        self.updates = updates
         self.experiment_name = experiment_name
         self.validation_interval = validation_interval
         self.save_plot_interval = save_plot_interval
         self.loss_function = loss_function
-        self.updates = updates
+        self.X_processing_func = X_processing_func
 
         self.input_shape = source.input_shape()
         self.output_shape = source.output_shape()
@@ -173,6 +178,7 @@ def _training_loop(self, n_iterations):
                 validation_cost = self.compute_cost(self.X_val, self.y_val).flatten()[0]
                 self.validation_costs.append(validation_cost)
             if not epoch % self.save_plot_interval:
+                self.save_params()
                 self.plot_costs(save=True)
                 self.plot_estimates(save=True, all_sequences=True)
             # Print progress
@@ -250,16 +256,54 @@ def _plot_estimates(self, save=False, seq_i=0, use_validation_data=True,
         return axes
 
     def _plot_filename(self, string, include_epochs=True, end_string=""):
-        n_epochs = len(self.training_costs)
         end_string = str(end_string)
         return (
             self.experiment_name + ("_" if self.experiment_name else "") + 
             string +
-            ("_{:d}epochs".format(n_epochs) if include_epochs else "") + 
+            ("_{:d}epochs".format(self.n_epochs()) if include_epochs else "") +
             ("_" if end_string else "") + end_string +
             ".pdf")
 
+    def n_epochs(self):
+        """Return how many training costs have been recorded so far.
+
+        This count is used as the epoch number in plot filenames and in
+        the HDF5 group names written by `save_params`.
+        """
+        recorded_costs = self.training_costs
+        return len(recorded_costs)
+
+    def save_params(self, filename=None, layers=None, mode='a'):
+        """Save layer weights and biases to an HDF5 file.
+
+        Data is stored in the following format:
+            /epoch<N>/layer<I>/{weights, biases}
+        where <N> is `self.n_epochs()` and <I> is the index of the layer
+        in `self.layers`.
+
+        Parameters
+        ----------
+        filename : str, optional
+            Defaults to `self.experiment_name + ".hdf5"`.
+        layers : list of ints, optional
+            Indices into `self.layers`.  Defaults to every layer.
+        mode : str
+            File mode passed straight to `h5py.File`.
+
+        Notes
+        -----
+        The epoch group is made with `create_group`, so saving twice at
+        the same epoch into the same file raises instead of silently
+        overwriting earlier data.
+        """
+        # Process function parameters
+        if filename is None:
+            filename = self.experiment_name + ".hdf5"
+        if layers is None:
+            layers = range(len(self.layers))
+
+        def _save(epoch_group, layer, data_name, layer_name, attr):
+            # Skip layers that do not carry this parameter: either the
+            # attribute is missing or it is explicitly set to None.
+            param = getattr(layer, attr, None)
+            if param is None:
+                return
+            layer_group = epoch_group.require_group(layer_name)
+            layer_group.create_dataset(data_name, data=param.get_value())
 
+        # The context manager closes the file even if a write fails.
+        with h5py.File(filename, mode=mode) as f:
+            epoch_name = 'epoch{:d}'.format(self.n_epochs())
+            epoch_group = f.create_group(epoch_name)
+            for layer_i in layers:
+                layer = self.layers[layer_i]
+                layer_name = 'layer{:d}'.format(layer_i)
+                _save(epoch_group, layer, 'weights', layer_name, 'W')
+                _save(epoch_group, layer, 'biases', layer_name, 'b')
+
 def BLSTMLayer(l_previous, num_units, **kwargs):
     # setup forward and backwards LSTM layers.  Note that
     # LSTMLayer takes a backwards flag. The backwards flag tells

diff --git a/neuralnilm/source.py b/neuralnilm/source.py
@@ -10,7 +10,8 @@
 from collections import OrderedDict
 
 class Source(object):
-    def __init__(self, seq_length, n_seq_per_batch, n_inputs, n_outputs):
+    def __init__(self, seq_length, n_seq_per_batch, n_inputs, n_outputs,
+                 X_processing_func=None):
         super(Source, self).__init__()
         self.seq_length = seq_length
         self.n_seq_per_batch = n_seq_per_batch
@@ -19,6 +20,7 @@ def __init__(self, seq_length, n_seq_per_batch, n_inputs, n_outputs):
         self.queue = Queue(maxsize=2)
         self._stop = threading.Event()
         self._thread = None
+        self.X_processing_func = X_processing_func
 
     def start(self):
         if self._thread is not None:
@@ -30,7 +32,9 @@ def start(self):
     def run(self):
         """Puts training data into a Queue"""
         while not self._stop.is_set():
-            self.queue.put(self._gen_data())
+            X, y = self._gen_data()
+            X, y = self._process_data(X, y)
+            self.queue.put((X, y))
         self.empty_queue()
         self._thread = None
 
@@ -46,7 +50,13 @@ def empty_queue(self):
                 break
 
     def validation_data(self):
-        return self._gen_data(validation=True)
+        X, y = self._gen_data(validation=True)
+        return self._process_data(X, y)
+
+    def _process_data(self, X, y):
+        """Run `X` through `self.X_processing_func` when one is set.
+
+        `y` is returned unchanged.  Returns the tuple `(X, y)`.
+        """
+        transform = self.X_processing_func
+        if transform is None:
+            return X, y
+        return transform(X), y
 
     def _gen_data(self, validation=False):
         raise NotImplementedError()
@@ -149,7 +159,8 @@ def __init__(self, filename, appliances,
                  subsample_target=1, 
                  input_padding=0,
                  min_off_duration=0,
-                 skip_probability=0):
+                 skip_probability=0,
+                 **kwargs):
         """
         Parameters
         ----------
@@ -170,7 +181,8 @@ def __init__(self, filename, appliances,
             seq_length=seq_length, 
             n_seq_per_batch=5,
             n_inputs=1,
-            n_outputs=1 if output_one_appliance else len(appliances)
+            n_outputs=1 if output_one_appliance else len(appliances),
+            **kwargs
         )
         self.dataset = DataSet(filename)
         self.appliances = appliances
@@ -271,7 +283,9 @@ def _gen_single_example(self, validation=False, appliances=None):
             for appliance_i, appliance in enumerate(activations.keys()):
                 if not np.random.binomial(n=1, p=self.skip_probability):
                     random_appliances.append((appliance_i, appliance))
+
         appliances.extend(random_appliances)
+        appliances = list(set(appliances)) # make unique
 
         for appliance_i, appliance in appliances:
             n_activations = len(activations[appliance])
@@ -479,3 +493,38 @@ def quantize(data, n_bins, all_hot=True, range=(-1, 1), length=None):
                 hist[where:midpoint] = 1
         out[i,:] = hist
     return (out * 2) - 1
+
+
+
+def standardise(X, how='range=2', mean=None, std=None, midrange=None, ptp=None):
+    """Standardise `X` by subtracting a centre and dividing by a scale.
+    ftp://ftp.sas.com/pub/neural/FAQ2.html#A_std_in
+
+    Parameters
+    ----------
+    X : matrix
+        The original author describes each sample as being in range
+        [0, 1]; this is not checked here.
+    how : str, {'range=2', 'std=1'}
+        'range=2' sets midrange to 0 and enforces
+        all values to be in the range [-1,1]
+        (when `midrange` and `ptp` are computed from `X`).
+        'std=1' sets mean = 0 and std = 1
+        (when `mean` and `std` are computed from `X`).
+    mean, std : number, optional
+        Only used when how='std=1'.  Default to `X.mean()` and `X.std()`.
+    midrange, ptp : number, optional
+        Only used when how='range=2'.  Default to the midpoint and the
+        peak-to-peak range (max - min) of `X`.
+
+    Returns
+    -------
+    new_X : matrix
+        Same shape as `X`.  If the scale (`std` or `ptp`) is zero, e.g.
+        for constant data, the data is only centred, not divided.
+
+    Raises
+    ------
+    RuntimeError
+        If `how` is not one of the recognised options.
+    """
+    if how == 'std=1':
+        if mean is None:
+            mean = X.mean()
+        if std is None:
+            std = X.std()
+        # Zero spread would divide by zero; scale by 1 instead.
+        scale = np.where(np.asarray(std) == 0, 1.0, std)
+        return (X - mean) / scale
+    elif how == 'range=2':
+        if midrange is None:
+            # 2.0 keeps this a true division for integer data on Python 2.
+            midrange = (X.max() + X.min()) / 2.0
+        if ptp is None:
+            # Computed explicitly: ndarray.ptp() was removed in NumPy 2.0.
+            ptp = X.max() - X.min()
+        half_range = np.asarray(ptp) / 2.0
+        # Zero range would divide by zero; scale by 1 instead.
+        half_range = np.where(half_range == 0, 1.0, half_range)
+        return (X - midrange) / half_range
+    else:
+        raise RuntimeError("unrecognised how '{}'".format(how))
diff --git a/notebooks/test_real_appliance_source.ipynb b/notebooks/test_real_appliance_source.ipynb
@@ -1,7 +1,7 @@
 {
  "metadata": {
   "name": "",
-  "signature": "sha256:4f25ad7cdfab8902eff46184c96727929fbdff674d0f8ed8decc7b3db0d86f98"
+  "signature": "sha256:a2099498a455e1055c6841a562729a5a0b0a2653c0e753a3573dd489c7cd7244"
  },
  "nbformat": 3,
  "nbformat_minor": 0,
@@ -17,7 +17,15 @@
      ],
      "language": "python",
      "metadata": {},
-     "outputs": [],
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "Couldn't import dot_parser, loading of dot files will not be possible.\n"
+       ]
+      }
+     ],
      "prompt_number": 1
     },
     {
@@ -120,14 +128,76 @@
      "cell_type": "code",
      "collapsed": false,
      "input": [
-      "plt.plot(X[0,:,:])\n",
+      "plt.plot(X2[2,:,:])\n",
       "plt.show()"
      ],
      "language": "python",
      "metadata": {},
      "outputs": [],
+     "prompt_number": 16
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "from neuralnilm.source import standardise"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
      "prompt_number": 5
     },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "X.mean()"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 11,
+       "text": [
+        "0.059526937853107344"
+       ]
+      }
+     ],
+     "prompt_number": 11
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "X.std()"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 12,
+       "text": [
+        "0.12719669533909331"
+       ]
+      }
+     ],
+     "prompt_number": 12
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "X2 = standardise(X, how='std=1', mean=0.06, std=0.18)"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 13
+    },
     {
      "cell_type": "code",
      "collapsed": false,