Skip to content

Commit

Permalink
Added experiments 94-99. Function to normalise data. Function to save
Browse files Browse the repository at this point in the history
weights and biases to disk. #18 #44 #15
  • Loading branch information
JackKelly committed Feb 24, 2015
1 parent 968201b commit 342f5e9
Show file tree
Hide file tree
Showing 9 changed files with 2,352 additions and 13 deletions.
54 changes: 49 additions & 5 deletions neuralnilm/net.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import h5py
from datetime import datetime, timedelta
from numpy.random import rand
from time import time
Expand Down Expand Up @@ -35,8 +36,11 @@ class Net(object):
def __init__(self, source, layers_config,
updates=partial(nesterov_momentum, learning_rate=0.1),
experiment_name="",
validation_interval=10, save_plot_interval=100,
loss_function=lasagne.objectives.mse):
validation_interval=10,
save_plot_interval=100,
loss_function=lasagne.objectives.mse,
X_processing_func=lambda X: X
):
"""
Parameters
----------
Expand All @@ -46,11 +50,12 @@ def __init__(self, source, layers_config,
"""
print("Initialising network...")
self.source = source
self.updates = updates
self.experiment_name = experiment_name
self.validation_interval = validation_interval
self.save_plot_interval = save_plot_interval
self.loss_function = loss_function
self.updates = updates
self.X_processing_func = X_processing_func

self.input_shape = source.input_shape()
self.output_shape = source.output_shape()
Expand Down Expand Up @@ -173,6 +178,7 @@ def _training_loop(self, n_iterations):
validation_cost = self.compute_cost(self.X_val, self.y_val).flatten()[0]
self.validation_costs.append(validation_cost)
if not epoch % self.save_plot_interval:
self.save_params()
self.plot_costs(save=True)
self.plot_estimates(save=True, all_sequences=True)
# Print progress
Expand Down Expand Up @@ -250,16 +256,54 @@ def _plot_estimates(self, save=False, seq_i=0, use_validation_data=True,
return axes

def _plot_filename(self, string, include_epochs=True, end_string=""):
n_epochs = len(self.training_costs)
end_string = str(end_string)
return (
self.experiment_name + ("_" if self.experiment_name else "") +
string +
("_{:d}epochs".format(n_epochs) if include_epochs else "") +
("_{:d}epochs".format(self.n_epochs()) if include_epochs else "") +
("_" if end_string else "") + end_string +
".pdf")

def n_epochs(self):
    """Return the number of entries recorded in ``self.training_costs``."""
    recorded_costs = self.training_costs
    return len(recorded_costs)

def save_params(self, filename=None, layers=None, mode='a'):
    """Save layer weights and biases to an HDF5 file.

    Data is written in the following layout:
        /epoch<N>/layer<I>/{weights, biases}
    where N is ``self.n_epochs()`` and I is the index of the layer in
    ``self.layers``.

    Parameters
    ----------
    filename : str, optional
        Defaults to ``self.experiment_name + ".hdf5"``.
    layers : list of ints, optional
        Indices into ``self.layers``.  Defaults to every layer.
    mode : str
        File mode passed to ``h5py.File``.
    """
    # Process function parameters
    if filename is None:
        filename = self.experiment_name + ".hdf5"
    if layers is None:
        layers = range(len(self.layers))

    def _save(epoch_group, layer, data_name, layer_name, attr):
        # Layers that do not have this parameter are skipped.  The
        # attribute may be absent or, presumably for layers built without
        # a bias, set to None -- TODO confirm against the layer classes.
        param = getattr(layer, attr, None)
        if param is None:
            return
        layer_group = epoch_group.require_group(layer_name)
        layer_group.create_dataset(data_name, data=param.get_value())

    # The context manager closes the file even if a write fails part-way.
    with h5py.File(filename, mode=mode) as f:
        # NOTE(review): create_group is expected to fail if this epoch has
        # already been saved to the same file -- confirm that is intended.
        epoch_group = f.create_group('epoch{:d}'.format(self.n_epochs()))
        for layer_i in layers:
            layer = self.layers[layer_i]
            layer_name = 'layer{:d}'.format(layer_i)
            _save(epoch_group, layer, 'weights', layer_name, 'W')
            _save(epoch_group, layer, 'biases', layer_name, 'b')

def BLSTMLayer(l_previous, num_units, **kwargs):
# setup forward and backwards LSTM layers. Note that
# LSTMLayer takes a backwards flag. The backwards flag tells
Expand Down
59 changes: 54 additions & 5 deletions neuralnilm/source.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
from collections import OrderedDict

class Source(object):
def __init__(self, seq_length, n_seq_per_batch, n_inputs, n_outputs):
def __init__(self, seq_length, n_seq_per_batch, n_inputs, n_outputs,
X_processing_func=None):
super(Source, self).__init__()
self.seq_length = seq_length
self.n_seq_per_batch = n_seq_per_batch
Expand All @@ -19,6 +20,7 @@ def __init__(self, seq_length, n_seq_per_batch, n_inputs, n_outputs):
self.queue = Queue(maxsize=2)
self._stop = threading.Event()
self._thread = None
self.X_processing_func = X_processing_func

def start(self):
if self._thread is not None:
Expand All @@ -30,7 +32,9 @@ def start(self):
def run(self):
    """Puts training data into a Queue.

    Loops until ``self._stop`` is set.  Each iteration generates one
    batch with ``self._gen_data()``, passes it through
    ``self._process_data`` and enqueues the resulting ``(X, y)`` tuple.
    When the loop ends the queue is emptied and ``self._thread`` is reset
    to None.
    """
    while not self._stop.is_set():
        # Exactly one batch per iteration, and only the processed batch
        # is ever placed on the queue.
        X, y = self._gen_data()
        X, y = self._process_data(X, y)
        self.queue.put((X, y))
    self.empty_queue()
    self._thread = None

Expand All @@ -46,7 +50,13 @@ def empty_queue(self):
break

def validation_data(self):
    """Return one processed batch of validation data.

    Calls ``self._gen_data(validation=True)`` and passes the result
    through ``self._process_data`` so that validation data receives the
    same processing as the batches enqueued by ``run``.

    Returns
    -------
    X, y : whatever ``self._process_data`` returns
    """
    X, y = self._gen_data(validation=True)
    return self._process_data(X, y)

def _process_data(self, X, y):
if self.X_processing_func is not None:
X = self.X_processing_func(X)
return X, y

def _gen_data(self, validation=False):
raise NotImplementedError()
Expand Down Expand Up @@ -149,7 +159,8 @@ def __init__(self, filename, appliances,
subsample_target=1,
input_padding=0,
min_off_duration=0,
skip_probability=0):
skip_probability=0,
**kwargs):
"""
Parameters
----------
Expand All @@ -170,7 +181,8 @@ def __init__(self, filename, appliances,
seq_length=seq_length,
n_seq_per_batch=5,
n_inputs=1,
n_outputs=1 if output_one_appliance else len(appliances)
n_outputs=1 if output_one_appliance else len(appliances),
**kwargs
)
self.dataset = DataSet(filename)
self.appliances = appliances
Expand Down Expand Up @@ -271,7 +283,9 @@ def _gen_single_example(self, validation=False, appliances=None):
for appliance_i, appliance in enumerate(activations.keys()):
if not np.random.binomial(n=1, p=self.skip_probability):
random_appliances.append((appliance_i, appliance))

appliances.extend(random_appliances)
appliances = list(set(appliances)) # make unique

for appliance_i, appliance in appliances:
n_activations = len(activations[appliance])
Expand Down Expand Up @@ -479,3 +493,38 @@ def quantize(data, n_bins, all_hot=True, range=(-1, 1), length=None):
hist[where:midpoint] = 1
out[i,:] = hist
return (out * 2) - 1



def standardise(X, how='range=2', mean=None, std=None, midrange=None, ptp=None):
    """Standardise.

    ftp://ftp.sas.com/pub/neural/FAQ2.html#A_std_in

    Parameters
    ----------
    X : matrix
        Each sample is in range [0, 1]
    how : str, {'range=2', 'std=1'}
        'range=2' sets midrange to 0 and enforces
        all values to be in the range [-1,1]
        'std=1' sets mean = 0 and std = 1
    mean, std : number, optional
        Only used when how == 'std=1'.  Each defaults to the
        corresponding statistic computed over the whole of `X`.
    midrange, ptp : number, optional
        Only used when how == 'range=2'.  `midrange` defaults to
        (max + min) / 2 of `X` and `ptp` (peak-to-peak) to max - min of `X`.

    Returns
    -------
    new_X : matrix
        Same shape as `X`.

    Raises
    ------
    RuntimeError
        If `how` is not one of the recognised options.
    """
    if how == 'std=1':
        if mean is None:
            mean = X.mean()
        if std is None:
            std = X.std()
        return (X - mean) / std
    elif how == 'range=2':
        # Divide by 2.0 so integer input is never floor-divided.
        if midrange is None:
            midrange = (X.max() + X.min()) / 2.0
        if ptp is None:
            # Computed from max/min because the ndarray.ptp() method is
            # not available on every NumPy version.
            ptp = X.max() - X.min()
        return (X - midrange) / (ptp / 2.0)
    else:
        raise RuntimeError("unrecognised how '" + how + "'")
76 changes: 73 additions & 3 deletions notebooks/test_real_appliance_source.ipynb
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"metadata": {
"name": "",
"signature": "sha256:4f25ad7cdfab8902eff46184c96727929fbdff674d0f8ed8decc7b3db0d86f98"
"signature": "sha256:a2099498a455e1055c6841a562729a5a0b0a2653c0e753a3573dd489c7cd7244"
},
"nbformat": 3,
"nbformat_minor": 0,
Expand All @@ -17,7 +17,15 @@
],
"language": "python",
"metadata": {},
"outputs": [],
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Couldn't import dot_parser, loading of dot files will not be possible.\n"
]
}
],
"prompt_number": 1
},
{
Expand Down Expand Up @@ -120,14 +128,76 @@
"cell_type": "code",
"collapsed": false,
"input": [
"plt.plot(X[0,:,:])\n",
"plt.plot(X2[2,:,:])\n",
"plt.show()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 16
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from neuralnilm.source import standardise"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"X.mean()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 11,
"text": [
"0.059526937853107344"
]
}
],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"X.std()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 12,
"text": [
"0.12719669533909331"
]
}
],
"prompt_number": 12
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"X2 = standardise(X, how='std=1', mean=0.06, std=0.18)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
Expand Down
Loading

0 comments on commit 342f5e9

Please sign in to comment.