Support for explicit padding and dilations in 2D CNN layers #138
base: main
Changes from 7 commits (of 18: 46fcb28, abf7a4d, 149bc30, 6d0cf77, 668f192, 4a83a22, 0c7a1ce, f47907c, 8639bd4, 623aaf6, b9603e3, 8bc6e2f, 8be2198, 66d28cd, 0d40529, 97ba41b, 3e4807b, e17df8a)
@@ -190,16 +190,16 @@
        node_weights = self._initializers[in_1]

        if len(next_nodes) != 1:
            raise ValueError(
                f"Next nodes must have length 1, {next_nodes} has length {len(next_nodes)}"
            )

        # expect 'Add' node ahead
        type_, node, maybe_next_nodes = self._nodes[next_nodes[0]]
        if type_ != "node":
            raise TypeError(f"Expected a node next, got a {type_} instead.")
        if node.op_type != "Add":
            raise ValueError(
                f"The first node to be consumed, {node.name}, is a {node.op_type} node. Only Add nodes are supported."
            )
@@ -212,16 +212,16 @@
        elif in_1 in self._initializers:
            node_biases = self._initializers[in_1]
        else:
            raise ValueError(f"Node inputs were not found in graph initializers.")

        if len(node_weights.shape) != 2:
            raise ValueError(f"Node weights must be a 2-dimensional matrix.")
        if node_weights.shape[1] != node_biases.shape[0]:
            raise ValueError(
                f"Node weights has {node_weights.shape[1]} columns; node biases has {node_biases.shape[0]} rows. These must be equal."
            )
        if len(node.output) != 1:
            raise ValueError(
                f"Node output is {node.output} but should be a single value."
            )
@@ -337,47 +337,54 @@
         strides = attr["strides"]
         # check only kernel shape and stride are set
         if attr["kernel_shape"] != kernel_shape:
             raise ValueError(
                 f"Kernel shape attribute {attr['kernel_shape']} does not match initialized kernel shape {kernel_shape}."
             )
         if len(kernel_shape) != len(strides):
             raise ValueError(
                 f"Initialized kernel shape {kernel_shape} has {len(kernel_shape)} dimensions. Strides attribute has {len(strides)} dimensions. These must be equal."
             )
         if len(input_output_size) != len(kernel_shape) + 1:
             raise ValueError(
                 f"Input/output size ({input_output_size}) must have one more dimension than initialized kernel shape ({kernel_shape})."
             )

         # Check input, output have correct dimensions
         if biases.shape != (out_channels,):
             raise ValueError(
                 f"Biases shape {biases.shape} must match output weights channels {(out_channels,)}."
             )
         if in_channels != input_output_size[0]:
             raise ValueError(
                 f"Input/output size ({input_output_size}) first dimension must match input weights channels ({in_channels})."
             )

+        # TODO: need to check pads and dilations also have correct dimensions. Also should
+        # add support for autopad.
+        if "pads" in attr:
+            pads = attr["pads"]
+        else:
+            pads = 2 * (len(input_output_size) - 1) * [0]
+
+        if "dilations" in attr:
+            dilations = attr["dilations"]
+        else:
+            dilations = (len(input_output_size) - 1) * [1]
+
         # Other attributes are not supported
-        if "dilations" in attr and attr["dilations"] != [1, 1]:
-            raise ValueError(
-                f"{node} has non-identity dilations ({attr['dilations']}). This is not supported."
-            )
         if attr["group"] != 1:
             raise ValueError(
                 f"{node} has multiple groups ({attr['group']}). This is not supported."
             )
-        if "pads" in attr and np.any(attr["pads"]):
-            raise ValueError(
-                f"{node} has non-zero pads ({attr['pads']}). This is not supported."
-            )

         # generate new nodes for the node output
-        padding = 0
+        padding = [
+            pads[i] + pads[i + len(input_output_size) - 1]
+            for i in range(len(input_output_size) - 1)
+        ]
         output_size = [out_channels]
-        for w, k, s in zip(input_output_size[1:], kernel_shape, strides):
-            new_w = int((w - k + 2 * padding) / s) + 1
+        for w, k, s, p in zip(input_output_size[1:], kernel_shape, strides, padding):
+            new_w = int((w - k + p) / s) + 1
             output_size.append(new_w)

         activation = "linear"
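The arithmetic above can be condensed into a short standalone sketch (the helper name is illustrative and not part of the PR; ONNX stores 2-D pads as [r_begin, c_begin, r_end, c_end]). The PR's formula uses the raw kernel extent k; folding in the dilated extent d * (k - 1) + 1 is the standard generalization and is shown here as an assumption, since it reduces to k when d == 1:

```python
def conv_output_size(input_size, kernel_shape, strides, pads, dilations):
    # input_size = [channels, rows, cols]; pads = [r_begin, c_begin, r_end, c_end]
    n = len(kernel_shape)
    # total padding per spatial dimension, as computed in the PR:
    # padding[i] = pads[i] + pads[i + n]
    total = [pads[i] + pads[i + n] for i in range(n)]
    out = []
    for w, k, s, p, d in zip(input_size[1:], kernel_shape, strides, total, dilations):
        dk = d * (k - 1) + 1  # dilated kernel extent; dk == k when d == 1
        out.append((w - dk + p) // s + 1)
    return out

# a 3x28x28 input, 5x5 kernel, stride 1, two zeros of padding on every side:
assert conv_output_size([3, 28, 28], [5, 5], [1, 1], [2, 2, 2, 2], [1, 1]) == [28, 28]
```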
@@ -392,7 +399,7 @@
        # convolute image one channel at the time
        # expect 2d image with channels
        if len(input_output_size) != 3:
            raise ValueError(
                f"Expected a 2D image with channels, got {input_output_size}."
            )
@@ -401,6 +408,8 @@
             output_size,
             strides,
             weights,
+            pads=pads,
+            dilations=dilations,
             activation=activation,
             input_index_mapper=transformer,
         )
@@ -440,7 +449,7 @@

        # ONNX network should not contain indices output from MaxPool - not supported by OMLT
        if len(node.output) != 1:
            raise ValueError(
                f"The ONNX contains indices output from MaxPool. This is not supported by OMLT."
            )
        if len(node.input) != 1:
@@ -456,7 +465,7 @@
        # this means there is an extra dimension for number of batches
        # batches not supported, so only accept if they're not there or there is only 1 batch
        if input_output_size[0] != 1:
            raise ValueError(
                f"{node.name} has {input_output_size[0]} batches, only a single batch is supported."
            )
        input_output_size = input_output_size[1:]
@@ -467,27 +476,21 @@
         kernel_depth = attr["kernel_shape"][0]
         kernel_shape = attr["kernel_shape"][1:]

Reviewer: The kernel size for maxpool2d does not include the input/output channel. There are only two options for the kernel size: (i) one integer k, in which case the kernel size is [1, 1, k, k]; (ii) two integers [r, c], in which case the kernel size is [1, 1, r, c]. We need to change lines 478-479 to get the correct kernel_depth (which should be 1) and kernel_shape (which should be [k, k] or [r, c]). After fixing that, the check in 490-492 makes sense. Otherwise, we will get an error message like "Kernel shape [4] has 1 dimensions. Strides attribute has 2 dimensions. These must be equal."

Author: How do I read the correct kernel_depth and kernel_shape from the node attribute? Do I need to count the dimensions to determine whether it's been given as k, [r, c], or [1, 1, r, c]? Or do I just have the indices wrong here?

Reviewer: We do not need to determine which case, since ONNX already standardizes the dimensions for the kernel. For maxpooling2d, the node attribute gives [r, c] (or [k, k] for case (i)), so we just need to define kernel_shape as attr["kernel_shape"]. Since the output channels equal the input channels, defining kernel_depth as in_channels will be fine.

Author: (This code predates this PR.) I've now set kernel_depth to in_channels. In the maxpool_2d.onnx file used in the test_onnx_parser/test_maxpool test, attr["kernel_shape"] for node1 is (3, 2, 3). I'm not sure what each of these dimensions represents, but if I take all 3 it fails.

Reviewer: I think kernel_shape = attr["kernel_shape"], since the first dimension is no longer the depth in maxpooling. Can you add some tests to this part to see which one is correct?

         strides = attr["strides"] if "strides" in attr else [1] * len(kernel_shape)
+        pads = attr["pads"] if "pads" in attr else None
+        dilations = attr["dilations"] if "dilations" in attr else None

         # check only kernel shape, stride, storage order are set
         # everything else is not supported
-        if "dilations" in attr and attr["dilations"] != [1, 1]:
-            raise ValueError(
-                f"{node.name} has non-identity dilations ({attr['dilations']}). This is not supported."
-            )
-        if "pads" in attr and np.any(attr["pads"]):
-            raise ValueError(
-                f"{node.name} has non-zero pads ({attr['pads']}). This is not supported."
-            )
         if ("auto_pad" in attr) and (attr["auto_pad"] != "NOTSET"):
             raise ValueError(
                 f"{node.name} has autopad set ({attr['auto_pad']}). This is not supported."
             )
         if len(kernel_shape) != len(strides):
             raise ValueError(
                 f"Kernel shape {kernel_shape} has {len(kernel_shape)} dimensions. Strides attribute has {len(strides)} dimensions. These must be equal."
             )
         if len(input_output_size) != len(kernel_shape) + 1:
             raise ValueError(
                 f"Input/output size ({input_output_size}) must have one more dimension than kernel shape ({kernel_shape})."
             )
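A small sketch, separate from the PR, illustrating the reviewer's point that ONNX standardizes MaxPool's kernel_shape attribute to the spatial dimensions only (this uses the public onnx.helper API):

```python
from onnx import helper

# kernel_shape for a 2-D MaxPool holds only [rows, cols]; channels are implicit
node = helper.make_node(
    "MaxPool", inputs=["X"], outputs=["Y"], kernel_shape=[3, 3], strides=[2, 2]
)
attr = {a.name: helper.get_attribute_value(a) for a in node.attribute}
print(attr["kernel_shape"])  # [3, 3], with no depth dimension to strip
```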
@@ -519,6 +522,8 @@
             pool_func_name,
             tuple(kernel_shape),
             kernel_depth,
+            pads=pads,
+            dilations=dilations,
             activation=activation,
             input_index_mapper=transformer,
         )
@@ -24,11 +24,11 @@
        self, input_size, output_size, *, activation=None, input_index_mapper=None
    ):
        if not isinstance(input_size, (list, tuple)):
            raise TypeError(
                f"input_size must be a list or tuple, {type(input_size)} was provided."
            )
        if not isinstance(output_size, (list, tuple)):
            raise TypeError(
                f"output_size must be a list or tuple, {type(output_size)} was provided."
            )
        self.__input_size = list(input_size)
@@ -106,7 +106,7 @@
            else x[:]
        )
        if x_reshaped.shape != tuple(self.input_size):
            raise ValueError(
                f"Layer requires an input size {self.input_size}, but the input tensor had size {x_reshaped.shape}."
            )
        y = self._eval(x_reshaped)
@@ -225,6 +225,10 @@
         the size of the output.
     strides : matrix-like
         stride of the kernel.
+    pads : matrix-like
+        Padding for the kernel. Given as [left, bottom, right, top]
+    dilations : matrix-like
+        Dilations of the kernel
     activation : str or None
         activation function name
     input_index_mapper : IndexMapper or None
@@ -237,6 +241,8 @@
         output_size,
         strides,
         *,
+        pads=None,
+        dilations=None,
         activation=None,
         input_index_mapper=None,
     ):
@@ -247,12 +253,25 @@
             input_index_mapper=input_index_mapper,
         )
         self.__strides = strides
+        if pads is None:
+            self.__pads = [0, 0, 0, 0]
+        else:
+            self.__pads = pads
+        if dilations is None:
+            self.__dilations = [1, 1]
+        else:
+            self.__dilations = dilations

     @property
     def strides(self):
         """Return the stride of the layer"""
         return self.__strides

+    @property
+    def pads(self):
+        """Return the padding of the layer"""
+        return self.__pads
+
     @property
     def kernel_shape(self):
         """Return the shape of the kernel"""
@@ -263,6 +282,20 @@
         """Return the depth of the kernel"""
         raise NotImplementedError()

+    @property
+    def dilations(self):
+        """Return the kernel dilation of the layer"""
+        return self.__dilations
+
+    @property
+    def dilated_kernel_shape(self):
+        """Return the shape of the kernel after dilation"""
+        dilated_dims = [
+            self.dilations[i] * (self.kernel_shape[i] - 1) + 1
+            for i in range(len(self.kernel_shape))
+        ]
+        return tuple(dilated_dims)
+
     def kernel_index_with_input_indexes(self, out_d, out_r, out_c):
         """
         Returns an iterator over the index within the kernel and input index
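A quick standalone check of the dilated-shape formula d * (k - 1) + 1, using the same sizes as the review comment on the dilation block further below:

```python
kernel_shape, dilations = (4, 3), (2, 3)
dilated = tuple(d * (k - 1) + 1 for k, d in zip(kernel_shape, dilations))
assert dilated == (7, 7)  # a 4x3 kernel with dilations [2, 3] spans 7x7
```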
@@ -278,14 +311,16 @@
         the output column.
         """
         kernel_d = self.kernel_depth
-        [kernel_r, kernel_c] = self.kernel_shape
+        [kernel_r, kernel_c] = self.dilated_kernel_shape
         [rows_stride, cols_stride] = self.__strides
+        [pads_row, pads_col] = self.__pads[:2]
         start_in_d = 0
-        start_in_r = out_r * rows_stride
-        start_in_c = out_c * cols_stride
-        mapper = lambda x: x
-        if self.input_index_mapper is not None:
-            mapper = self.input_index_mapper
+        start_in_r = out_r * rows_stride - pads_row
+        start_in_c = out_c * cols_stride - pads_col
+        # Defined but never used:
+        # mapper = lambda x: x
+        # if self.input_index_mapper is not None:
+        #     mapper = self.input_index_mapper

         for k_d in range(kernel_d):
             for k_r in range(kernel_r):
@@ -298,7 +333,7 @@
                 # as this could require using a partial kernel
                 # even though we loop over ALL kernel indexes.
                 if not all(
-                    input_index[i] < self.input_size[i]
+                    input_index[i] < self.input_size[i] and input_index[i] >= 0
                     for i in range(len(input_index))
                 ):
                     continue
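Condensed into a standalone sketch (the names are illustrative, not OMLT's API): after this change, window positions that fall in the padding region map to negative or out-of-range input indexes, and the bounds check above skips them, which is exactly zero padding.

```python
def kernel_positions(out_r, out_c, strides, pads, dilated_shape, input_hw):
    """Yield (kernel index, input index) pairs for one output pixel,
    skipping positions that land in the zero-padding region."""
    start_r = out_r * strides[0] - pads[0]
    start_c = out_c * strides[1] - pads[1]
    for k_r in range(dilated_shape[0]):
        for k_c in range(dilated_shape[1]):
            r, c = start_r + k_r, start_c + k_c
            if 0 <= r < input_hw[0] and 0 <= c < input_hw[1]:
                yield (k_r, k_c), (r, c)

# output pixel (0, 0) of a stride-1, pad-1 window first overlaps the input
# at kernel position (1, 1):
assert next(kernel_positions(0, 0, (1, 1), (1, 1), (3, 3), (5, 5))) == ((1, 1), (0, 0))
```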
@@ -319,7 +354,7 @@
    def _eval(self, x):
        y = np.empty(shape=self.output_size)
        if len(self.output_size) != 3:
            raise ValueError(
                f"Output should have 3 dimensions but instead has {len(self.output_size)}"
            )
        [depth, rows, cols] = list(self.output_size)
@@ -345,6 +380,10 @@
         the size of the output.
     strides : matrix-like
         stride of the kernel.
+    pads : matrix-like
+        Padding for the kernel. Given as [left, bottom, right, top]
+    dilations : matrix-like
+        Dilations of the kernel
     pool_func : str
         name of function used to pool values in a kernel to a single value.
     transpose : bool
@@ -367,18 +406,22 @@
         kernel_shape,
         kernel_depth,
         *,
+        pads=None,
+        dilations=None,
         activation=None,
         input_index_mapper=None,
     ):
         super().__init__(
             input_size,
             output_size,
             strides,
+            pads=pads,
+            dilations=dilations,
             activation=activation,
             input_index_mapper=input_index_mapper,
         )
         if pool_func_name not in PoolingLayer2D._POOL_FUNCTIONS:
             raise ValueError(
                 f"Allowable pool functions are {PoolingLayer2D._POOL_FUNCTIONS}, {pool_func_name} was provided."
             )
         self._pool_func_name = pool_func_name
@@ -421,6 +464,10 @@
         stride of the cross-correlation kernel.
     kernel : matrix-like
         the cross-correlation kernel.
+    pads : matrix-like
+        Padding for the kernel. Given as [left, bottom, right, top]
+    dilations : matrix-like
+        Dilations of the kernel
     activation : str or None
         activation function name
     input_index_mapper : IndexMapper or None
@@ -434,17 +481,94 @@
         strides,
         kernel,
         *,
+        pads=None,
+        dilations=None,
         activation=None,
         input_index_mapper=None,
     ):
         super().__init__(
             input_size,
             output_size,
             strides,
+            pads=pads,
+            dilations=dilations,
             activation=activation,
             input_index_mapper=input_index_mapper,
         )
         self.__kernel = kernel
+        if self.dilations != [1, 1]:

Reviewer: If I understand correctly, when dilations != [1, 1], this part should extend the kernel from shape [out_channel, in_channel, r, c] to shape [out_channel, in_channel, dr*(r-1)+1, dc*(c-1)+1] by adding zeros, where [dr, dc] are the dilations. But this part of the code does not do the extension correctly. For example, assume the kernel's size is [2, 1, 4, 3] and dilations = [2, 3]; then the size of dilate_kernel should be [2, 1, 7, 7], but we will get an error here due to mismatched sizes. My suggestion is to assign values directly to dilate_kernel instead of using numpy.hstack and numpy.dstack.

+            dilate_rows = np.hstack(
+                [
+                    np.hstack(
+                        [
+                            np.hstack(
+                                [
+                                    kernel[:, :, i, :].reshape(
+                                        (
+                                            kernel.shape[0],
+                                            kernel.shape[1],
+                                            1,
+                                            kernel.shape[3],
+                                        )
+                                    ),
+                                    np.zeros(
+                                        (
+                                            kernel.shape[0],
+                                            kernel.shape[1],
+                                            self.dilations[0] - 1,
+                                            kernel.shape[3],
+                                        )
+                                    ),
+                                ]
+                            )
+                            for i in range(kernel.shape[2] - 1)
+                        ]
+                    ),
+                    kernel[:, :, -1, :].reshape(
+                        (kernel.shape[0], kernel.shape[1], 1, kernel.shape[3])
+                    ),
+                ]
+            )
+            dilate_kernel = np.dstack(
+                [
+                    np.dstack(
+                        [
+                            np.dstack(
+                                [
+                                    dilate_rows[:, :, :, i].reshape(
+                                        (
+                                            dilate_rows.shape[0],
+                                            dilate_rows.shape[1],
+                                            dilate_rows.shape[2],
+                                            1,
+                                        )
+                                    ),
+                                    np.zeros(
+                                        (
+                                            dilate_rows.shape[0],
+                                            dilate_rows.shape[1],
+                                            dilate_rows.shape[2],
+                                            self.dilations[1] - 1,
+                                        )
+                                    ),
+                                ]
+                            )
+                            for i in range(dilate_rows.shape[3] - 1)
+                        ]
+                    ),
+                    dilate_rows[:, :, :, -1].reshape(
+                        (
+                            dilate_rows.shape[0],
+                            dilate_rows.shape[1],
+                            dilate_rows.shape[2],
+                            1,
+                        )
+                    ),
+                ]
+            )
+            self.__dilated_kernel = dilate_kernel

Reviewer: This line needs testing coverage.
Author: Test added.
Reviewer: Agree. This part is important.

+        else:
+            self.__dilated_kernel = kernel
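The direct assignment the reviewer suggests can be sketched as follows (a standalone helper under the assumed [out_channels, in_channels, rows, cols] kernel layout, not the PR's code): a strided slice places each weight dr rows and dc columns apart, and the zeros in between realize the dilation.

```python
import numpy as np

def dilate_kernel(kernel, dilations):
    out_c, in_c, r, c = kernel.shape
    dr, dc = dilations
    # allocate the dilated extent d*(k-1)+1 in each spatial dimension
    dilated = np.zeros((out_c, in_c, dr * (r - 1) + 1, dc * (c - 1) + 1))
    # original weights, spaced out by the dilations; the gaps stay zero
    dilated[:, :, ::dr, ::dc] = kernel
    return dilated

# the reviewer's example: a [2, 1, 4, 3] kernel with dilations [2, 3] -> [2, 1, 7, 7]
assert dilate_kernel(np.ones((2, 1, 4, 3)), (2, 3)).shape == (2, 1, 7, 7)
```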
     def kernel_with_input_indexes(self, out_d, out_r, out_c):
         """

@@ -481,6 +605,11 @@
         """Return the cross-correlation kernel"""
         return self.__kernel

+    @property
+    def dilated_kernel(self):
+        """Return the dilated cross-correlation kernel"""
+        return self.__dilated_kernel

Comment on lines +731 to +734:
Reviewer: This function needs to be tested.
Author: Test added.

     def __str__(self):
         return f"ConvLayer(input_size={self.input_size}, output_size={self.output_size}, strides={self.strides}, kernel_shape={self.kernel_shape})"
Reviewer: This line needs test coverage.
Author: Coverage added to the non-dilation case.
Reviewer: I think this PR already adds tests with dilations, and the old tests already cover the cases without dilations. We can check the coverage after the CI issue is fixed.
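Putting the pieces together, a hypothetical end-to-end use of the new keyword arguments (the positional order and keywords match this diff's ConvLayer2D signature; the module path is an assumption):

```python
import numpy as np
from omlt.neuralnet.layer import ConvLayer2D  # assumed module path

kernel = np.ones((1, 1, 2, 2))  # [out_channels, in_channels, rows, cols]
layer = ConvLayer2D(
    [1, 4, 4],          # input: one 4x4 channel
    [1, 5, 5],          # output: (4 - 2 + 2) / 1 + 1 = 5 per spatial dim
    [1, 1],             # strides
    kernel,
    pads=[1, 1, 1, 1],  # one zero on every side
    dilations=[1, 1],
)
print(layer.dilated_kernel_shape)  # (2, 2), unchanged when dilations are [1, 1]
```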