
Expose alpha and theta type for parametrized activations #1069

Merged
merged 5 commits on Oct 1, 2024
Changes from 4 commits
2 changes: 1 addition & 1 deletion example-models
Contributor

Technically, this got in by accident from the QONNX PR

Contributor Author

It was not supposed to include that--I thought I was being careful. I had updated example-models in my work area. I am not sure if they are the final ones, either. I can try to recreate this without the example-models change if you prefer.

Contributor

I think it is fine for it to remain. We'll update that pointer anyway.

29 changes: 26 additions & 3 deletions hls4ml/backends/catapult/passes/core_templates.py
@@ -115,6 +115,15 @@ def format(self, node):
typedef {table_t.name} table_t;
}};\n"""

param_activ_config_template = """struct {type}_config{index} : nnet::activ_config {{
static const unsigned n_in = {n_in};
static const unsigned table_size = {table_size};
static const unsigned io_type = nnet::{iotype};
static const unsigned reuse_factor = {reuse};
typedef {table_t.name} table_t;
typedef {param_t.name} param_t;
}};\n"""

hard_activ_config_template = """struct {type}_config{index} {{
static const unsigned n_in = {n_in};
static const {slope_t.name} slope;
@@ -140,14 +149,16 @@ def format(self, node):
}};\n"""

activ_function_template = 'nnet::{activation}<{input_t}, {output_t}, {config}>({input}, {output});'
param_activ_function_template = 'nnet::{activation}<{input_t}, {output_t}, {config}>({input}, {param}, {output});'
param_activ_function_template = (
'nnet::{activation}<{input_t}, {param_t.name}, {output_t}, {config}>({input}, {param}, {output});'
)

activ_include_list = ['nnet_utils/nnet_activation.h', 'nnet_utils/nnet_activation_stream.h']


class ActivationConfigTemplate(LayerConfigTemplate):
def __init__(self):
super().__init__((Activation, ParametrizedActivation, PReLU))
super().__init__(Activation)
self.template = activ_config_template

def format(self, node):
@@ -157,6 +168,18 @@ def format(self, node):
return self.template.format(**params)


class ParamActivationConfigTemplate(LayerConfigTemplate):
def __init__(self):
super().__init__((ParametrizedActivation, PReLU))
self.template = param_activ_config_template

def format(self, node):
params = self._default_config_params(node)
params['type'] = node.get_attr('activation')

return self.template.format(**params)


class HardActivationConfigTemplate(LayerConfigTemplate):
def __init__(self):
super().__init__(HardActivation)
@@ -210,7 +233,7 @@ def __init__(self):
def format(self, node):
params = self._default_function_params(node)
params['activation'] = node.get_attr('activation').lower()
params['param'] = node.get_weights('alpha').name
params['param'] = node.get_weights('param').name
params['config'] = '{}_config{}'.format(node.get_attr('activation'), node.index)

return self.template.format(**params)
29 changes: 26 additions & 3 deletions hls4ml/backends/quartus/passes/core_templates.py
@@ -125,6 +125,15 @@ def format(self, node):
typedef {table_t.name} table_t;
}};\n"""

param_activ_config_template = """struct {type}_config{index} : nnet::activ_config {{
static const unsigned n_in = {n_in};
static const unsigned table_size = {table_size};
static const unsigned io_type = nnet::{iotype};
static const unsigned reuse_factor = {reuse};
typedef {table_t.name} table_t;
typedef {param_t.name} param_t;
}};\n"""

hard_activ_config_template = """struct {type}_config{index} {{
static const unsigned n_in = {n_in};
static const {slope_t.name} slope;
@@ -146,14 +155,16 @@ def format(self, node):
}};\n"""

activ_function_template = 'nnet::{activation}<{input_t}, {output_t}, {config}>({input}, {output});'
param_activ_function_template = 'nnet::{activation}<{input_t}, {output_t}, {config}>({input}, {param}, {output});'
param_activ_function_template = (
'nnet::{activation}<{input_t}, {param_t.name}, {output_t}, {config}>({input}, {param}, {output});'
)

activ_include_list = ['nnet_utils/nnet_activation.h', 'nnet_utils/nnet_activation_stream.h']


class ActivationConfigTemplate(LayerConfigTemplate):
def __init__(self):
super().__init__((Activation, ParametrizedActivation, PReLU, UnaryLUT))
super().__init__((Activation, UnaryLUT))
self.template = activ_config_template

def format(self, node):
@@ -163,6 +174,18 @@ def format(self, node):
return self.template.format(**params)


class ParamActivationConfigTemplate(LayerConfigTemplate):
def __init__(self):
super().__init__((ParametrizedActivation, PReLU))
self.template = param_activ_config_template

def format(self, node):
params = self._default_config_params(node)
params['type'] = node.get_attr('activation')

return self.template.format(**params)


class HardActivationConfigTemplate(LayerConfigTemplate):
def __init__(self):
super().__init__(HardActivation)
@@ -216,7 +239,7 @@ def __init__(self):
def format(self, node):
params = self._default_function_params(node)
params['activation'] = node.get_attr('activation').lower()
params['param'] = node.get_weights('alpha').name
params['param'] = node.get_weights('param').name
params['config'] = '{}_config{}'.format(node.get_attr('activation'), node.index)

return self.template.format(**params)
29 changes: 26 additions & 3 deletions hls4ml/backends/vivado/passes/core_templates.py
@@ -116,6 +116,15 @@ def format(self, node):
typedef {table_t.name} table_t;
}};\n"""

param_activ_config_template = """struct {type}_config{index} : nnet::activ_config {{
static const unsigned n_in = {n_in};
static const unsigned table_size = {table_size};
static const unsigned io_type = nnet::{iotype};
static const unsigned reuse_factor = {reuse};
typedef {table_t.name} table_t;
typedef {param_t.name} param_t;
}};\n"""

hard_activ_config_template = """struct {type}_config{index} {{
static const unsigned n_in = {n_in};
static const {slope_t.name} slope;
@@ -138,14 +147,16 @@ def format(self, node):
}};\n"""

activ_function_template = 'nnet::{activation}<{input_t}, {output_t}, {config}>({input}, {output});'
param_activ_function_template = 'nnet::{activation}<{input_t}, {output_t}, {config}>({input}, {param}, {output});'
param_activ_function_template = (
'nnet::{activation}<{input_t}, {param_t.name}, {output_t}, {config}>({input}, {param}, {output});'
)

activ_include_list = ['nnet_utils/nnet_activation.h', 'nnet_utils/nnet_activation_stream.h']


class ActivationConfigTemplate(LayerConfigTemplate):
def __init__(self):
super().__init__((Activation, ParametrizedActivation, PReLU, UnaryLUT))
super().__init__((Activation, UnaryLUT))
self.template = activ_config_template

def format(self, node):
@@ -155,6 +166,18 @@ def format(self, node):
return self.template.format(**params)


class ParamActivationConfigTemplate(LayerConfigTemplate):
def __init__(self):
super().__init__((ParametrizedActivation, PReLU))
self.template = param_activ_config_template

def format(self, node):
params = self._default_config_params(node)
params['type'] = node.get_attr('activation')

return self.template.format(**params)


class HardActivationConfigTemplate(LayerConfigTemplate):
def __init__(self):
super().__init__(HardActivation)
@@ -208,7 +231,7 @@ def __init__(self):
def format(self, node):
params = self._default_function_params(node)
params['activation'] = node.get_attr('activation').lower()
params['param'] = node.get_weights('alpha').name
params['param'] = node.get_weights('param').name
params['config'] = '{}_config{}'.format(node.get_attr('activation'), node.index)

return self.template.format(**params)
2 changes: 1 addition & 1 deletion hls4ml/converters/keras/core.py
@@ -71,7 +71,7 @@ def parse_activation_layer(keras_layer, input_names, input_shapes, data_reader):
elif layer['class_name'] == 'ReLU':
layer['class_name'] = 'Activation'
elif layer['class_name'] == 'PReLU':
layer['alpha_data'] = get_weights_data(data_reader, layer['name'], 'alpha')
layer['param_data'] = get_weights_data(data_reader, layer['name'], 'alpha')

if layer['class_name'] == 'Activation' and layer['activation'] == 'softmax':
layer['class_name'] = 'Softmax'
2 changes: 1 addition & 1 deletion hls4ml/converters/pytorch/core.py
@@ -55,7 +55,7 @@ def parse_activation_layer(operation, layer_name, input_names, input_shapes, nod
if layer['class_name'] == 'ELU':
layer['activ_param'] = class_object.alpha
if layer['class_name'] == 'PReLU':
layer['alpha_data'] = class_object.weight.data.numpy()
layer['param_data'] = class_object.weight.data.numpy()
if layer['class_name'] == 'Threshold':
layer['activ_param'] = class_object.threshold
layer['class_name'] = 'ThresholdedReLU'
20 changes: 19 additions & 1 deletion hls4ml/model/layers.py
@@ -845,6 +845,17 @@ def initialize(self):


class ParametrizedActivation(Activation):
_expected_attributes = [
Attribute('n_in'),
Attribute('activation', value_type=str),
TypeAttribute('param'),
]

def initialize(self):
super().initialize()
param_t = NamedType(*reversed(self.model.config.get_precision(self, 'param')))
self.set_attr('param_t', param_t)

def _get_act_function_name(self):
act = self.get_attr('activation').lower()
if act == 'leakyrelu':
@@ -882,9 +893,16 @@ def initialize(self):


class PReLU(Activation):
_expected_attributes = [
Attribute('n_in'),
Attribute('activation', value_type=str),
WeightAttribute('param'),
TypeAttribute('param'),
]

def initialize(self):
super().initialize()
self.add_weights_variable(name='alpha', var_name='a{index}')
self.add_weights_variable(name='param', var_name='a{index}')


class Softmax(Activation):
14 changes: 14 additions & 0 deletions hls4ml/model/optimizer/passes/infer_precision.py
@@ -84,6 +84,9 @@ def _infer_precision(self, node, types_to_infer):
if node_class in ['SimpleRNN', 'LSTM', 'GRU']:
return self._infer_rnn_precision(node, types_to_infer)

if node_class in ['ParametrizedActivation']:
return self._infer_par_act_precision(node, types_to_infer)

# What about quantized activation layer? Setting it to 'auto' manually will break it here. We should prevent
# this in config_from_* functions

@@ -557,3 +560,14 @@ def _infer_rnn_precision(self, node, types_to_infer):
inferred_types.append(f'{weightvar}_t')

return inferred_types

def _infer_par_act_precision(self, node, types_to_infer):
inferred_types = []

# for now, only set it for thresholded relu
Contributor

Why not ELU while we're at it?

Contributor Author

I left them at the default values, though you can configure them to something else manually. Is there a better choice?
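As an illustration of the manual override mentioned above (a minimal sketch, not taken from this PR): it assumes name-level granularity, that Keras auto-names the layer 'leaky_re_lu', and that the new parameter precision is exposed under a 'param' key of the per-layer Precision dict; the chosen width is arbitrary.

import hls4ml
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense, LeakyReLU
from tensorflow.keras.models import Sequential

# Toy model; the auto-assigned layer name can differ across Keras versions,
# so inspect config['LayerName'] to find the actual key.
model = Sequential([Input(shape=(16,)), Dense(8), LeakyReLU()])

config = hls4ml.utils.config_from_keras_model(model, granularity='name')
# Assumption: with this PR the alpha/theta precision can be overridden via a
# 'param' entry alongside the existing per-layer precision entries.
config['LayerName']['leaky_re_lu']['Precision']['param'] = 'ap_fixed<8,2>'

hls_model = hls4ml.converters.convert_from_keras_model(model, hls_config=config)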

Contributor

I agree with that; my comment was mostly that the comment in the code suggests this is only applicable to thresholded relu, while it actually works for three layers (leaky, thresholded and elu).

Contributor Author

I can modify the comment to say that the others are left at the default precision.

Contributor

I think that the idea of a configurable threshold is good in principle. However, this way we risk having an inconsistency between parametrized activations. Thresholded relu gets the same type as the input, which is the current behavior anyway, but leaky and elu get the default (which may differ from the input to the current layer), and prelu is not even considered, so it gets the default that way. Why not handle all of them with a consistent behavior (whatever that is: input precision for continuity, or default precision)? Also, why use the input precision when we can be smart about it: we have access to the activ_param, so we can figure out exactly how many bits are needed to represent that number/array.

Contributor Author (@jmitrevs, Oct 1, 2024)

They have a fundamentally different function. A threshold is compared against the input values, so it makes sense for it to be the same type as the input. One can tune it if preferred, but there is an implicit connection with the input type: if the input type logically had units, the threshold would have the same units. The others are scaling factors, so they have no connection to the input type at all, and it doesn't make sense to make them the same as the input type. I did consider adding prelu to the match on line 87, but then _infer_par_act_precision would just ignore it and it would get the default anyway, so I didn't add it to the match. But I am not fundamentally opposed to that.

Contributor Author (@jmitrevs, Oct 1, 2024)

I think that, logically, the same type makes sense for threshold comparisons, so it seems like a good setting, and I believe a better one than the default. For scaling factors we have no guidance, so the default precision makes sense.

Contributor Author

But the suggestion about looking at the activ_param is interesting. I have to think about it. One can set the range, but not necessarily the number of bits.

Contributor

For a scale (and threshold) that is a scalar the guidance is simple: it's a number for which we can figure out the best precision to store (without using too many fractional bits just to match the original stored as a float). It does get trickier for PReLU, where we would need a smarter way to decide which precision suits most of the array best. I understand your logic for the current behavior. I'm fine with merging this as-is and doing a "smarter" way as a follow-up.
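A rough sketch of the kind of follow-up heuristic discussed here (not what this PR implements): count the integer bits of the scalar's magnitude and add fractional bits until the value is stored exactly, with a cap so non-terminating binary fractions like 0.1 stay bounded.

def fixed_point_for_scalar(value, max_frac_bits=16):
    # Heuristic sketch only: smallest (width, integer bits, signed) triple,
    # in the ap_fixed<width, integer> sense, that stores `value` (nearly) exactly.
    signed = value < 0
    mag = abs(value)
    int_bits = max(int(mag).bit_length(), 1)
    frac_bits = 0
    while frac_bits < max_frac_bits and (mag * (1 << frac_bits)) % 1:
        frac_bits += 1
    sign_bit = 1 if signed else 0
    return int_bits + frac_bits + sign_bit, int_bits + sign_bit, signed

print(fixed_point_for_scalar(0.25))  # e.g. LeakyReLU alpha = 0.25 -> (3, 1, False)
print(fixed_point_for_scalar(1.5))   # e.g. ThresholdedReLU theta = 1.5 -> (2, 1, False)
# For a PReLU array one would have to aggregate over the per-channel alphas
# (e.g. take the widest requirement), which is the trickier case noted above.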

Contributor Author (@jmitrevs, Oct 1, 2024)

Actually, I think all activations can be updated. The output type can be unsigned for relu (but otherwise matching the input type), restricted in range for sigmoid and tanh, etc. It may be good to have another pull request that does precision propagation for all activations.
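A sketch of what that follow-up propagation could look like for a few activations, using plain (width, integer bits, signed) tuples rather than hls4ml's internal precision types; the rules below are assumptions for illustration, not something this PR implements.

def propose_activation_output(activation, in_width, in_int, in_signed):
    # Illustrative rules only: relu keeps the input scale but drops the sign,
    # sigmoid/softmax outputs are bounded to [0, 1), tanh to (-1, 1).
    act = activation.lower()
    frac = in_width - in_int
    if act == 'relu':
        return (in_width - 1, in_int - 1, False) if in_signed else (in_width, in_int, False)
    if act in ('sigmoid', 'softmax'):
        return (frac, 0, False)
    if act == 'tanh':
        return (frac + 1, 1, True)
    return (in_width, in_int, in_signed)  # everything else: keep the input type

print(propose_activation_output('relu', 16, 6, True))  # -> (15, 5, False)
print(propose_activation_output('tanh', 16, 6, True))  # -> (11, 1, True)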

if 'param_t' in inferred_types and self.get_attr('activation').lower() == 'thresholdedrelu':
in_type = node.get_input_variable().type.precision
node.attributes['param_t'].type = in_type
inferred_types.append('param_t')

return inferred_types
20 changes: 10 additions & 10 deletions hls4ml/templates/catapult/nnet_utils/nnet_activation.h
@@ -686,8 +686,8 @@ void hard_tanh(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]) {
// *************************************************
// Leaky RELU Activation
// *************************************************
template <class data_T, class res_T, typename CONFIG_T>
void leaky_relu(data_T data[CONFIG_T::n_in], data_T alpha, res_T res[CONFIG_T::n_in]) {
template <class data_T, class param_T, class res_T, typename CONFIG_T>
void leaky_relu(data_T data[CONFIG_T::n_in], param_T alpha, res_T res[CONFIG_T::n_in]) {
//#pragma HLS PIPELINE

data_T datareg;
@@ -703,8 +703,8 @@ void leaky_relu(data_T data[CONFIG_T::n_in], data_T alpha, res_T res[CONFIG_T::n
// *************************************************
// Thresholded RELU Activation
// *************************************************
template <class data_T, class res_T, typename CONFIG_T>
void thresholded_relu(data_T data[CONFIG_T::n_in], data_T theta, res_T res[CONFIG_T::n_in]) {
template <class data_T, class param_T, class res_T, typename CONFIG_T>
void thresholded_relu(data_T data[CONFIG_T::n_in], param_T theta, res_T res[CONFIG_T::n_in]) {
//#pragma HLS PIPELINE

data_T datareg;
@@ -917,8 +917,8 @@ template <typename CONFIG_T, int N_TABLE> void init_elu_table(typename CONFIG_T:

#ifndef USE_AC_MATH

template <class data_T, class res_T, typename CONFIG_T>
void elu(data_T data[CONFIG_T::n_in], const res_T alpha, res_T res[CONFIG_T::n_in]) {
template <class data_T, class param_T, class res_T, typename CONFIG_T>
void elu(data_T data[CONFIG_T::n_in], const param_T alpha, res_T res[CONFIG_T::n_in]) {
// Initialize the lookup table
#ifdef __HLS_SYN__
bool initialized = false;
@@ -953,8 +953,8 @@ void elu(data_T data[CONFIG_T::n_in], const res_T alpha, res_T res[CONFIG_T::n_i

#else

template <class data_T, class res_T, typename CONFIG_T>
void elu(data_T data[CONFIG_T::n_in], const res_T alpha, res_T res[CONFIG_T::n_in]) {
template <class data_T, class param_T, class res_T, typename CONFIG_T>
void elu(data_T data[CONFIG_T::n_in], const param_T alpha, res_T res[CONFIG_T::n_in]) {
for (int ii = 0; ii < CONFIG_T::n_in; ii++) {
ac_math::ac_elu_pwl(data[ii], res[ii], alpha);
}
@@ -1045,8 +1045,8 @@ template <class data_T, class res_T, typename CONFIG_T> void selu(data_T data[CO
// *************************************************
// PReLU Activation
// *************************************************
template <class data_T, class res_T, typename CONFIG_T>
void prelu(data_T data[CONFIG_T::n_in], data_T alpha[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]) {
template <class data_T, class param_T, class res_T, typename CONFIG_T>
void prelu(data_T data[CONFIG_T::n_in], param_T alpha[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]) {
//#pragma HLS PIPELINE

data_T datareg;
20 changes: 10 additions & 10 deletions hls4ml/templates/catapult/nnet_utils/nnet_activation_stream.h
@@ -545,8 +545,8 @@ template <class data_T, class res_T, typename CONFIG_T> void hard_tanh(ac_channe
// *************************************************
// Leaky RELU Activation
// *************************************************
template <class data_T, class res_T, typename CONFIG_T>
void leaky_relu(ac_channel<data_T> &data, typename data_T::value_type alpha, ac_channel<res_T> &res) {
template <class data_T, class param_T, class res_T, typename CONFIG_T>
void leaky_relu(ac_channel<data_T> &data, param_T alpha, ac_channel<res_T> &res) {
LeakyReLUActLoop:
for (int i = 0; i < CONFIG_T::n_in / res_T::size; i++) {
//#pragma HLS PIPELINE
@@ -571,8 +571,8 @@ void leaky_relu(ac_channel<data_T> &data, typename data_T::value_type alpha, ac_
// Thresholded RELU Activation
// *************************************************

template <class data_T, class res_T, typename CONFIG_T>
void thresholded_relu(ac_channel<data_T> &data, typename data_T::value_type theta, ac_channel<res_T> &res) {
template <class data_T, class param_T, class res_T, typename CONFIG_T>
void thresholded_relu(ac_channel<data_T> &data, param_T theta, ac_channel<res_T> &res) {
ThresholdedReLUActLoop:
for (int i = 0; i < CONFIG_T::n_in / res_T::size; i++) {
//#pragma HLS PIPELINE
@@ -720,8 +720,8 @@ template <class data_T, class res_T, typename CONFIG_T> void softsign(ac_channel

#ifndef USE_AC_MATH

template <class data_T, class res_T, typename CONFIG_T>
void elu(ac_channel<data_T> &data, typename data_T::value_type alpha, ac_channel<res_T> &res) {
template <class data_T, class param_T, class res_T, typename CONFIG_T>
void elu(ac_channel<data_T> &data, param_T alpha, ac_channel<res_T> &res) {
// Initialize the lookup table
#ifdef __HLS_SYN__
bool initialized = false;
@@ -763,8 +763,8 @@ void elu(ac_channel<data_T> &data, typename data_T::value_type alpha, ac_channel
}

#else
template <class data_T, class res_T, typename CONFIG_T>
void elu(ac_channel<data_T> &data, typename data_T::value_type alpha, ac_channel<res_T> &res) {
template <class data_T, class param_T, class res_T, typename CONFIG_T>
void elu(ac_channel<data_T> &data, param_T alpha, ac_channel<res_T> &res) {
EluActLoop:
for (int i = 0; i < CONFIG_T::n_in / res_T::size; i++) {
data_T in_data = data.read();
@@ -845,8 +845,8 @@ template <class data_T, class res_T, typename CONFIG_T> void selu(ac_channel<dat
// *************************************************
// PReLU Activation
// *************************************************
template <class data_T, class res_T, typename CONFIG_T>
void prelu(ac_channel<data_T> &data, typename data_T::value_type alpha[CONFIG_T::n_in], ac_channel<res_T> &res) {
template <class data_T, class param_T, class res_T, typename CONFIG_T>
void prelu(ac_channel<data_T> &data, const param_T alpha[CONFIG_T::n_in], ac_channel<res_T> &res) {
PReLUActLoop:
for (int i = 0; i < CONFIG_T::n_in / res_T::size; i++) {
//#pragma HLS PIPELINE