diff --git a/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_pooling.h b/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_pooling.h index cb602b712f..d4ae915335 100644 --- a/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_pooling.h +++ b/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_pooling.h @@ -83,18 +83,17 @@ struct pooling1d_config { }; template void pooling1d_cl(const data_T &data, res_T &res) { - // For 'same' padding, increase input width by left- and right-side padding - // For 'valid' padding, reduce input width to area covered by pooling function - static constexpr int padded_width = (CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0) - ? (CONFIG_T::n_in / CONFIG_T::stride_width * CONFIG_T::stride_width) - : (CONFIG_T::n_in + CONFIG_T::pad_left + CONFIG_T::pad_right); + // Add padding and reduce input width to area covered by pooling function + static constexpr int full_padded_width = CONFIG_T::n_in + CONFIG_T::pad_left + CONFIG_T::pad_right; + static constexpr int restricted_padded_width = full_padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width; FiltLoop: #pragma unroll [[intel::disable_loop_pipelining]] for (int filt = 0; filt < CONFIG_T::n_filt; filt++) { InputWidthLoop: #pragma unroll - [[intel::disable_loop_pipelining]] for (int inp_col = 0; inp_col < padded_width; inp_col += CONFIG_T::stride_width) { + [[intel::disable_loop_pipelining]] for (int inp_col = 0; inp_col < restricted_padded_width; + inp_col += CONFIG_T::stride_width) { [[intel::fpga_register]] typename data_T::value_type pool[CONFIG_T::pool_width]; // Keep track of number of pixels in image vs padding region; needed for rescaling Average Pooling @@ -103,7 +102,8 @@ template void pooling1d_cl(const PoolWidthLoop: #pragma unroll [[intel::disable_loop_pipelining]] for (int pool_col = 0; pool_col < CONFIG_T::stride_width; pool_col++) { - if (inp_col + pool_col < CONFIG_T::pad_left || inp_col + pool_col >= (padded_width - CONFIG_T::pad_right)) { + if (inp_col + pool_col < CONFIG_T::pad_left || + inp_col + pool_col >= (full_padded_width - CONFIG_T::pad_right)) { // Add padding pool[pool_col] = pad_val(); if (CONFIG_T::count_pad) @@ -170,25 +170,22 @@ struct pooling2d_config { }; template void pooling2d_cl(const data_T &data, res_T &res) { - // For 'same' padding, increase input width by left- and right-side padding - // For 'valid' padding, reduce input width to area covered by pooling function - static constexpr int padded_width = (CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0) - ? (CONFIG_T::in_width / CONFIG_T::stride_width * CONFIG_T::stride_width) - : (CONFIG_T::in_width + CONFIG_T::pad_left + CONFIG_T::pad_right); - static constexpr int padded_height = (CONFIG_T::pad_top == 0 && CONFIG_T::pad_bottom == 0) - ? (CONFIG_T::in_height / CONFIG_T::stride_height * CONFIG_T::stride_height) - : (CONFIG_T::in_height + CONFIG_T::pad_top + CONFIG_T::pad_bottom); + // Add padding and reduce input width to area covered by pooling function + static constexpr int full_padded_width = CONFIG_T::in_width + CONFIG_T::pad_left + CONFIG_T::pad_right; + static constexpr int full_padded_height = CONFIG_T::in_height + CONFIG_T::pad_top + CONFIG_T::pad_bottom; + static constexpr int restricted_padded_width = full_padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width; + static constexpr int restricted_padded_height = full_padded_height / CONFIG_T::stride_height * CONFIG_T::stride_height; FiltLoop: #pragma unroll [[intel::disable_loop_pipelining]] for (int filt = 0; filt < CONFIG_T::n_filt; filt++) { InputHeightLoop: #pragma unroll - [[intel::disable_loop_pipelining]] for (int inp_col = 0; inp_col < padded_height; + [[intel::disable_loop_pipelining]] for (int inp_col = 0; inp_col < restricted_padded_height; inp_col += CONFIG_T::stride_height) { InputWidthLoop: #pragma unroll - [[intel::disable_loop_pipelining]] for (int inp_width = 0; inp_width < padded_width; + [[intel::disable_loop_pipelining]] for (int inp_width = 0; inp_width < restricted_padded_width; inp_width += CONFIG_T::stride_width) { [[intel::fpga_register]] typename data_T::value_type pool[CONFIG_T::pool_height * CONFIG_T::pool_width]; @@ -203,9 +200,9 @@ template void pooling2d_cl(const [[intel::disable_loop_pipelining]] for (int pool_row = 0; pool_row < CONFIG_T::stride_width; pool_row++) { if (inp_col + pool_col < CONFIG_T::pad_top || - inp_col + pool_col >= (padded_height - CONFIG_T::pad_bottom) || + inp_col + pool_col >= (full_padded_height - CONFIG_T::pad_bottom) || inp_width + pool_row < CONFIG_T::pad_left || - inp_width + pool_row >= (padded_width - CONFIG_T::pad_right)) { + inp_width + pool_row >= (full_padded_width - CONFIG_T::pad_right)) { // Add padding pool[pool_col * CONFIG_T::stride_width + pool_row] = pad_val(); diff --git a/hls4ml/templates/quartus/firmware/nnet_utils/nnet_pooling.h b/hls4ml/templates/quartus/firmware/nnet_utils/nnet_pooling.h index bbfc0908ef..6bc254db9f 100644 --- a/hls4ml/templates/quartus/firmware/nnet_utils/nnet_pooling.h +++ b/hls4ml/templates/quartus/firmware/nnet_utils/nnet_pooling.h @@ -122,11 +122,9 @@ struct pooling1d_config { template void pooling1d_cl(data_T data[CONFIG_T::n_in * CONFIG_T::n_filt], res_T res[CONFIG_T::n_out * CONFIG_T::n_filt]) { - // For 'same' padding, increase input width by left- and right-side padding - // For 'valid' padding, reduce input width to area covered by pooling function - static constexpr int padded_width = (CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0) - ? (CONFIG_T::n_in / CONFIG_T::stride_width * CONFIG_T::stride_width) - : (CONFIG_T::n_in + CONFIG_T::pad_left + CONFIG_T::pad_right); + // Add padding and reduce input width to area covered by pooling function + static constexpr int full_padded_width = CONFIG_T::n_in + CONFIG_T::pad_left + CONFIG_T::pad_right; + static constexpr int restricted_padded_width = full_padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width; FiltLoop: #pragma unroll @@ -135,7 +133,7 @@ void pooling1d_cl(data_T data[CONFIG_T::n_in * CONFIG_T::n_filt], res_T res[CONF InputWidthLoop: #pragma unroll #pragma disable_loop_pipelining - for (int inp_col = 0; inp_col < padded_width; inp_col += CONFIG_T::stride_width) { + for (int inp_col = 0; inp_col < restricted_padded_width; inp_col += CONFIG_T::stride_width) { hls_register data_T pool[CONFIG_T::pool_width]; // Keep track of number of pixels in image vs padding region; needed for rescaling Average Pooling @@ -145,7 +143,8 @@ void pooling1d_cl(data_T data[CONFIG_T::n_in * CONFIG_T::n_filt], res_T res[CONF #pragma unroll #pragma disable_loop_pipelining for (int pool_col = 0; pool_col < CONFIG_T::stride_width; pool_col++) { - if (inp_col + pool_col < CONFIG_T::pad_left || inp_col + pool_col >= (padded_width - CONFIG_T::pad_right)) { + if (inp_col + pool_col < CONFIG_T::pad_left || + inp_col + pool_col >= (full_padded_width - CONFIG_T::pad_right)) { // Add padding pool[pool_col] = pad_val(); if (CONFIG_T::count_pad) @@ -220,14 +219,11 @@ struct pooling2d_config { template void pooling2d_cl(data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_T::n_filt], res_T res[CONFIG_T::out_height * CONFIG_T::out_width * CONFIG_T::n_filt]) { - // For 'same' padding, increase input width by left- and right-side padding - // For 'valid' padding, reduce input width to area covered by pooling function - static constexpr int padded_width = (CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0) - ? (CONFIG_T::in_width / CONFIG_T::stride_width * CONFIG_T::stride_width) - : (CONFIG_T::in_width + CONFIG_T::pad_left + CONFIG_T::pad_right); - static constexpr int padded_height = (CONFIG_T::pad_top == 0 && CONFIG_T::pad_bottom == 0) - ? (CONFIG_T::in_height / CONFIG_T::stride_height * CONFIG_T::stride_height) - : (CONFIG_T::in_height + CONFIG_T::pad_top + CONFIG_T::pad_bottom); + // Add padding and reduce input width to area covered by pooling function + static constexpr int full_padded_width = CONFIG_T::in_width + CONFIG_T::pad_left + CONFIG_T::pad_right; + static constexpr int full_padded_height = CONFIG_T::in_height + CONFIG_T::pad_top + CONFIG_T::pad_bottom; + static constexpr int restricted_padded_width = full_padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width; + static constexpr int restricted_padded_height = full_padded_height / CONFIG_T::stride_height * CONFIG_T::stride_height; FiltLoop: #pragma unroll @@ -236,11 +232,11 @@ void pooling2d_cl(data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_ InputHeightLoop: #pragma unroll #pragma disable_loop_pipelining - for (int inp_col = 0; inp_col < padded_height; inp_col += CONFIG_T::stride_height) { + for (int inp_col = 0; inp_col < restricted_padded_height; inp_col += CONFIG_T::stride_height) { InputWidthLoop: #pragma unroll #pragma disable_loop_pipelining - for (int inp_width = 0; inp_width < padded_width; inp_width += CONFIG_T::stride_width) { + for (int inp_width = 0; inp_width < restricted_padded_width; inp_width += CONFIG_T::stride_width) { hls_register data_T pool[CONFIG_T::pool_height * CONFIG_T::pool_width]; // Keep track of number of pixels in image vs padding region; needed for rescaling Average Pooling @@ -255,9 +251,9 @@ void pooling2d_cl(data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_ #pragma disable_loop_pipelining for (int pool_row = 0; pool_row < CONFIG_T::stride_width; pool_row++) { if (inp_col + pool_col < CONFIG_T::pad_top || - inp_col + pool_col >= (padded_height - CONFIG_T::pad_bottom) || + inp_col + pool_col >= (full_padded_height - CONFIG_T::pad_bottom) || inp_width + pool_row < CONFIG_T::pad_left || - inp_width + pool_row >= (padded_width - CONFIG_T::pad_right)) { + inp_width + pool_row >= (full_padded_width - CONFIG_T::pad_right)) { // Add padding pool[pool_col * CONFIG_T::stride_width + pool_row] = pad_val(); if (CONFIG_T::count_pad)