Skip to content

Commit

Permalink
[GPU] remove code duplication in mvn_gpu_bfyx_opt kernel (#28284)
Browse files Browse the repository at this point in the history
### Details:
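 - Spotted code duplication: the leftover element of each data set was handled by a separate `if (in_data_set_idx < leftovers)` block repeated after every loop. Each work-item now computes its own iteration count `iters_num` (`items_num`, plus one when `in_data_set_idx < leftovers`), so the main loops also cover the leftover element and the duplicated blocks are removed.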

### Tickets:
 -
  • Loading branch information
michal-miotk authored Jan 8, 2025
1 parent 97a2148 commit 3526fa5
Showing 1 changed file with 7 additions and 36 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,62 +25,43 @@ KERNEL (mvn_gpu_bfyx_opt)(

const uint data_set_offset = data_set_idx * data_set_size;
const uint my_data_offset = data_set_offset + in_data_set_idx;
uint iters_num = items_num;
if (in_data_set_idx < leftovers)
++iters_num;

float my_sum = 0;
float tmp;

//each WI reads items_num consecutive items from batch*feature
for (uint i=0; i<items_num; ++i)
for (uint i=0; i<iters_num; ++i)
{
my_sum += (float)input[my_data_offset + i * workers_per_data_set];
}

if (in_data_set_idx < leftovers)
{
my_sum += (float)input[data_set_offset + workers_per_data_set * items_num + in_data_set_idx];
}

my_sum = work_group_reduce_add(my_sum) / data_set_size;

#if NORMALIZE_VARIANCE == 0
for (uint i=0; i<items_num; ++i) {
for (uint i=0; i<iters_num; ++i) {
uint iteration_in_data_set_offset = i * workers_per_data_set;
ACTIVATION_TYPE result = TO_ACTIVATION_TYPE(input[my_data_offset + iteration_in_data_set_offset]) - TO_ACTIVATION_TYPE(my_sum);
# if HAS_FUSED_OPS
FUSED_OPS;
output[my_data_offset + iteration_in_data_set_offset] = FUSED_OPS_RESULT;
# else
output[my_data_offset + iteration_in_data_set_offset] = TO_OUTPUT_TYPE(ACTIVATION(result, ACTIVATION_PARAMS));
# endif
}
if (in_data_set_idx < leftovers) {
uint iteration_in_data_set_offset = items_num * workers_per_data_set;
ACTIVATION_TYPE result = TO_ACTIVATION_TYPE(input[my_data_offset + iteration_in_data_set_offset]) - TO_ACTIVATION_TYPE(my_sum);
# if HAS_FUSED_OPS
FUSED_OPS;
output[my_data_offset + iteration_in_data_set_offset] = FUSED_OPS_RESULT;
# else
output[my_data_offset + iteration_in_data_set_offset] = TO_OUTPUT_TYPE(ACTIVATION(result, ACTIVATION_PARAMS));
# endif
}
#else

float my_variance = 0.f;
//each WI reads items_num consecutive items from batch*feature
for (uint i=0; i<items_num; ++i)
for (uint i=0; i<iters_num; ++i)
{
tmp = (float)input[my_data_offset + i * workers_per_data_set];
tmp -= my_sum;
my_variance = fma(tmp, tmp, my_variance);
}

if (in_data_set_idx < leftovers)
{
tmp = (float)input[data_set_offset + workers_per_data_set * items_num + in_data_set_idx];
tmp -= my_sum;
my_variance = fma(tmp, tmp, my_variance);
}

my_variance = work_group_reduce_add(my_variance);

if (in_data_set_idx == 0)
Expand All @@ -96,24 +77,14 @@ KERNEL (mvn_gpu_bfyx_opt)(

my_variance = work_group_broadcast(my_variance, 0);

for (uint i=0; i<items_num; ++i) {
for (uint i=0; i<iters_num; ++i) {
uint iteration_in_data_set_offset = i * workers_per_data_set;
ACTIVATION_TYPE result = (TO_ACTIVATION_TYPE(input[my_data_offset + iteration_in_data_set_offset]) - TO_ACTIVATION_TYPE(my_sum)) * TO_ACTIVATION_TYPE(my_variance);
# if HAS_FUSED_OPS
FUSED_OPS;
output[my_data_offset + iteration_in_data_set_offset] = FUSED_OPS_RESULT;
# else
output[my_data_offset + iteration_in_data_set_offset] = TO_OUTPUT_TYPE(ACTIVATION(result, ACTIVATION_PARAMS));
# endif
}
if (in_data_set_idx < leftovers) {
uint iteration_in_data_set_offset = items_num * workers_per_data_set;
ACTIVATION_TYPE result = (TO_ACTIVATION_TYPE(input[my_data_offset + iteration_in_data_set_offset]) - TO_ACTIVATION_TYPE(my_sum)) * TO_ACTIVATION_TYPE(my_variance);
# if HAS_FUSED_OPS
FUSED_OPS;
output[my_data_offset + iteration_in_data_set_offset] = FUSED_OPS_RESULT;
# else
output[my_data_offset + iteration_in_data_set_offset] = TO_OUTPUT_TYPE(ACTIVATION(result, ACTIVATION_PARAMS));
# endif
}
#endif
Expand Down

0 comments on commit 3526fa5

Please sign in to comment.