PULP templates: pass the initial buffer direction into network_run

network_run() and network_run_async() gain an `int initial_dir` parameter.
network_run_async() forwards it to network_run_cluster() in args[4], where it
initialises `dir` in place of the former hard-coded `int dir = 1;`.
In the non-L3 case L2_input_h moves from args[4] to args[5].
main.c.t passes initial_dir = 1, i.e. the value that used to be hard-coded.

Also use the ${prefix}-qualified names consistently: network_run() now calls
${prefix}network_run_async(), and network.h.t declares
${prefix}network_run_wait() to match its definition in network.c.t.

NOTE(review): the declaration of `args` in network_run_async() is outside
these hunks; confirm it has room for index 5 when l3_supported is false.
NOTE(review): leading whitespace and blank context lines were reconstructed
from the hunk line counts; apply with whitespace tolerance if the target
differs. The `index` blob ids predate the prefix fixes.

diff --git a/dory/Hardware_targets/PULP/Common/Templates/main.c.t b/dory/Hardware_targets/PULP/Common/Templates/main.c.t
index 4885c25f..ece05b4d 100644
--- a/dory/Hardware_targets/PULP/Common/Templates/main.c.t
+++ b/dory/Hardware_targets/PULP/Common/Templates/main.c.t
@@ -64,6 +64,7 @@ void application(void * arg) {
 #endif
   size_t l2_input_size = ${int(DORY_HW_graph[0].tiling_dimensions["L2"]["input_activation_memory"])};
   size_t input_size = 1000000;
+  int initial_dir = 1;
 
 % if l3_supported:
   void *ram_input = ram_malloc(input_size);
@@ -79,7 +80,7 @@ void application(void * arg) {
 % if l3_supported:
     ram_read(l2_buffer, ram_input, l2_input_size);
 % endif
-    ${prefix}network_run(l2_buffer, ${l2_buffer_size}, l2_buffer, ${"0" if single_input else "exec"}${f", {prefix}L2_input_h{' + exec * l2_input_size' if not single_input else ''}" if not l3_supported else ""});
+    ${prefix}network_run(l2_buffer, ${l2_buffer_size}, l2_buffer, ${"0" if single_input else "exec"}, initial_dir${f", {prefix}L2_input_h{' + exec * l2_input_size' if not single_input else ''}" if not l3_supported else ""});
 
 % if not single_input:
   }
diff --git a/dory/Hardware_targets/PULP/Common/Templates/network.c.t b/dory/Hardware_targets/PULP/Common/Templates/network.c.t
index 698eb346..a4b70247 100644
--- a/dory/Hardware_targets/PULP/Common/Templates/network.c.t
+++ b/dory/Hardware_targets/PULP/Common/Templates/network.c.t
@@ -104,7 +104,7 @@ void ${prefix}execute_layer_fork(void *args) {
   if (pi_core_id() == 0) pmsis_l1_malloc_free(layer_args->L1_buffer, ${l1_buffer});
 }
 
-struct ${prefix}network_run_token ${prefix}network_run_async(void *l2_buffer, size_t l2_buffer_size, void *l2_final_output, int exec${", void *L2_input_h" if not l3_supported else ""})
+struct ${prefix}network_run_token ${prefix}network_run_async(void *l2_buffer, size_t l2_buffer_size, void *l2_final_output, int exec, int initial_dir${", void *L2_input_h" if not l3_supported else ""})
 {
   struct pi_device cluster_dev = {0};
   struct pi_cluster_conf conf;
@@ -120,8 +120,9 @@ struct ${prefix}network_run_token ${prefix}network_run_async(void *l2_buffer, si
   args[1] = (unsigned int) l2_buffer_size;
   args[2] = (unsigned int) l2_final_output;
   args[3] = (unsigned int) exec;
+  args[4] = (unsigned int) initial_dir;
 % if not l3_supported:
-  args[4] = (unsigned int) L2_input_h;
+  args[5] = (unsigned int) L2_input_h;
 % endif
   // open cluster...
   pi_cluster_task(&cluster_task, ${prefix}network_run_cluster, args);
@@ -145,9 +146,9 @@ void ${prefix}network_run_wait(struct ${prefix}network_run_token token)
 % endif
 }
 
-void ${prefix}network_run(void *l2_buffer, size_t l2_buffer_size, void *l2_final_output, int exec${", void *L2_input_h" if not l3_supported else ""})
+void ${prefix}network_run(void *l2_buffer, size_t l2_buffer_size, void *l2_final_output, int exec, int initial_dir${", void *L2_input_h" if not l3_supported else ""})
 {
-  ${prefix}network_run_wait(network_run_async(l2_buffer, l2_buffer_size, l2_final_output, exec${", L2_input_h" if not l3_supported else ""}));
+  ${prefix}network_run_wait(${prefix}network_run_async(l2_buffer, l2_buffer_size, l2_final_output, exec, initial_dir${", L2_input_h" if not l3_supported else ""}));
 }
 
 void ${prefix}network_run_cluster(void *args) {
@@ -156,8 +157,9 @@ void ${prefix}network_run_cluster(void *args) {
   size_t l2_buffer_size = (size_t) real_args[1];
   void * l2_final_output = (void *) real_args[2];
   int exec = (int) real_args[3];
+  int dir = (int) real_args[4];
 % if not l3_supported:
-  void * L2_input_h = (void *)real_args[4];
+  void * L2_input_h = (void *)real_args[5];
 % endif
 /*
   - initial buffer allocation L2 and L1
@@ -172,7 +174,6 @@ void ${prefix}network_run_cluster(void *args) {
   void *L3_weights_curr = L3_weights;
   void *bypass_activations = NULL;
 
-  int dir = 1;
   int residual_number = 0;
   int bypass_dimension = 0;
 % if not l3_supported:
diff --git a/dory/Hardware_targets/PULP/Common/Templates/network.h.t b/dory/Hardware_targets/PULP/Common/Templates/network.h.t
index 595aebaf..0ffff345 100644
--- a/dory/Hardware_targets/PULP/Common/Templates/network.h.t
+++ b/dory/Hardware_targets/PULP/Common/Templates/network.h.t
@@ -41,9 +41,9 @@ void ${prefix}network_terminate();
 void ${prefix}network_initialize();
 % endif
 void ${prefix}network_run_cluster(void * args);
-struct ${prefix}network_run_token ${prefix}network_run_async(void *l2_buffer, size_t l2_buffer_size, void *l2_final_output, int exec${", void *L2_input_h" if not l3_supported else ""});
+struct ${prefix}network_run_token ${prefix}network_run_async(void *l2_buffer, size_t l2_buffer_size, void *l2_final_output, int exec, int initial_dir${", void *L2_input_h" if not l3_supported else ""});
-void network_run_wait(struct ${prefix}network_run_token token);
+void ${prefix}network_run_wait(struct ${prefix}network_run_token token);
-void ${prefix}network_run(void *l2_buffer, size_t l2_buffer_size, void *l2_final_output, int exec${", void *L2_input_h" if not l3_supported else ""});
+void ${prefix}network_run(void *l2_buffer, size_t l2_buffer_size, void *l2_final_output, int exec, int initial_dir${", void *L2_input_h" if not l3_supported else ""});
 void ${prefix}execute_layer_fork(void *arg);
 
 % if l3_supported and not single_input: