Skip to content

Commit

Permalink
Merge pull request #63 from pulp-platform/lukamac/pr/async-network-run
Browse files Browse the repository at this point in the history
Add asynchronous version of network run
  • Loading branch information
ABurrello authored Nov 6, 2023
2 parents 81d62ba + 4357a4c commit 5549e9a
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 5 deletions.
16 changes: 14 additions & 2 deletions dory/Hardware_targets/PULP/Common/Templates/network.c.t
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ void ${prefix}execute_layer_fork(void *args) {
if (pi_core_id() == 0) pmsis_l1_malloc_free(layer_args->L1_buffer, ${l1_buffer});
}

void ${prefix}network_run(void *l2_buffer, size_t l2_buffer_size, void *l2_final_output, int exec${", void *L2_input_h" if not l3_supported else ""})
struct ${prefix}network_run_token ${prefix}network_run_async(void *l2_buffer, size_t l2_buffer_size, void *l2_final_output, int exec${", void *L2_input_h" if not l3_supported else ""})
{
struct pi_device cluster_dev = {0};
struct pi_cluster_conf conf;
Expand Down Expand Up @@ -132,12 +132,24 @@ void ${prefix}network_run(void *l2_buffer, size_t l2_buffer_size, void *l2_final
cluster_task.stack_size = ${master_stack};
cluster_task.slave_stack_size = ${slave_stack};
pi_cluster_send_task_to_cl(&cluster_dev, &cluster_task);
pi_cluster_close(&cluster_dev);
return (struct ${prefix}network_run_token) {
.cluster_dev = cluster_dev
};
}

/*
 * Finish a run that was started with the asynchronous entry point.
 *
 * token: the value returned by the asynchronous run function; its
 *        cluster_dev member is the device that gets closed here.
 *
 * When the 'Perf_final' verbosity level is enabled at generation time,
 * the final performance report is printed after the cluster is closed.
 */
void ${prefix}network_run_wait(struct ${prefix}network_run_token token)
{
  struct pi_device *cluster = &token.cluster_dev;

  pi_cluster_close(cluster);
% if 'Perf_final' in verbose_level:
  print_perf("Final", ${prefix}cycle_network_execution, ${MACs});
% endif
}

/*
 * Blocking entry point: starts the network through the asynchronous run
 * function and immediately waits on the token it returns.
 *
 * The arguments are forwarded unchanged. The trailing L2_input_h
 * parameter is only generated when L3 memory is not supported.
 */
void ${prefix}network_run(void *l2_buffer, size_t l2_buffer_size, void *l2_final_output, int exec${", void *L2_input_h" if not l3_supported else ""})
{
  /* Both helpers are emitted with the template prefix, so the calls must carry it too. */
  ${prefix}network_run_wait(${prefix}network_run_async(l2_buffer, l2_buffer_size, l2_final_output, exec${", L2_input_h" if not l3_supported else ""}));
}

void ${prefix}network_run_cluster(void *args) {
unsigned int * real_args = (unsigned int *) args;
void * l2_buffer = (void *) real_args[0];
Expand Down
11 changes: 8 additions & 3 deletions dory/Hardware_targets/PULP/Common/Templates/network.h.t
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,6 @@
#ifndef __${prefix.upper()}NETWORK_H__
#define __${prefix.upper()}NETWORK_H__

% if sdk == 'gap_sdk':
#include "pulp.h"
% endif
<%
l3_supported = DORY_HW_graph[0].HW_description['memory']['levels'] > 2
single_input = n_inputs==1
Expand All @@ -31,13 +28,21 @@
#include "${prefix}weights_definition.h"
% endif
#include <stddef.h>
#include "pmsis.h"


/*
 * Token handed back by the asynchronous run function and consumed by the
 * matching wait function.
 */
struct ${prefix}network_run_token {
/* Cluster device used for the run; the wait function closes it. */
struct pi_device cluster_dev;
};


/* Public entry points of the generated network. All names carry the template prefix. */
% if l3_supported:
/* Only generated when the target has more than two memory levels. */
void ${prefix}network_terminate();
void ${prefix}network_initialize();
% endif
void ${prefix}network_run_cluster(void * args);
/* Starts a run and returns the token that must later be passed to the wait function. */
struct ${prefix}network_run_token ${prefix}network_run_async(void *l2_buffer, size_t l2_buffer_size, void *l2_final_output, int exec${", void *L2_input_h" if not l3_supported else ""});
/* Completes a run started by the asynchronous entry point. */
void ${prefix}network_run_wait(struct ${prefix}network_run_token token);
/* Blocking variant: same parameters as the asynchronous entry point, no token returned. */
void ${prefix}network_run(void *l2_buffer, size_t l2_buffer_size, void *l2_final_output, int exec${", void *L2_input_h" if not l3_supported else ""});
void ${prefix}execute_layer_fork(void *arg);

Expand Down

0 comments on commit 5549e9a

Please sign in to comment.