Skip to content

Commit

Permalink
Merge pull request #23 from davidozog/pr/SOS_v1.5.1_sync
Browse files Browse the repository at this point in the history
Sync with SOS v1.5.1
  • Loading branch information
davidozog authored Apr 29, 2022
2 parents d563c03 + 7544378 commit 3480dae
Show file tree
Hide file tree
Showing 54 changed files with 1,626 additions and 753 deletions.
20 changes: 3 additions & 17 deletions test/apps/gups.c
Original file line number Diff line number Diff line change
Expand Up @@ -322,25 +322,11 @@ SHMEMRandomAccess(void)
uint64_t NumUpdates; /* total number of updates to table */
uint64_t ProcNumUpdates; /* number of updates per processor */

static long pSync_bcast[SHMEM_BCAST_SYNC_SIZE];
static long long int llpWrk[SHMEM_REDUCE_MIN_WRKDATA_SIZE];

static long pSync_reduce[SHMEM_REDUCE_SYNC_SIZE];
static int ipWrk[SHMEM_REDUCE_MIN_WRKDATA_SIZE];

FILE *outFile = NULL;
double *GUPs;
double *temp_GUPs;


for (i = 0; i < SHMEM_BCAST_SYNC_SIZE; i += 1){
pSync_bcast[i] = SHMEM_SYNC_VALUE;
}

for (i = 0; i < SHMEM_REDUCE_SYNC_SIZE; i += 1){
pSync_reduce[i] = SHMEM_SYNC_VALUE;
}

SHMEMGUPs = -1;
GUPs = &SHMEMGUPs;

Expand Down Expand Up @@ -399,7 +385,7 @@ SHMEMRandomAccess(void)
if (! HPCC_PELock) sAbort = 1;

shmem_barrier_all();
shmem_int_sum_to_all(&rAbort, &sAbort, 1, 0, 0, NumProcs, ipWrk, pSync_reduce);
shmem_int_sum_reduce(SHMEM_TEAM_WORLD, &rAbort, &sAbort, 1);
shmem_barrier_all();

if (rAbort > 0) {
Expand Down Expand Up @@ -468,7 +454,7 @@ SHMEMRandomAccess(void)
/* distribute result to all nodes */
temp_GUPs = GUPs;
shmem_barrier_all();
shmem_broadcast64(GUPs,temp_GUPs,1,0,0,0,NumProcs,pSync_bcast);
shmem_double_broadcast(SHMEM_TEAM_WORLD,GUPs,temp_GUPs,1,0);
shmem_barrier_all();

/* Verification phase */
Expand All @@ -494,7 +480,7 @@ SHMEMRandomAccess(void)
}

shmem_barrier_all();
shmem_longlong_sum_to_all( (long long *)&GlbNumErrors, (long long *)&NumErrors, 1, 0,0, NumProcs,llpWrk, pSync_reduce);
shmem_longlong_sum_reduce(SHMEM_TEAM_WORLD, (long long *)&GlbNumErrors, (long long *)&NumErrors, 1);
shmem_barrier_all();

/* End timed section */
Expand Down
8 changes: 4 additions & 4 deletions test/performance/shmem_perf_suite/bi_dir.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,13 @@ static inline void bi_bw_put(int len, perf_metrics_t * const metric_info)
}

if (streaming_node(metric_info)) {
shmem_int_p(&fin, 1, dest);
shmem_int_atomic_set(&fin, 1, dest);
shmem_int_wait_until(&fin, SHMEM_CMP_EQ, 0);
end = perf_shmemx_wtime();
calc_and_print_results(end, start, len, metric_info);
} else {
shmem_int_wait_until(&fin, SHMEM_CMP_EQ, 1);
shmem_int_p(&fin, 0, dest);
shmem_int_atomic_set(&fin, 0, dest);
}

}
Expand Down Expand Up @@ -146,13 +146,13 @@ static inline void bi_bw_get(int len, perf_metrics_t * const metric_info)
}

if (streaming_node(metric_info)) {
shmem_int_p(&fin, 1, dest);
shmem_int_atomic_set(&fin, 1, dest);
shmem_int_wait_until(&fin, SHMEM_CMP_EQ, 0);
end = perf_shmemx_wtime();
calc_and_print_results(end, start, len, metric_info);
} else {
shmem_int_wait_until(&fin, SHMEM_CMP_EQ, 1);
shmem_int_p(&fin, 0, dest);
shmem_int_atomic_set(&fin, 0, dest);
}
}

30 changes: 14 additions & 16 deletions test/performance/shmem_perf_suite/bw_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,17 +141,18 @@ void print_data_results(double bw, double mr, const perf_metrics_t * const data,
static inline
void calc_and_print_results(double end_t, double start_t, int len,
perf_metrics_t * const metric_info) {
int stride = 0, start_pe = 0, nPEs = 0;
int start_pe = 0, nPEs = metric_info->num_pes;
static double pe_bw_sum, bw = 0.0; /*must be symmetric for reduction*/
double pe_bw_avg = 0.0, pe_mr_avg = 0.0;
int nred_elements = 1;
static double pwrk[SHMEM_REDUCE_MIN_WRKDATA_SIZE];
static double pe_time_start, pe_time_end,
end_time_max = 0.0, start_time_min = 0.0;
double total_t = 0.0, total_t_max = 0.0;
int multiplier = 1;
shmem_team_t sync_team;

PE_set_used_adjustments(&nPEs, &stride, &start_pe, metric_info);
PE_set_used_adjustments(&nPEs, &start_pe, metric_info);
sync_team = (start_pe == 0) ? streaming_team : target_team;

/* 2x as many messages at once for bi-directional */
if(metric_info->b_type == BI_DIR)
Expand Down Expand Up @@ -188,16 +189,12 @@ void calc_and_print_results(double end_t, double start_t, int len,

pe_time_start = start_t;
pe_time_end = end_t;
shmem_barrier(start_pe, stride, nPEs, bar_psync);
shmem_team_sync(sync_team);
if (metric_info->cstyle != COMM_INCAST) {
if (nPEs >= 2) {
shmem_double_min_to_all(&start_time_min, &pe_time_start, nred_elements,
start_pe, stride, nPEs, pwrk,
red_psync);
shmem_barrier(start_pe, stride, nPEs, bar_psync);
shmem_double_max_to_all(&end_time_max, &pe_time_end, nred_elements,
start_pe, stride, nPEs, pwrk,
red_psync);
shmem_double_min_reduce(streaming_team, &start_time_min, &pe_time_start, nred_elements);
shmem_team_sync(sync_team);
shmem_double_max_reduce(streaming_team, &end_time_max, &pe_time_end, nred_elements);
} else if (nPEs == 1) {
start_time_min = pe_time_start;
end_time_max = pe_time_end;
Expand Down Expand Up @@ -226,9 +223,7 @@ void calc_and_print_results(double end_t, double start_t, int len,
pe_bw_sum = bw;
} else {
if (nPEs >= 2) {
shmem_double_sum_to_all(&pe_bw_sum, &bw, nred_elements,
start_pe, stride, nPEs, pwrk,
red_psync);
shmem_double_sum_reduce(streaming_team, &pe_bw_sum, &bw, nred_elements);
} else if (nPEs == 1) {
pe_bw_sum = bw;
}
Expand Down Expand Up @@ -421,9 +416,8 @@ int bw_init_data_stream(perf_metrics_t * const metric_info,
#if defined(ENABLE_THREADS)
thread_safety_validation_check(metric_info);
#endif
init_psync_arrays();

if(only_even_PEs_check(metric_info->my_node, metric_info->num_pes) != 0) {
if (only_even_PEs_check(metric_info->my_node, metric_info->num_pes) != 0) {
return -1;
}

Expand All @@ -433,6 +427,10 @@ int bw_init_data_stream(perf_metrics_t * const metric_info,
metric_info->dest = aligned_buffer_alloc(metric_info->max_len * metric_info->nthreads);
init_array(metric_info->dest, metric_info->max_len * metric_info->nthreads, metric_info->my_node);

if (create_teams(metric_info) != 0) {
return -1;
}

return 0;
}

Expand Down
86 changes: 49 additions & 37 deletions test/performance/shmem_perf_suite/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -168,17 +168,15 @@ typedef struct perf_metrics {
int individual_report;
} perf_metrics_t;

/* psync arrays used in metric calculation */
long red_psync[SHMEM_REDUCE_SYNC_SIZE];
long bar_psync[SHMEM_BARRIER_SYNC_SIZE];

shmem_team_t streaming_team, target_team;

/* default settings with no input provided */
static inline
void set_metric_defaults(perf_metrics_t *metric_info) {
char *val = NULL;
metric_info->trials_multiplier = 1.0; /* Default 1 */
val = getenv("SHMEM_PERF_SUITE_TRIALS_MULTIPLIER");

if (val && strlen(val))
metric_info->trials_multiplier = atof(val);

Expand Down Expand Up @@ -220,17 +218,6 @@ void update_metrics(perf_metrics_t *metric_info) {
metric_info->midpt = metric_info->num_pes / 2;
}

/* Fill every entry of the file-scope red_psync and bar_psync arrays
 * with SHMEM_SYNC_VALUE. */
static inline
void init_psync_arrays(void) {
    int idx = 0;

    /* reduction pSync */
    while (idx < SHMEM_REDUCE_SYNC_SIZE) {
        red_psync[idx] = SHMEM_SYNC_VALUE;
        idx++;
    }

    /* barrier pSync */
    idx = 0;
    while (idx < SHMEM_BARRIER_SYNC_SIZE) {
        bar_psync[idx] = SHMEM_SYNC_VALUE;
        idx++;
    }
}

/* return microseconds */
double perf_shmemx_wtime(void);

Expand Down Expand Up @@ -572,6 +559,7 @@ void thread_safety_validation_check(perf_metrics_t * const metric_info) {
}
#endif

/* Only an even number of PEs is allowed for performance tests */
static inline
int only_even_PEs_check(int my_node, int num_pes) {
if (num_pes % 2 != 0) {
Expand Down Expand Up @@ -612,7 +600,7 @@ int partner_node(const perf_metrics_t * const my_info)
static inline
int streaming_node(const perf_metrics_t * const my_info)
{
if(my_info->cstyle == COMM_PAIRWISE) {
if (my_info->cstyle == COMM_PAIRWISE) {
return (my_info->my_node < my_info->szinitiator);
} else {
assert(my_info->cstyle == COMM_INCAST);
Expand Down Expand Up @@ -643,16 +631,9 @@ int check_hostname_validation(const perf_metrics_t * const my_info) {

int hostname_status = -1;

/* hostname_size should be a length divisible by 4 */
int hostname_size = (MAX_HOSTNAME_LEN % 4 == 0) ? MAX_HOSTNAME_LEN :
MAX_HOSTNAME_LEN + (4 - MAX_HOSTNAME_LEN % 4);
int hostname_size = MAX_HOSTNAME_LEN;
int i, errors = 0;

/* pSync for fcollect of hostnames */
static long pSync_collect[SHMEM_COLLECT_SYNC_SIZE];
for (i = 0; i < SHMEM_COLLECT_SYNC_SIZE; i++)
pSync_collect[i] = SHMEM_SYNC_VALUE;

char *hostname = (char *) shmem_malloc (hostname_size * sizeof(char));
char *dest = (char *) shmem_malloc (my_info->num_pes * hostname_size *
sizeof(char));
Expand All @@ -669,9 +650,7 @@ int check_hostname_validation(const perf_metrics_t * const my_info) {
}
shmem_barrier_all();

/* nelems needs to be updated based on 32-bit API */
shmem_fcollect32(dest, hostname, hostname_size/4, 0, 0, my_info->num_pes,
pSync_collect);
shmem_char_fcollect(SHMEM_TEAM_WORLD, dest, hostname, hostname_size);

char *snode_name = NULL;
char *tnode_name = NULL;
Expand Down Expand Up @@ -769,16 +748,16 @@ void large_message_metric_chg(perf_metrics_t * const metric_info, int len) {
static inline
red_PE_set validation_set(perf_metrics_t * const my_info, int *nPEs)
{
if(my_info->cstyle == COMM_PAIRWISE) {
if(streaming_node(my_info)) {
if (my_info->cstyle == COMM_PAIRWISE) {
if (streaming_node(my_info)) {
*nPEs = my_info->szinitiator;
return FIRST_HALF;
} else if(target_node(my_info)) {
} else if (target_node(my_info)) {
*nPEs = my_info->sztarget;
return SECOND_HALF;
} else {
fprintf(stderr, "Warning: you are getting data from a node that "
"wasn't a part of the perf set \n ");
"wasn't a part of the perf set \n ");
return 0;
}
} else {
Expand All @@ -788,11 +767,8 @@ red_PE_set validation_set(perf_metrics_t * const my_info, int *nPEs)
}
}

/* reduction to collect performance results from PE set
* then start_pe will print results --- assumes num_pes is even */
static inline
void PE_set_used_adjustments(int *nPEs, int *stride, int *start_pe,
perf_metrics_t * const my_info) {
void PE_set_used_adjustments(int *nPEs, int *start_pe, perf_metrics_t * const my_info) {
red_PE_set PE_set = validation_set(my_info, nPEs);

if(PE_set == FIRST_HALF || PE_set == FULL_SET) {
Expand All @@ -802,8 +778,6 @@ void PE_set_used_adjustments(int *nPEs, int *stride, int *start_pe,
assert(PE_set == SECOND_HALF);
*start_pe = my_info->midpt;
}

*stride = 0; /* back to back PEs */
}

static
Expand All @@ -822,3 +796,41 @@ void print_header(perf_metrics_t * const metric_info) {
#endif
printf("\n");
}

/* Split SHMEM_TEAM_WORLD (start 0, stride 1, num_pes / 2 PEs) into the
 * file-scope streaming_team handle.
 *
 * Returns 0 normally.  Prints a message to stderr and returns -1 when this
 * PE's rank lies in [0, num_pes / 2) but streaming_team was left as
 * SHMEM_TEAM_INVALID. */
static
int create_streaming_team(perf_metrics_t * const metric_info) {
    const int half = metric_info->num_pes / 2;
    const int rank = metric_info->my_node;

    shmem_team_split_strided(SHMEM_TEAM_WORLD, 0, 1, half, NULL, 0, &streaming_team);

    /* PEs outside [0, half) are not checked: an invalid handle is accepted there. */
    if (rank < 0 || rank >= half)
        return 0;

    if (streaming_team != SHMEM_TEAM_INVALID)
        return 0;

    fprintf(stderr, "PE %d: Streaming team creation failed\n", rank);
    return -1;
}

/* Split SHMEM_TEAM_WORLD (start midpt, stride 1, num_pes / 2 PEs) into the
 * file-scope target_team handle.
 *
 * Returns 0 normally.  Prints a message to stderr and returns -1 when this
 * PE's rank lies in [midpt, num_pes) but target_team was left as
 * SHMEM_TEAM_INVALID. */
static
int create_target_team(perf_metrics_t * const metric_info) {
    const int rank = metric_info->my_node;
    const int first = metric_info->midpt;
    const int expected_member = (rank >= first) && (rank < metric_info->num_pes);

    shmem_team_split_strided(SHMEM_TEAM_WORLD, first, 1, metric_info->num_pes / 2,
                             NULL, 0, &target_team);

    /* Only a PE inside the target range treats an invalid handle as an error. */
    if (expected_member && target_team == SHMEM_TEAM_INVALID) {
        fprintf(stderr, "PE %d: Target team creation failed\n", rank);
        return -1;
    }

    return 0;
}

/* Create two teams: streaming and target.
 * PEs [0, 1, ..., npes/2-1] will be in streaming_team and
 * PEs [npes/2, npes/2+1, ..., npes-1] in target_team.
 *
 * Both helpers are always invoked, even when the first one reports failure.
 * Each helper calls shmem_team_split_strided on SHMEM_TEAM_WORLD, and a
 * failure is only detected on the PEs that fall inside the range of the team
 * being created.  Returning right after a streaming-team failure would make
 * those PEs skip the target-team split while the remaining PEs still enter
 * it, leaving the two groups out of step.
 *
 * Returns 0 when both helpers succeed; otherwise the first non-zero result
 * (streaming first, then target). */
static
int create_teams(perf_metrics_t * const metric_info) {
    const int streaming_ret = create_streaming_team(metric_info);
    const int target_ret = create_target_team(metric_info);

    return (streaming_ret != 0) ? streaming_ret : target_ret;
}
22 changes: 12 additions & 10 deletions test/performance/shmem_perf_suite/latency_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,14 @@ void print_latency_header(void) {
static inline
void calc_and_print_results(double start, double end, int len,
perf_metrics_t * const metric_info) {
int stride = 0, start_pe = 0, nPEs = 0;
int start_pe = 0, nPEs = metric_info->num_pes;
int nred_elements = 1;
static double latency = 0.0, avg_latency = 0.0;
static double pwrk[SHMEM_REDUCE_MIN_WRKDATA_SIZE];

PE_set_used_adjustments(&nPEs, &stride, &start_pe, metric_info);

shmem_team_t sync_team;

PE_set_used_adjustments(&nPEs, &start_pe, metric_info);
sync_team = (start_pe == 0) ? streaming_team : target_team;

if (end > 0 && start > 0 && (end - start) > 0) {
latency = (end - start) / metric_info->trials;
} else {
Expand All @@ -69,12 +70,10 @@ void calc_and_print_results(double start, double end, int len,
printf("Individual latency for PE %6d is %10.2f\n",
metric_info->my_node, latency);
}
shmem_barrier(start_pe, stride, nPEs, bar_psync);
shmem_team_sync(sync_team);

if (nPEs >= 2) {
shmem_double_sum_to_all(&avg_latency, &latency,
nred_elements, start_pe, stride,
nPEs, pwrk, red_psync);
shmem_double_sum_reduce(streaming_team, &avg_latency, &latency, nred_elements);
avg_latency /= nPEs;
} else {
avg_latency = latency;
Expand Down Expand Up @@ -175,7 +174,6 @@ int latency_init_resources(int argc, char *argv[],
#if defined(ENABLE_THREADS)
thread_safety_validation_check(metric_info);
#endif
init_psync_arrays();

if(only_even_PEs_check(metric_info->my_node, metric_info->num_pes) != 0) {
return -1;
Expand All @@ -193,6 +191,10 @@ int latency_init_resources(int argc, char *argv[],
metric_info->target = shmalloc(sizeof(long));
#endif

if (create_teams(metric_info) != 0) {
return -1;
}

return 0;
}

Expand Down
Loading

0 comments on commit 3480dae

Please sign in to comment.