diff --git a/test/apps/gups.c b/test/apps/gups.c index 32145ff..98e8b3a 100644 --- a/test/apps/gups.c +++ b/test/apps/gups.c @@ -322,25 +322,11 @@ SHMEMRandomAccess(void) uint64_t NumUpdates; /* total number of updates to table */ uint64_t ProcNumUpdates; /* number of updates per processor */ - static long pSync_bcast[SHMEM_BCAST_SYNC_SIZE]; - static long long int llpWrk[SHMEM_REDUCE_MIN_WRKDATA_SIZE]; - - static long pSync_reduce[SHMEM_REDUCE_SYNC_SIZE]; - static int ipWrk[SHMEM_REDUCE_MIN_WRKDATA_SIZE]; - FILE *outFile = NULL; double *GUPs; double *temp_GUPs; - for (i = 0; i < SHMEM_BCAST_SYNC_SIZE; i += 1){ - pSync_bcast[i] = SHMEM_SYNC_VALUE; - } - - for (i = 0; i < SHMEM_REDUCE_SYNC_SIZE; i += 1){ - pSync_reduce[i] = SHMEM_SYNC_VALUE; - } - SHMEMGUPs = -1; GUPs = &SHMEMGUPs; @@ -399,7 +385,7 @@ SHMEMRandomAccess(void) if (! HPCC_PELock) sAbort = 1; shmem_barrier_all(); - shmem_int_sum_to_all(&rAbort, &sAbort, 1, 0, 0, NumProcs, ipWrk, pSync_reduce); + shmem_int_sum_reduce(SHMEM_TEAM_WORLD, &rAbort, &sAbort, 1); shmem_barrier_all(); if (rAbort > 0) { @@ -468,7 +454,7 @@ SHMEMRandomAccess(void) /* distribute result to all nodes */ temp_GUPs = GUPs; shmem_barrier_all(); - shmem_broadcast64(GUPs,temp_GUPs,1,0,0,0,NumProcs,pSync_bcast); + shmem_double_broadcast(SHMEM_TEAM_WORLD,GUPs,temp_GUPs,1,0); shmem_barrier_all(); /* Verification phase */ @@ -494,7 +480,7 @@ SHMEMRandomAccess(void) } shmem_barrier_all(); - shmem_longlong_sum_to_all( (long long *)&GlbNumErrors, (long long *)&NumErrors, 1, 0,0, NumProcs,llpWrk, pSync_reduce); + shmem_longlong_sum_reduce(SHMEM_TEAM_WORLD, (long long *)&GlbNumErrors, (long long *)&NumErrors, 1); shmem_barrier_all(); /* End timed section */ diff --git a/test/performance/shmem_perf_suite/bi_dir.h b/test/performance/shmem_perf_suite/bi_dir.h index d857dfe..97dc469 100644 --- a/test/performance/shmem_perf_suite/bi_dir.h +++ b/test/performance/shmem_perf_suite/bi_dir.h @@ -75,13 +75,13 @@ static inline void bi_bw_put(int len, 
perf_metrics_t * const metric_info) } if (streaming_node(metric_info)) { - shmem_int_p(&fin, 1, dest); + shmem_int_atomic_set(&fin, 1, dest); shmem_int_wait_until(&fin, SHMEM_CMP_EQ, 0); end = perf_shmemx_wtime(); calc_and_print_results(end, start, len, metric_info); } else { shmem_int_wait_until(&fin, SHMEM_CMP_EQ, 1); - shmem_int_p(&fin, 0, dest); + shmem_int_atomic_set(&fin, 0, dest); } } @@ -146,13 +146,13 @@ static inline void bi_bw_get(int len, perf_metrics_t * const metric_info) } if (streaming_node(metric_info)) { - shmem_int_p(&fin, 1, dest); + shmem_int_atomic_set(&fin, 1, dest); shmem_int_wait_until(&fin, SHMEM_CMP_EQ, 0); end = perf_shmemx_wtime(); calc_and_print_results(end, start, len, metric_info); } else { shmem_int_wait_until(&fin, SHMEM_CMP_EQ, 1); - shmem_int_p(&fin, 0, dest); + shmem_int_atomic_set(&fin, 0, dest); } } diff --git a/test/performance/shmem_perf_suite/bw_common.h b/test/performance/shmem_perf_suite/bw_common.h index 7d08dda..257a117 100644 --- a/test/performance/shmem_perf_suite/bw_common.h +++ b/test/performance/shmem_perf_suite/bw_common.h @@ -141,17 +141,18 @@ void print_data_results(double bw, double mr, const perf_metrics_t * const data, static inline void calc_and_print_results(double end_t, double start_t, int len, perf_metrics_t * const metric_info) { - int stride = 0, start_pe = 0, nPEs = 0; + int start_pe = 0, nPEs = metric_info->num_pes; static double pe_bw_sum, bw = 0.0; /*must be symmetric for reduction*/ double pe_bw_avg = 0.0, pe_mr_avg = 0.0; int nred_elements = 1; - static double pwrk[SHMEM_REDUCE_MIN_WRKDATA_SIZE]; static double pe_time_start, pe_time_end, end_time_max = 0.0, start_time_min = 0.0; double total_t = 0.0, total_t_max = 0.0; int multiplier = 1; + shmem_team_t sync_team; - PE_set_used_adjustments(&nPEs, &stride, &start_pe, metric_info); + PE_set_used_adjustments(&nPEs, &start_pe, metric_info); + sync_team = (start_pe == 0) ? 
streaming_team : target_team; /* 2x as many messages at once for bi-directional */ if(metric_info->b_type == BI_DIR) @@ -188,16 +189,12 @@ void calc_and_print_results(double end_t, double start_t, int len, pe_time_start = start_t; pe_time_end = end_t; - shmem_barrier(start_pe, stride, nPEs, bar_psync); + shmem_team_sync(sync_team); if (metric_info->cstyle != COMM_INCAST) { if (nPEs >= 2) { - shmem_double_min_to_all(&start_time_min, &pe_time_start, nred_elements, - start_pe, stride, nPEs, pwrk, - red_psync); - shmem_barrier(start_pe, stride, nPEs, bar_psync); - shmem_double_max_to_all(&end_time_max, &pe_time_end, nred_elements, - start_pe, stride, nPEs, pwrk, - red_psync); + shmem_double_min_reduce(streaming_team, &start_time_min, &pe_time_start, nred_elements); + shmem_team_sync(sync_team); + shmem_double_max_reduce(streaming_team, &end_time_max, &pe_time_end, nred_elements); } else if (nPEs == 1) { start_time_min = pe_time_start; end_time_max = pe_time_end; @@ -226,9 +223,7 @@ void calc_and_print_results(double end_t, double start_t, int len, pe_bw_sum = bw; } else { if (nPEs >= 2) { - shmem_double_sum_to_all(&pe_bw_sum, &bw, nred_elements, - start_pe, stride, nPEs, pwrk, - red_psync); + shmem_double_sum_reduce(streaming_team, &pe_bw_sum, &bw, nred_elements); } else if (nPEs == 1) { pe_bw_sum = bw; } @@ -421,9 +416,8 @@ int bw_init_data_stream(perf_metrics_t * const metric_info, #if defined(ENABLE_THREADS) thread_safety_validation_check(metric_info); #endif - init_psync_arrays(); - if(only_even_PEs_check(metric_info->my_node, metric_info->num_pes) != 0) { + if (only_even_PEs_check(metric_info->my_node, metric_info->num_pes) != 0) { return -1; } @@ -433,6 +427,10 @@ int bw_init_data_stream(perf_metrics_t * const metric_info, metric_info->dest = aligned_buffer_alloc(metric_info->max_len * metric_info->nthreads); init_array(metric_info->dest, metric_info->max_len * metric_info->nthreads, metric_info->my_node); + if (create_teams(metric_info) != 0) { + return -1; + } 
+ return 0; } diff --git a/test/performance/shmem_perf_suite/common.h b/test/performance/shmem_perf_suite/common.h index c8186ec..5ecc956 100644 --- a/test/performance/shmem_perf_suite/common.h +++ b/test/performance/shmem_perf_suite/common.h @@ -168,9 +168,8 @@ typedef struct perf_metrics { int individual_report; } perf_metrics_t; -/* psync arrays used in metric calculation */ -long red_psync[SHMEM_REDUCE_SYNC_SIZE]; -long bar_psync[SHMEM_BARRIER_SYNC_SIZE]; + +shmem_team_t streaming_team, target_team; /* default settings with no input provided */ static inline @@ -178,7 +177,6 @@ void set_metric_defaults(perf_metrics_t *metric_info) { char *val = NULL; metric_info->trials_multiplier = 1.0; /* Default 1 */ val = getenv("SHMEM_PERF_SUITE_TRIALS_MULTIPLIER"); - if (val && strlen(val)) metric_info->trials_multiplier = atof(val); @@ -220,17 +218,6 @@ void update_metrics(perf_metrics_t *metric_info) { metric_info->midpt = metric_info->num_pes / 2; } -/* init psync arrays */ -static inline -void init_psync_arrays(void) { - int i; - for(i = 0; i < SHMEM_REDUCE_SYNC_SIZE; i++) - red_psync[i] = SHMEM_SYNC_VALUE; - - for(i = 0; i < SHMEM_BARRIER_SYNC_SIZE; i++) - bar_psync[i] = SHMEM_SYNC_VALUE; -} - /* return microseconds */ double perf_shmemx_wtime(void); @@ -572,6 +559,7 @@ void thread_safety_validation_check(perf_metrics_t * const metric_info) { } #endif +/* Only even number of PEs are allowed for performance tests */ static inline int only_even_PEs_check(int my_node, int num_pes) { if (num_pes % 2 != 0) { @@ -612,7 +600,7 @@ int partner_node(const perf_metrics_t * const my_info) static inline int streaming_node(const perf_metrics_t * const my_info) { - if(my_info->cstyle == COMM_PAIRWISE) { + if (my_info->cstyle == COMM_PAIRWISE) { return (my_info->my_node < my_info->szinitiator); } else { assert(my_info->cstyle == COMM_INCAST); @@ -643,16 +631,9 @@ int check_hostname_validation(const perf_metrics_t * const my_info) { int hostname_status = -1; - /* hostname_size should 
be a length divisible by 4 */ - int hostname_size = (MAX_HOSTNAME_LEN % 4 == 0) ? MAX_HOSTNAME_LEN : - MAX_HOSTNAME_LEN + (4 - MAX_HOSTNAME_LEN % 4); + int hostname_size = MAX_HOSTNAME_LEN; int i, errors = 0; - /* pSync for fcollect of hostnames */ - static long pSync_collect[SHMEM_COLLECT_SYNC_SIZE]; - for (i = 0; i < SHMEM_COLLECT_SYNC_SIZE; i++) - pSync_collect[i] = SHMEM_SYNC_VALUE; - char *hostname = (char *) shmem_malloc (hostname_size * sizeof(char)); char *dest = (char *) shmem_malloc (my_info->num_pes * hostname_size * sizeof(char)); @@ -669,9 +650,7 @@ int check_hostname_validation(const perf_metrics_t * const my_info) { } shmem_barrier_all(); - /* nelems needs to be updated based on 32-bit API */ - shmem_fcollect32(dest, hostname, hostname_size/4, 0, 0, my_info->num_pes, - pSync_collect); + shmem_char_fcollect(SHMEM_TEAM_WORLD, dest, hostname, hostname_size); char *snode_name = NULL; char *tnode_name = NULL; @@ -769,16 +748,16 @@ void large_message_metric_chg(perf_metrics_t * const metric_info, int len) { static inline red_PE_set validation_set(perf_metrics_t * const my_info, int *nPEs) { - if(my_info->cstyle == COMM_PAIRWISE) { - if(streaming_node(my_info)) { + if (my_info->cstyle == COMM_PAIRWISE) { + if (streaming_node(my_info)) { *nPEs = my_info->szinitiator; return FIRST_HALF; - } else if(target_node(my_info)) { + } else if (target_node(my_info)) { *nPEs = my_info->sztarget; return SECOND_HALF; } else { fprintf(stderr, "Warning: you are getting data from a node that " - "wasn't a part of the perf set \n "); + "wasn't a part of the perf set \n "); return 0; } } else { @@ -788,11 +767,8 @@ red_PE_set validation_set(perf_metrics_t * const my_info, int *nPEs) } } -/* reduction to collect performance results from PE set - * then start_pe will print results --- assumes num_pes is even */ static inline -void PE_set_used_adjustments(int *nPEs, int *stride, int *start_pe, - perf_metrics_t * const my_info) { +void PE_set_used_adjustments(int *nPEs, int 
*start_pe, perf_metrics_t * const my_info) { red_PE_set PE_set = validation_set(my_info, nPEs); if(PE_set == FIRST_HALF || PE_set == FULL_SET) { @@ -802,8 +778,6 @@ void PE_set_used_adjustments(int *nPEs, int *stride, int *start_pe, assert(PE_set == SECOND_HALF); *start_pe = my_info->midpt; } - - *stride = 0; /* back to back PEs */ } static @@ -822,3 +796,41 @@ void print_header(perf_metrics_t * const metric_info) { #endif printf("\n"); } + +static +int create_streaming_team(perf_metrics_t * const metric_info) { + shmem_team_split_strided(SHMEM_TEAM_WORLD, 0, 1, metric_info->num_pes / 2, NULL, 0, &streaming_team); + + int my_pe = metric_info->my_node; + if (streaming_team == SHMEM_TEAM_INVALID && (my_pe >= 0 && my_pe < metric_info->num_pes / 2)) { + fprintf(stderr, "PE %d: Streaming team creation failed\n", metric_info->my_node); + return -1; + } + + return 0; +} + +static +int create_target_team(perf_metrics_t * const metric_info) { + shmem_team_split_strided(SHMEM_TEAM_WORLD, metric_info->midpt, 1, metric_info->num_pes / 2, NULL, 0, &target_team); + + int my_pe = metric_info->my_node; + if (target_team == SHMEM_TEAM_INVALID && (my_pe >= metric_info->midpt && my_pe < metric_info->num_pes)) { + fprintf(stderr, "PE %d: Target team creation failed\n", metric_info->my_node); + return -1; + } + + return 0; +} + +/* Create two teams: streaming and target. + * PEs [0, 1, ..., npes/2-1] will be in streaming_team and + * PEs [npes/2, npes/2+1, ..., npes-1] in target_team. 
*/ +static +int create_teams(perf_metrics_t * const metric_info) { + int ret = create_streaming_team(metric_info); + if (!ret) + return create_target_team(metric_info); + + return ret; +} diff --git a/test/performance/shmem_perf_suite/latency_common.h b/test/performance/shmem_perf_suite/latency_common.h index f2e41f0..2e47344 100644 --- a/test/performance/shmem_perf_suite/latency_common.h +++ b/test/performance/shmem_perf_suite/latency_common.h @@ -51,13 +51,14 @@ void print_latency_header(void) { static inline void calc_and_print_results(double start, double end, int len, perf_metrics_t * const metric_info) { - int stride = 0, start_pe = 0, nPEs = 0; + int start_pe = 0, nPEs = metric_info->num_pes; int nred_elements = 1; static double latency = 0.0, avg_latency = 0.0; - static double pwrk[SHMEM_REDUCE_MIN_WRKDATA_SIZE]; - - PE_set_used_adjustments(&nPEs, &stride, &start_pe, metric_info); - + shmem_team_t sync_team; + + PE_set_used_adjustments(&nPEs, &start_pe, metric_info); + sync_team = (start_pe == 0) ? 
streaming_team : target_team; + if (end > 0 && start > 0 && (end - start) > 0) { latency = (end - start) / metric_info->trials; } else { @@ -69,12 +70,10 @@ void calc_and_print_results(double start, double end, int len, printf("Individual latency for PE %6d is %10.2f\n", metric_info->my_node, latency); } - shmem_barrier(start_pe, stride, nPEs, bar_psync); + shmem_team_sync(sync_team); if (nPEs >= 2) { - shmem_double_sum_to_all(&avg_latency, &latency, - nred_elements, start_pe, stride, - nPEs, pwrk, red_psync); + shmem_double_sum_reduce(streaming_team, &avg_latency, &latency, nred_elements); avg_latency /= nPEs; } else { avg_latency = latency; @@ -175,7 +174,6 @@ int latency_init_resources(int argc, char *argv[], #if defined(ENABLE_THREADS) thread_safety_validation_check(metric_info); #endif - init_psync_arrays(); if(only_even_PEs_check(metric_info->my_node, metric_info->num_pes) != 0) { return -1; @@ -193,6 +191,10 @@ int latency_init_resources(int argc, char *argv[], metric_info->target = shmalloc(sizeof(long)); #endif + if (create_teams(metric_info) != 0) { + return -1; + } + return 0; } diff --git a/test/performance/shmem_perf_suite/uni_dir.h b/test/performance/shmem_perf_suite/uni_dir.h index d238077..7f113a2 100644 --- a/test/performance/shmem_perf_suite/uni_dir.h +++ b/test/performance/shmem_perf_suite/uni_dir.h @@ -80,13 +80,13 @@ static inline void uni_bw_put(int len, perf_metrics_t *metric_info) } shmem_quiet(); } - shmem_int_p(&fin, 1, dest); + shmem_int_atomic_set(&fin, 1, dest); shmem_int_wait_until(&fin, SHMEM_CMP_EQ, 0); end = perf_shmemx_wtime(); calc_and_print_results(end, start, len, metric_info); } else { shmem_int_wait_until(&fin, SHMEM_CMP_EQ, 1); - shmem_int_p(&fin, 0, dest); + shmem_int_atomic_set(&fin, 0, dest); } } @@ -151,13 +151,13 @@ static inline void uni_bw_get(int len, perf_metrics_t *metric_info) shmem_quiet(); #endif } - shmem_int_p(&fin, 1, dest); + shmem_int_atomic_set(&fin, 1, dest); shmem_int_wait_until(&fin, SHMEM_CMP_EQ, 0); 
end = perf_shmemx_wtime(); calc_and_print_results(end, start, len, metric_info); } else { shmem_int_wait_until(&fin, SHMEM_CMP_EQ, 1); - shmem_int_p(&fin, 0, dest); + shmem_int_atomic_set(&fin, 0, dest); } } diff --git a/test/performance/tests/msgrate.c b/test/performance/tests/msgrate.c index 941475f..6c2297c 100644 --- a/test/performance/tests/msgrate.c +++ b/test/performance/tests/msgrate.c @@ -33,12 +33,12 @@ #include /* configuration parameters - setable by command line arguments */ -int npeers = 6; -int niters = 4096; +int npeers = 2; +int niters = 128; int nmsgs = 128; int nbytes = 8; int cache_size = (8 * 1024 * 1024 / sizeof(int)); -int ppn = -1; +int ppn = 1; int machine_output = 0; /* globals */ @@ -47,10 +47,6 @@ int *recv_peers; int *cache_buf; char *send_buf; char *recv_buf; -long bcast_pSync[SHMEM_BCAST_SYNC_SIZE]; -long barrier_pSync[SHMEM_BARRIER_SYNC_SIZE]; -long reduce_pSync[SHMEM_REDUCE_SYNC_SIZE]; -double reduce_pWrk[SHMEM_REDUCE_MIN_WRKDATA_SIZE]; int start_err = 0; double tmp = 0; double total = 0; @@ -108,23 +104,25 @@ static void test_one_way(void) { int i, k; - int pe_size = world_size; tmp = 0; total = 0; shmem_barrier_all(); - if (world_size % 2 == 1) { - pe_size = world_size - 1; - } + shmem_team_t sync_team; + if (world_size % 2 == 1 && world_size != 1) + shmem_team_split_strided(SHMEM_TEAM_WORLD, 0, 1, world_size - 1, NULL, 0, &sync_team); + else + sync_team = SHMEM_TEAM_WORLD; if (!(world_size % 2 == 1 && rank == (world_size - 1))) { if (rank < world_size / 2) { for (i = 0 ; i < niters ; ++i) { cache_invalidate(); - shmem_barrier(0, 0, pe_size, barrier_pSync); + shmem_quiet(); + shmem_team_sync(sync_team); tmp = timer(); for (k = 0 ; k < nmsgs ; ++k) { @@ -139,7 +137,8 @@ test_one_way(void) for (i = 0 ; i < niters ; ++i) { cache_invalidate(); - shmem_barrier(0, 0, pe_size, barrier_pSync); + shmem_quiet(); + shmem_team_sync(sync_team); tmp = timer(); shmem_short_wait_until((short*) (recv_buf + (nbytes * (nmsgs - 1))), SHMEM_CMP_NE, 
0); @@ -148,7 +147,7 @@ test_one_way(void) } } - shmem_double_sum_to_all(&tmp, &total, 1, 0, 0, pe_size, reduce_pWrk, reduce_pSync); + shmem_double_sum_reduce(sync_team, &tmp, &total, 1); display_result("single direction", (niters * nmsgs) / (tmp / world_size)); } @@ -192,7 +191,7 @@ test_prepost(void) memset(recv_buf, 0, npeers * nmsgs * nbytes); } - shmem_double_sum_to_all(&tmp, &total, 1, 0, 0, world_size, reduce_pWrk, reduce_pSync); + shmem_double_sum_reduce(SHMEM_TEAM_WORLD, &tmp, &total, 1); display_result("pre-post", (niters * npeers * nmsgs * 2) / (tmp / world_size)); } @@ -267,55 +266,40 @@ main(int argc, char *argv[]) /* sanity check */ if (start_err != 1) { -#if 0 - if (world_size < 3) { - fprintf(stderr, "Error: At least three processes are required\n"); - start_err = 1; - } else -#endif - if (world_size <= npeers) { - fprintf(stderr, "Error: job size (%d) <= number of peers (%d)\n", - world_size, npeers); + if (world_size < npeers) { + fprintf(stderr, "Error: job size (%d) < number of peers (%d)\n", + world_size, npeers); start_err = 77; } else if (ppn < 1) { fprintf(stderr, "Error: must specify process per node (-n #)\n"); start_err = 77; - } else if (world_size / ppn <= npeers) { - fprintf(stderr, "Error: node count <= number of peers\n"); + } else if (world_size / ppn < npeers) { + fprintf(stderr, "Error: node count < number of peers\n"); start_err = 77; } } } - for (i = 0; i < SHMEM_BCAST_SYNC_SIZE; i++) - bcast_pSync[i] = SHMEM_SYNC_VALUE; - for (i = 0; i < SHMEM_BARRIER_SYNC_SIZE; i++) - barrier_pSync[i] = SHMEM_SYNC_VALUE; - for (i = 0; i < SHMEM_REDUCE_SYNC_SIZE; i++) - reduce_pSync[i] = SHMEM_SYNC_VALUE; - for (i = 0; i < SHMEM_REDUCE_MIN_WRKDATA_SIZE; i++) - reduce_pWrk[i] = SHMEM_SYNC_VALUE; - shmem_barrier_all(); /* broadcast results */ - shmem_broadcast32(&start_err, &start_err, 1, 0, 0, 0, world_size, bcast_pSync); + shmem_int_broadcast(SHMEM_TEAM_WORLD, &start_err, &start_err, 1, 0); if (0 != start_err) { shmem_finalize(); 
exit(start_err); } shmem_barrier_all(); - shmem_broadcast32(&npeers, &npeers, 1, 0, 0, 0, world_size, bcast_pSync); + shmem_int_broadcast(SHMEM_TEAM_WORLD, &npeers, &npeers, 1, 0); shmem_barrier_all(); - shmem_broadcast32(&niters, &niters, 1, 0, 0, 0, world_size, bcast_pSync); + shmem_int_broadcast(SHMEM_TEAM_WORLD, &niters, &niters, 1, 0); shmem_barrier_all(); - shmem_broadcast32(&nmsgs, &nmsgs, 1, 0, 0, 0, world_size, bcast_pSync); + shmem_int_broadcast(SHMEM_TEAM_WORLD, &nmsgs, &nmsgs, 1, 0); shmem_barrier_all(); - shmem_broadcast32(&nbytes, &nbytes, 1, 0, 0, 0, world_size, bcast_pSync); + shmem_int_broadcast(SHMEM_TEAM_WORLD, &nbytes, &nbytes, 1, 0); shmem_barrier_all(); - shmem_broadcast32(&cache_size, &cache_size, 1, 0, 0, 0, world_size, bcast_pSync); + shmem_int_broadcast(SHMEM_TEAM_WORLD, &cache_size, &cache_size, 1, 0); shmem_barrier_all(); - shmem_broadcast32(&ppn, &ppn, 1, 0, 0, 0, world_size, bcast_pSync); + shmem_int_broadcast(SHMEM_TEAM_WORLD, &ppn, &ppn, 1, 0); shmem_barrier_all(); if (0 == rank) { if (!machine_output) { diff --git a/test/performance/tests/shmemlatency.c b/test/performance/tests/shmemlatency.c index 7eb4fca..decb7f8 100644 --- a/test/performance/tests/shmemlatency.c +++ b/test/performance/tests/shmemlatency.c @@ -49,6 +49,7 @@ #define TRUE (1) #define FALSE (0) +static uint64_t sig = 0; void doit(int len, double *latency, double *bandwidth); #ifndef HAVE_SHMEMX_WTIME @@ -93,8 +94,8 @@ main(int argc, char *argv[]) error= FALSE; start_len= 1; end_len= 1024; - increment = 16; - trials= 1000; + increment = 64; + trials= 100; mega= TRUE; /* check command line args */ @@ -157,6 +158,7 @@ main(int argc, char *argv[]) for (i= 0; i < trials; i++) { buf[len-1] = (char)my_node; + sig = 0; shmem_barrier_all(); @@ -215,9 +217,9 @@ doit(int len, double *latency, double *bandwidth) start = shmemx_wtime(); - shmem_putmem( buf, buf, len, 1 ); + shmem_putmem_signal( buf, buf, len, &sig, 1, SHMEM_SIGNAL_SET, 1); - shmem_long_wait_until( (long 
*)&buf[len-1], SHMEM_CMP_NE, (long)0 ); + shmem_uint64_wait_until( &sig, SHMEM_CMP_EQ, (uint64_t) 1 ); end = shmemx_wtime(); @@ -231,11 +233,9 @@ doit(int len, double *latency, double *bandwidth) } else { - shmem_long_wait_until( (long *)&buf[len-1], SHMEM_CMP_NE, (long)1 ); + shmem_uint64_wait_until( &sig, SHMEM_CMP_EQ, (uint64_t) 1 ); - buf[len-1] = (char)1; - - shmem_putmem( buf, buf, len, 0 ); + shmem_putmem_signal( buf, buf, len, &sig, 1, SHMEM_SIGNAL_SET, 0); *latency = 1.0; *bandwidth = 10.0; diff --git a/test/shmemx/cxx_test_shmem_atomic_add.cpp b/test/shmemx/cxx_test_shmem_atomic_add.cpp index cdc32f3..e64852a 100644 --- a/test/shmemx/cxx_test_shmem_atomic_add.cpp +++ b/test/shmemx/cxx_test_shmem_atomic_add.cpp @@ -36,10 +36,6 @@ #include #include -#ifdef ENABLE_SHMEMX_TESTS -#include -#endif - enum op { ADD = 0, ATOMIC_ADD, CTX_ATOMIC_ADD, FADD, ATOMIC_FETCH_ADD, CTX_ATOMIC_FETCH_ADD, ATOMIC_FETCH_ADD_NBI, CTX_ATOMIC_FETCH_ADD_NBI }; @@ -51,7 +47,6 @@ enum op { ADD = 0, ATOMIC_ADD, CTX_ATOMIC_ADD, FADD, ATOMIC_FETCH_ADD, #define DEPRECATED_FADD shmem_atomic_fetch_add #endif -#ifdef ENABLE_SHMEMX_TESTS #define SHMEM_NBI_OPS_CASES(OP, TYPE) \ case ATOMIC_FETCH_ADD_NBI: \ shmem_atomic_fetch_add_nbi(&old, &remote, (TYPE)(mype + 1), i); \ @@ -72,9 +67,6 @@ enum op { ADD = 0, ATOMIC_ADD, CTX_ATOMIC_ADD, FADD, ATOMIC_FETCH_ADD, rc = EXIT_FAILURE; \ } \ break; -#else -#define SHMEM_NBI_OPS_CASES(OP, TYPE) -#endif #define TEST_SHMEM_ADD(OP, TYPE) \ do { \ @@ -218,7 +210,6 @@ int main(int argc, char* argv[]) { TEST_SHMEM_ADD(CTX_ATOMIC_FETCH_ADD, size_t); TEST_SHMEM_ADD(CTX_ATOMIC_FETCH_ADD, ptrdiff_t); -#ifdef ENABLE_SHMEMX_TESTS TEST_SHMEM_ADD(ATOMIC_FETCH_ADD_NBI, int); TEST_SHMEM_ADD(ATOMIC_FETCH_ADD_NBI, long); TEST_SHMEM_ADD(ATOMIC_FETCH_ADD_NBI, long long); @@ -244,7 +235,6 @@ int main(int argc, char* argv[]) { TEST_SHMEM_ADD(CTX_ATOMIC_FETCH_ADD_NBI, uint64_t); TEST_SHMEM_ADD(CTX_ATOMIC_FETCH_ADD_NBI, size_t); TEST_SHMEM_ADD(CTX_ATOMIC_FETCH_ADD_NBI, 
ptrdiff_t); -#endif shmem_finalize(); return rc; diff --git a/test/shmemx/cxx_test_shmem_atomic_and.cpp b/test/shmemx/cxx_test_shmem_atomic_and.cpp index 81e9b6f..872f31d 100644 --- a/test/shmemx/cxx_test_shmem_atomic_and.cpp +++ b/test/shmemx/cxx_test_shmem_atomic_and.cpp @@ -37,10 +37,6 @@ #include #include -#ifdef ENABLE_SHMEMX_TESTS -#include -#endif - enum op { AND = 0, CTX_AND, FETCH_AND, CTX_FETCH_AND, FETCH_AND_NBI, CTX_FETCH_AND_NBI }; @@ -49,7 +45,6 @@ enum op { AND = 0, CTX_AND, FETCH_AND, CTX_FETCH_AND, FETCH_AND_NBI, * The result has the NPES least significant bits cleared, 111...000...b. */ -#ifdef ENABLE_SHMEMX_TESTS #define SHMEM_NBI_OPS_CASES(OP, TYPE) \ case FETCH_AND_NBI: \ shmem_atomic_fetch_and_nbi(&old, &remote, \ @@ -71,9 +66,6 @@ enum op { AND = 0, CTX_AND, FETCH_AND, CTX_FETCH_AND, FETCH_AND_NBI, rc = EXIT_FAILURE; \ } \ break; -#else -#define SHMEM_NBI_OPS_CASES(OP, TYPE) -#endif #define TEST_SHMEM_AND(OP, TYPE) \ do { \ @@ -156,7 +148,6 @@ int main(int argc, char* argv[]) { TEST_SHMEM_AND(CTX_FETCH_AND, uint32_t); TEST_SHMEM_AND(CTX_FETCH_AND, uint64_t); -#ifdef ENABLE_SHMEMX_TESTS TEST_SHMEM_AND(FETCH_AND_NBI, unsigned int); TEST_SHMEM_AND(FETCH_AND_NBI, unsigned long); TEST_SHMEM_AND(FETCH_AND_NBI, unsigned long long); @@ -172,7 +163,6 @@ int main(int argc, char* argv[]) { TEST_SHMEM_AND(CTX_FETCH_AND_NBI, int64_t); TEST_SHMEM_AND(CTX_FETCH_AND_NBI, uint32_t); TEST_SHMEM_AND(CTX_FETCH_AND_NBI, uint64_t); -#endif shmem_finalize(); return rc; diff --git a/test/shmemx/cxx_test_shmem_atomic_cswap.cpp b/test/shmemx/cxx_test_shmem_atomic_cswap.cpp index d70b840..a2d4404 100644 --- a/test/shmemx/cxx_test_shmem_atomic_cswap.cpp +++ b/test/shmemx/cxx_test_shmem_atomic_cswap.cpp @@ -36,10 +36,6 @@ #include #include -#ifdef ENABLE_SHMEMX_TESTS -#include -#endif - enum op { CSWAP = 0, ATOMIC_COMPARE_SWAP, CTX_ATOMIC_COMPARE_SWAP, ATOMIC_COMPARE_SWAP_NBI, CTX_ATOMIC_COMPARE_SWAP_NBI }; @@ -49,7 +45,6 @@ enum op { CSWAP = 0, ATOMIC_COMPARE_SWAP, 
CTX_ATOMIC_COMPARE_SWAP, #define DEPRECATED_CSWAP shmem_atomic_compare_swap #endif -#ifdef ENABLE_SHMEMX_TESTS #define SHMEM_NBI_OPS_CASES(OP, TYPE) \ case ATOMIC_COMPARE_SWAP_NBI: \ shmem_atomic_compare_swap_nbi(&old, &remote, (TYPE)npes, \ @@ -60,9 +55,6 @@ enum op { CSWAP = 0, ATOMIC_COMPARE_SWAP, CTX_ATOMIC_COMPARE_SWAP, (TYPE)npes, (TYPE)mype, \ (mype + 1) % npes); \ break; -#else -#define SHMEM_NBI_OPS_CASES(OP, TYPE) -#endif #define TEST_SHMEM_CSWAP(OP, TYPE) \ do { \ @@ -151,7 +143,6 @@ int main(int argc, char* argv[]) { TEST_SHMEM_CSWAP(CTX_ATOMIC_COMPARE_SWAP, size_t); TEST_SHMEM_CSWAP(CTX_ATOMIC_COMPARE_SWAP, ptrdiff_t); -#ifdef ENABLE_SHMEMX_TESTS TEST_SHMEM_CSWAP(ATOMIC_COMPARE_SWAP_NBI, int); TEST_SHMEM_CSWAP(ATOMIC_COMPARE_SWAP_NBI, long); TEST_SHMEM_CSWAP(ATOMIC_COMPARE_SWAP_NBI, long long); @@ -177,7 +168,6 @@ int main(int argc, char* argv[]) { TEST_SHMEM_CSWAP(CTX_ATOMIC_COMPARE_SWAP_NBI, uint64_t); TEST_SHMEM_CSWAP(CTX_ATOMIC_COMPARE_SWAP_NBI, size_t); TEST_SHMEM_CSWAP(CTX_ATOMIC_COMPARE_SWAP_NBI, ptrdiff_t); -#endif shmem_finalize(); return rc; diff --git a/test/shmemx/cxx_test_shmem_atomic_fetch.cpp b/test/shmemx/cxx_test_shmem_atomic_fetch.cpp index 5255772..7f233aa 100644 --- a/test/shmemx/cxx_test_shmem_atomic_fetch.cpp +++ b/test/shmemx/cxx_test_shmem_atomic_fetch.cpp @@ -36,10 +36,6 @@ #include #include -#ifdef ENABLE_SHMEMX_TESTS -#include -#endif - enum op { FETCH = 0, ATOMIC_FETCH, CTX_ATOMIC_FETCH, ATOMIC_FETCH_NBI, CTX_ATOMIC_FETCH_NBI }; @@ -49,7 +45,6 @@ enum op { FETCH = 0, ATOMIC_FETCH, CTX_ATOMIC_FETCH, ATOMIC_FETCH_NBI, #define DEPRECATED_FETCH shmem_atomic_fetch #endif -#ifdef ENABLE_SHMEMX_TESTS #define SHMEM_NBI_OPS_CASES(OP, TYPE) \ case ATOMIC_FETCH_NBI: \ shmem_atomic_fetch_nbi(&val, &remote, \ @@ -61,9 +56,6 @@ enum op { FETCH = 0, ATOMIC_FETCH, CTX_ATOMIC_FETCH, ATOMIC_FETCH_NBI, &remote, (mype + 1) % npes); \ shmem_quiet(); \ break; -#else -#define SHMEM_NBI_OPS_CASES(OP, TYPE) -#endif #define TEST_SHMEM_FETCH(OP, TYPE) 
\ do { \ @@ -148,7 +140,6 @@ int main(int argc, char* argv[]) { TEST_SHMEM_FETCH(CTX_ATOMIC_FETCH, size_t); TEST_SHMEM_FETCH(CTX_ATOMIC_FETCH, ptrdiff_t); -#ifdef ENABLE_SHMEMX_TESTS TEST_SHMEM_FETCH(ATOMIC_FETCH_NBI, float); TEST_SHMEM_FETCH(ATOMIC_FETCH_NBI, double); TEST_SHMEM_FETCH(ATOMIC_FETCH_NBI, int); @@ -178,7 +169,6 @@ int main(int argc, char* argv[]) { TEST_SHMEM_FETCH(CTX_ATOMIC_FETCH_NBI, uint64_t); TEST_SHMEM_FETCH(CTX_ATOMIC_FETCH_NBI, size_t); TEST_SHMEM_FETCH(CTX_ATOMIC_FETCH_NBI, ptrdiff_t); -#endif shmem_finalize(); return rc; diff --git a/test/shmemx/cxx_test_shmem_atomic_inc.cpp b/test/shmemx/cxx_test_shmem_atomic_inc.cpp index 31de80a..37273c4 100644 --- a/test/shmemx/cxx_test_shmem_atomic_inc.cpp +++ b/test/shmemx/cxx_test_shmem_atomic_inc.cpp @@ -36,10 +36,6 @@ #include #include -#ifdef ENABLE_SHMEMX_TESTS -#include -#endif - enum op { INC = 0, ATOMIC_INC, CTX_ATOMIC_INC, FINC, ATOMIC_FETCH_INC, CTX_ATOMIC_FETCH_INC, ATOMIC_FETCH_INC_NBI, CTX_ATOMIC_FETCH_INC_NBI }; @@ -52,7 +48,6 @@ enum op { INC = 0, ATOMIC_INC, CTX_ATOMIC_INC, FINC, ATOMIC_FETCH_INC, #define DEPRECATED_FINC shmem_atomic_fetch_inc #endif -#ifdef ENABLE_SHMEMX_TESTS #define SHMEM_NBI_OPS_CASES(OP, TYPE) \ case ATOMIC_FETCH_INC_NBI: \ shmem_atomic_fetch_inc_nbi(&old, &remote, i); \ @@ -72,9 +67,6 @@ enum op { INC = 0, ATOMIC_INC, CTX_ATOMIC_INC, FINC, ATOMIC_FETCH_INC, rc = EXIT_FAILURE; \ } \ break; -#else -#define SHMEM_NBI_OPS_CASES(OP, TYPE) -#endif #define TEST_SHMEM_INC(OP, TYPE) \ do { \ @@ -218,7 +210,6 @@ int main(int argc, char* argv[]) { TEST_SHMEM_INC(CTX_ATOMIC_FETCH_INC, size_t); TEST_SHMEM_INC(CTX_ATOMIC_FETCH_INC, ptrdiff_t); -#ifdef ENABLE_SHMEMX_TESTS TEST_SHMEM_INC(ATOMIC_FETCH_INC_NBI, int); TEST_SHMEM_INC(ATOMIC_FETCH_INC_NBI, long); TEST_SHMEM_INC(ATOMIC_FETCH_INC_NBI, long long); @@ -244,7 +235,6 @@ int main(int argc, char* argv[]) { TEST_SHMEM_INC(CTX_ATOMIC_FETCH_INC_NBI, uint64_t); TEST_SHMEM_INC(CTX_ATOMIC_FETCH_INC_NBI, size_t); 
TEST_SHMEM_INC(CTX_ATOMIC_FETCH_INC_NBI, ptrdiff_t); -#endif shmem_finalize(); return rc; diff --git a/test/shmemx/cxx_test_shmem_atomic_or.cpp b/test/shmemx/cxx_test_shmem_atomic_or.cpp index 1920d1d..47a17f0 100644 --- a/test/shmemx/cxx_test_shmem_atomic_or.cpp +++ b/test/shmemx/cxx_test_shmem_atomic_or.cpp @@ -37,10 +37,6 @@ #include #include -#ifdef ENABLE_SHMEMX_TESTS -#include -#endif - enum op { OR = 0, CTX_OR, FETCH_OR, CTX_FETCH_OR, FETCH_OR_NBI, CTX_FETCH_OR_NBI }; @@ -49,7 +45,6 @@ enum op { OR = 0, CTX_OR, FETCH_OR, CTX_FETCH_OR, FETCH_OR_NBI, * The result has the NPES least significant bits set, 000...111...b. */ -#ifdef ENABLE_SHMEMX_TESTS #define SHMEM_NBI_OPS_CASES(OP, TYPE) \ case FETCH_OR_NBI: \ shmem_atomic_fetch_or_nbi(&old, &remote, \ @@ -71,9 +66,6 @@ enum op { OR = 0, CTX_OR, FETCH_OR, CTX_FETCH_OR, FETCH_OR_NBI, rc = EXIT_FAILURE; \ } \ break; -#else -#define SHMEM_NBI_OPS_CASES(OP, TYPE) -#endif #define TEST_SHMEM_OR(OP, TYPE) \ do { \ @@ -158,7 +150,6 @@ int main(int argc, char* argv[]) { TEST_SHMEM_OR(CTX_FETCH_OR, uint32_t); TEST_SHMEM_OR(CTX_FETCH_OR, uint64_t); -#ifdef ENABLE_SHMEMX_TESTS TEST_SHMEM_OR(FETCH_OR_NBI, unsigned int); TEST_SHMEM_OR(FETCH_OR_NBI, unsigned long); TEST_SHMEM_OR(FETCH_OR_NBI, unsigned long long); @@ -174,7 +165,6 @@ int main(int argc, char* argv[]) { TEST_SHMEM_OR(CTX_FETCH_OR_NBI, int64_t); TEST_SHMEM_OR(CTX_FETCH_OR_NBI, uint32_t); TEST_SHMEM_OR(CTX_FETCH_OR_NBI, uint64_t); -#endif shmem_finalize(); return rc; diff --git a/test/shmemx/cxx_test_shmem_atomic_swap.cpp b/test/shmemx/cxx_test_shmem_atomic_swap.cpp index dbd132d..496f240 100644 --- a/test/shmemx/cxx_test_shmem_atomic_swap.cpp +++ b/test/shmemx/cxx_test_shmem_atomic_swap.cpp @@ -36,10 +36,6 @@ #include #include -#ifdef ENABLE_SHMEMX_TESTS -#include -#endif - enum op { SWAP = 0, ATOMIC_SWAP, CTX_ATOMIC_SWAP, ATOMIC_SWAP_NBI, CTX_ATOMIC_SWAP_NBI }; @@ -49,7 +45,6 @@ enum op { SWAP = 0, ATOMIC_SWAP, CTX_ATOMIC_SWAP, ATOMIC_SWAP_NBI, #define 
DEPRECATED_SWAP shmem_atomic_swap #endif -#ifdef ENABLE_SHMEMX_TESTS #define SHMEM_NBI_OPS_CASES(OP, TYPE) \ case ATOMIC_SWAP_NBI: \ shmem_atomic_swap_nbi(&old, &remote, \ @@ -59,9 +54,6 @@ enum op { SWAP = 0, ATOMIC_SWAP, CTX_ATOMIC_SWAP, ATOMIC_SWAP_NBI, shmem_atomic_swap_nbi(SHMEM_CTX_DEFAULT, &old, &remote, \ (TYPE)mype, (mype + 1) % npes); \ break; -#else -#define SHMEM_NBI_OPS_CASES(OP, TYPE) -#endif #define TEST_SHMEM_SWAP(OP, TYPE) \ do { \ @@ -153,7 +145,6 @@ int main(int argc, char* argv[]) { TEST_SHMEM_SWAP(CTX_ATOMIC_SWAP, size_t); TEST_SHMEM_SWAP(CTX_ATOMIC_SWAP, ptrdiff_t); -#ifdef ENABLE_SHMEMX_TESTS TEST_SHMEM_SWAP(ATOMIC_SWAP_NBI, float); TEST_SHMEM_SWAP(ATOMIC_SWAP_NBI, double); TEST_SHMEM_SWAP(ATOMIC_SWAP_NBI, int); @@ -183,7 +174,6 @@ int main(int argc, char* argv[]) { TEST_SHMEM_SWAP(CTX_ATOMIC_SWAP_NBI, uint64_t); TEST_SHMEM_SWAP(CTX_ATOMIC_SWAP_NBI, size_t); TEST_SHMEM_SWAP(CTX_ATOMIC_SWAP_NBI, ptrdiff_t); -#endif shmem_finalize(); return rc; diff --git a/test/shmemx/cxx_test_shmem_atomic_xor.cpp b/test/shmemx/cxx_test_shmem_atomic_xor.cpp index 43efcc4..1350cba 100644 --- a/test/shmemx/cxx_test_shmem_atomic_xor.cpp +++ b/test/shmemx/cxx_test_shmem_atomic_xor.cpp @@ -37,10 +37,6 @@ #include #include -#ifdef ENABLE_SHMEMX_TESTS -#include -#endif - enum op { XOR = 0, CTX_XOR, FETCH_XOR, CTX_FETCH_XOR, FETCH_XOR_NBI, CTX_FETCH_XOR_NBI }; @@ -49,7 +45,6 @@ enum op { XOR = 0, CTX_XOR, FETCH_XOR, CTX_FETCH_XOR, FETCH_XOR_NBI, * The result has the NPES least significant bits cleared, 111...000...b. 
*/ -#ifdef ENABLE_SHMEMX_TESTS #define SHMEM_NBI_OPS_CASES(OP, TYPE) \ case FETCH_XOR_NBI: \ shmem_atomic_fetch_xor_nbi(&old, &remote, \ @@ -71,9 +66,6 @@ enum op { XOR = 0, CTX_XOR, FETCH_XOR, CTX_FETCH_XOR, FETCH_XOR_NBI, rc = EXIT_FAILURE; \ } \ break; -#else -#define SHMEM_NBI_OPS_CASES(OP, TYPE) -#endif #define TEST_SHMEM_XOR(OP, TYPE) \ do { \ @@ -156,7 +148,6 @@ int main(int argc, char* argv[]) { TEST_SHMEM_XOR(CTX_FETCH_XOR, uint32_t); TEST_SHMEM_XOR(CTX_FETCH_XOR, uint64_t); -#ifdef ENABLE_SHMEMX_TESTS TEST_SHMEM_XOR(FETCH_XOR_NBI, unsigned int); TEST_SHMEM_XOR(FETCH_XOR_NBI, unsigned long); TEST_SHMEM_XOR(FETCH_XOR_NBI, unsigned long long); @@ -172,7 +163,6 @@ int main(int argc, char* argv[]) { TEST_SHMEM_XOR(CTX_FETCH_XOR_NBI, int64_t); TEST_SHMEM_XOR(CTX_FETCH_XOR_NBI, uint32_t); TEST_SHMEM_XOR(CTX_FETCH_XOR_NBI, uint64_t); -#endif shmem_finalize(); return rc; diff --git a/test/spec-example/Makefile.am b/test/spec-example/Makefile.am index d03529b..0d030c0 100644 --- a/test/spec-example/Makefile.am +++ b/test/spec-example/Makefile.am @@ -39,6 +39,12 @@ check_PROGRAMS += \ shmem_reduce_example endif +if USE_PMI_MPI +check_PROGRAMS += \ + hybrid_mpi_mapping_id \ + hybrid_mpi_mapping_id_shmem_comm +endif + TESTS = $(check_PROGRAMS) NPROCS ?= 2 diff --git a/test/spec-example/shmem_ctx.c b/test/spec-example/shmem_ctx.c index 518c67d..3b081fc 100644 --- a/test/spec-example/shmem_ctx.c +++ b/test/spec-example/shmem_ctx.c @@ -32,20 +32,14 @@ #include #include -long pwrk[SHMEM_REDUCE_MIN_WRKDATA_SIZE]; -long psync[SHMEM_REDUCE_SYNC_SIZE]; - long task_cntr = 0; /* Next task counter */ long tasks_done = 0; /* Tasks done by this PE */ long total_done = 0; /* Total tasks done by all PEs */ int main(void) { - int tl, i, ret; + int tl, ret; long ntasks = 1024; /* Total tasks per PE */ - for (i = 0; i < SHMEM_REDUCE_SYNC_SIZE; i++) - psync[i] = SHMEM_SYNC_VALUE; - ret = shmem_init_thread(SHMEM_THREAD_MULTIPLE, &tl); if (tl != SHMEM_THREAD_MULTIPLE || ret != 0) { @@ 
-89,7 +83,7 @@ int main(void) { if (ctx != SHMEM_CTX_DEFAULT) shmem_ctx_destroy(ctx); } - shmem_long_sum_to_all(&total_done, &tasks_done, 1, 0, 0, npes, pwrk, psync); + shmem_long_sum_reduce(SHMEM_TEAM_WORLD, &total_done, &tasks_done, 1); int result = (total_done != ntasks * npes); if (me == 0 && result) diff --git a/test/spec-example/shmem_team_broadcast.c b/test/spec-example/shmem_team_broadcast.c index cb672ca..108afca 100644 --- a/test/spec-example/shmem_team_broadcast.c +++ b/test/spec-example/shmem_team_broadcast.c @@ -22,10 +22,11 @@ int main(void) printf("%d: %ld, %ld, %ld, %ld\n", me, dest[0], dest[1], dest[2], dest[3]); - if (me != 0) - for (int i = 0; i < 4; i++) - if (dest[i] != i) - shmem_global_exit(1); + for (int i = 0; i < 4; i++) + if (dest[i] != i) { + printf("PE %d error - wrong value (%ld != %d)\n", me, dest[i], i); + shmem_global_exit(1); + } shmem_finalize(); return 0; diff --git a/test/spec-example/shmem_team_split_2D.c b/test/spec-example/shmem_team_split_2D.c index 1565505..ec672c7 100644 --- a/test/spec-example/shmem_team_split_2D.c +++ b/test/spec-example/shmem_team_split_2D.c @@ -20,6 +20,7 @@ static void find_xy_dims(int npes, int *x, int *y) { /* Find x, y, and z such that x * y * z == npes and * abs(x - y) + abs(x - z) + abs(y - z) is minimized. 
*/ static void find_xyz_dims(int npes, int *x, int *y, int *z) { + *x = *y = *z = 1; for(int divider = ceil(cbrt(npes)); divider >= 1; divider--) if (npes % divider == 0) { *x = divider; @@ -29,9 +30,7 @@ static void find_xyz_dims(int npes, int *x, int *y, int *z) { } int main(void) { - int xdim = 1; - int ydim = 1; - int zdim = 1; + int xdim, ydim, zdim; shmem_init(); int mype = shmem_my_pe(); diff --git a/test/unit/Makefile.am b/test/unit/Makefile.am index 9fbd4ad..e7fbb80 100644 --- a/test/unit/Makefile.am +++ b/test/unit/Makefile.am @@ -69,6 +69,7 @@ check_PROGRAMS = \ c11_test_shmem_get \ c11_test_shmem_p \ c11_test_shmem_put \ + c11_test_shmem_put_signal \ c11_test_shmem_atomic_fetch \ c11_test_shmem_atomic_set \ c11_test_shmem_atomic_add \ @@ -108,6 +109,7 @@ check_PROGRAMS = \ put_signal \ put_signal_nbi \ signal_fetch \ + signal_wait_until \ shmem_team_b2b_collectives \ shmem_team_collect_active_set \ shmem_team_max \ @@ -115,6 +117,8 @@ check_PROGRAMS = \ shmem_team_shared \ shmem_team_split_2d \ shmem_team_translate \ + shmem_team_reduce \ + shmem_team_get_config \ atomic_nbi \ fadd_nbi diff --git a/test/unit/alltoall.c b/test/unit/alltoall.c index 9ffc7d9..3e5385b 100644 --- a/test/unit/alltoall.c +++ b/test/unit/alltoall.c @@ -29,19 +29,9 @@ #include #include -long pSync[SHMEM_ALLTOALL_SYNC_SIZE]; - -static int is_active(int pe, int pe_start, int pe_stride, int pe_size) { - int stride = 1 << pe_stride; - - return pe >= pe_start && pe < pe_start + pe_size * stride && (pe - pe_start) % stride == 0; -} - /* Tranlate a group PE index to a global PE rank. */ static int pe_group_to_world(int group_pe, int pe_start, int pe_stride, int pe_size) { - int stride = 1 << pe_stride; - - return group_pe >= pe_size ? -1 : pe_start + group_pe * stride; + return group_pe >= pe_size ? 
-1 : pe_start + group_pe * pe_stride; } static void alltoall_test(int32_t *out, int32_t *in, int pe_start, int pe_stride, @@ -64,13 +54,15 @@ static void alltoall_test(int32_t *out, int32_t *in, int pe_start, int pe_stride shmem_barrier_all(); - if (is_active(me, pe_start, pe_stride, pe_size)) - shmem_alltoall32(out, in, 1, pe_start, pe_stride, pe_size, pSync); + shmem_team_t new_team; + shmem_team_split_strided(SHMEM_TEAM_WORLD, pe_start, pe_stride, pe_size, NULL, 0, &new_team); + if (new_team != SHMEM_TEAM_INVALID) + shmem_int32_alltoall(new_team, out, in, 1); for (i = 0; i < npes; i++) { int expected; - if (is_active(me, pe_start, pe_stride, pe_size)) + if (new_team != SHMEM_TEAM_INVALID) expected = pe_group_to_world(i, pe_start, pe_stride, pe_size); else expected = -1; @@ -87,33 +79,30 @@ static void alltoall_test(int32_t *out, int32_t *in, int pe_start, int pe_stride int main(int argc, char **argv) { - int npes, i; + int npes; int32_t *in, *out; shmem_init(); npes = shmem_n_pes(); - for (i = 0; i < SHMEM_ALLTOALL_SYNC_SIZE; i++) - pSync[i] = SHMEM_SYNC_VALUE; - in = shmem_malloc(4 * npes); out = shmem_malloc(4 * npes); /* All PEs */ - alltoall_test(out, in, 0, 0, npes); - /* Only PE 0, stride is invalid (should be ignored) */ + alltoall_test(out, in, 0, 1, npes); + /* Only PE 0, stride is invalid (should be ignored if NPES < 13) */ alltoall_test(out, in, 0, 13, 1); /* Only even PEs */ - alltoall_test(out, in, 0, 1, npes / 2 + npes % 2); + alltoall_test(out, in, 0, 2, npes / 2 + npes % 2); if (npes > 1) { /* Remove PE n-1 */ - alltoall_test(out, in, 0, 0, npes-1); + alltoall_test(out, in, 0, 1, npes-1); /* Remove PE 0 */ - alltoall_test(out, in, 1, 0, npes-1); + alltoall_test(out, in, 1, 1, npes-1); /* Only odd PEs */ - alltoall_test(out, in, 1, 1, npes / 2); + alltoall_test(out, in, 1, 2, npes / 2); } shmem_finalize(); diff --git a/test/unit/alltoalls.c b/test/unit/alltoalls.c index aa9862e..41061de 100644 --- a/test/unit/alltoalls.c +++ 
b/test/unit/alltoalls.c @@ -31,19 +31,9 @@ #define NELEM 16 -long pSync[SHMEM_ALLTOALLS_SYNC_SIZE]; - -static int is_active(int pe, int pe_start, int pe_stride, int pe_size) { - int stride = 1 << pe_stride; - - return pe >= pe_start && pe < pe_start + pe_size * stride && (pe - pe_start) % stride == 0; -} - /* Tranlate a group PE index to a global PE rank. */ static int pe_group_to_world(int group_pe, int pe_start, int pe_stride, int pe_size) { - int stride = 1 << pe_stride; - - return group_pe >= pe_size ? -1 : pe_start + group_pe * stride; + return group_pe >= pe_size ? -1 : pe_start + group_pe * pe_stride; } static void alltoalls_test(int32_t *out, int32_t *in, int dst, int sst, int nelem, @@ -67,9 +57,10 @@ static void alltoalls_test(int32_t *out, int32_t *in, int dst, int sst, int nele shmem_barrier_all(); - if (is_active(me, pe_start, pe_stride, pe_size)) - shmem_alltoalls32(out, in, dst, sst, nelem, - pe_start, pe_stride, pe_size, pSync); + shmem_team_t new_team; + shmem_team_split_strided(SHMEM_TEAM_WORLD, pe_start, pe_stride, pe_size, NULL, 0, &new_team); + if (new_team != SHMEM_TEAM_INVALID) + shmem_int32_alltoalls(new_team, out, in, dst, sst, nelem); for (i = 0; i < npes; i++) { int expected; @@ -77,7 +68,7 @@ static void alltoalls_test(int32_t *out, int32_t *in, int dst, int sst, int nele for (j = 0; j < nelem; j++) { for (k = 0; k < dst; k++) { int idx = i*dst*nelem + j*dst + k; - if (is_active(me, pe_start, pe_stride, pe_size)) + if (new_team != SHMEM_TEAM_INVALID) expected = (k % dst == 0) ? 
pe_group_to_world(i, pe_start, pe_stride, pe_size) : -1; else expected = -1; @@ -96,57 +87,54 @@ static void alltoalls_test(int32_t *out, int32_t *in, int dst, int sst, int nele int main(int argc, char **argv) { - int npes, i; + int npes; int32_t *in, *out; shmem_init(); npes = shmem_n_pes(); - for (i = 0; i < SHMEM_ALLTOALLS_SYNC_SIZE; i++) - pSync[i] = SHMEM_SYNC_VALUE; - in = shmem_malloc(4 * NELEM * npes); out = shmem_malloc(4 * NELEM * npes); /* All PEs */ - alltoalls_test(out, in, 1, 1, 1, 0, 0, npes); /* Same as alltoall */ - alltoalls_test(out, in, 1, 1, 0, 0, 0, npes); /* No op */ - alltoalls_test(out, in, 2, 2, NELEM/2, 0, 0, npes); /* Alternate elements */ - alltoalls_test(out, in, 2, 1, NELEM/2, 0, 0, npes); /* dst != sst */ - alltoalls_test(out, in, 1, 2, NELEM/2 - 1, 0, 0, npes); /* dst != sst */ - /* Only PE 0, stride is invalid (should be ignored) */ + alltoalls_test(out, in, 1, 1, 1, 0, 1, npes); /* Same as alltoall */ + alltoalls_test(out, in, 1, 1, 0, 0, 1, npes); /* No op */ + alltoalls_test(out, in, 2, 2, NELEM/2, 0, 1, npes); /* Alternate elements */ + alltoalls_test(out, in, 2, 1, NELEM/2, 0, 1, npes); /* dst != sst */ + alltoalls_test(out, in, 1, 2, NELEM/2 - 1, 0, 1, npes); /* dst != sst */ + /* Only PE 0, stride is invalid (should be ignored if NPES < 13) */ alltoalls_test(out, in, 1, 1, 1, 0, 13, 1); /* Same as alltoall */ alltoalls_test(out, in, 1, 1, 0, 0, 13, 1); /* No op */ alltoalls_test(out, in, 2, 2, NELEM/2, 0, 13, 1); /* Alternate elements */ alltoalls_test(out, in, 2, 1, NELEM/2, 0, 13, 1); /* dst != sst */ alltoalls_test(out, in, 1, 2, NELEM/2 - 1, 0, 13, 1); /* dst != sst */ /* Only even PEs */ - alltoalls_test(out, in, 1, 1, 1, 0, 1, npes / 2 + npes % 2); /* Same as alltoall */ - alltoalls_test(out, in, 1, 1, 0, 0, 1, npes / 2 + npes % 2); /* No op */ - alltoalls_test(out, in, 2, 2, NELEM/2, 0, 1, npes / 2 + npes % 2); /* Alternate elements */ - alltoalls_test(out, in, 2, 1, NELEM/2, 0, 1, npes / 2 + npes % 2); /* dst != sst 
*/ - alltoalls_test(out, in, 1, 2, NELEM/2 - 1, 0, 1, npes / 2 + npes % 2); /* dst != sst */ + alltoalls_test(out, in, 1, 1, 1, 0, 2, npes / 2 + npes % 2); /* Same as alltoall */ + alltoalls_test(out, in, 1, 1, 0, 0, 2, npes / 2 + npes % 2); /* No op */ + alltoalls_test(out, in, 2, 2, NELEM/2, 0, 2, npes / 2 + npes % 2); /* Alternate elements */ + alltoalls_test(out, in, 2, 1, NELEM/2, 0, 2, npes / 2 + npes % 2); /* dst != sst */ + alltoalls_test(out, in, 1, 2, NELEM/2 - 1, 0, 2, npes / 2 + npes % 2); /* dst != sst */ if (npes > 1) { /* Remove PE n-1 */ - alltoalls_test(out, in, 1, 1, 1, 0, 0, npes-1); /* Same as alltoall */ - alltoalls_test(out, in, 1, 1, 0, 0, 0, npes-1); /* No op */ - alltoalls_test(out, in, 2, 2, NELEM/2, 0, 0, npes-1); /* Alternate elements */ - alltoalls_test(out, in, 2, 1, NELEM/2, 0, 0, npes-1); /* dst != sst */ - alltoalls_test(out, in, 1, 2, NELEM/2 - 1, 0, 0, npes-1); /* dst != sst */ + alltoalls_test(out, in, 1, 1, 1, 0, 1, npes-1); /* Same as alltoall */ + alltoalls_test(out, in, 1, 1, 0, 0, 1, npes-1); /* No op */ + alltoalls_test(out, in, 2, 2, NELEM/2, 0, 1, npes-1); /* Alternate elements */ + alltoalls_test(out, in, 2, 1, NELEM/2, 0, 1, npes-1); /* dst != sst */ + alltoalls_test(out, in, 1, 2, NELEM/2 - 1, 0, 1, npes-1); /* dst != sst */ /* Remove PE 0 */ - alltoalls_test(out, in, 1, 1, 1, 1, 0, npes-1); /* Same as alltoall */ - alltoalls_test(out, in, 1, 1, 0, 1, 0, npes-1); /* No op */ - alltoalls_test(out, in, 2, 2, NELEM/2, 1, 0, npes-1); /* Alternate elements */ - alltoalls_test(out, in, 2, 1, NELEM/2, 1, 0, npes-1); /* dst != sst */ - alltoalls_test(out, in, 1, 2, NELEM/2 - 1, 1, 0, npes-1); /* dst != sst */ + alltoalls_test(out, in, 1, 1, 1, 1, 1, npes-1); /* Same as alltoall */ + alltoalls_test(out, in, 1, 1, 0, 1, 1, npes-1); /* No op */ + alltoalls_test(out, in, 2, 2, NELEM/2, 1, 1, npes-1); /* Alternate elements */ + alltoalls_test(out, in, 2, 1, NELEM/2, 1, 1, npes-1); /* dst != sst */ + alltoalls_test(out, in, 1, 2, 
NELEM/2 - 1, 1, 1, npes-1); /* dst != sst */ /* Only odd PEs */ - alltoalls_test(out, in, 1, 1, 1, 1, 1, npes / 2); /* Same as alltoall */ - alltoalls_test(out, in, 1, 1, 0, 1, 1, npes / 2); /* No op */ - alltoalls_test(out, in, 2, 2, NELEM/2, 1, 1, npes / 2); /* Alternate elements */ - alltoalls_test(out, in, 2, 1, NELEM/2, 1, 1, npes / 2); /* dst != sst */ - alltoalls_test(out, in, 1, 2, NELEM/2 - 1, 1, 1, npes / 2); /* dst != sst */ + alltoalls_test(out, in, 1, 1, 1, 1, 2, npes / 2); /* Same as alltoall */ + alltoalls_test(out, in, 1, 1, 0, 1, 2, npes / 2); /* No op */ + alltoalls_test(out, in, 2, 2, NELEM/2, 1, 2, npes / 2); /* Alternate elements */ + alltoalls_test(out, in, 2, 1, NELEM/2, 1, 2, npes / 2); /* dst != sst */ + alltoalls_test(out, in, 1, 2, NELEM/2 - 1, 1, 2, npes / 2); /* dst != sst */ } shmem_finalize(); diff --git a/test/unit/bcast.c b/test/unit/bcast.c index a27f686..b15ef1f 100644 --- a/test/unit/bcast.c +++ b/test/unit/bcast.c @@ -43,8 +43,6 @@ #include #include -long pSync[SHMEM_BCAST_SYNC_SIZE]; - #define START_BCAST_SIZE 16 #define BCAST_INCR 1024 @@ -90,10 +88,6 @@ main(int argc, char* argv[]) } } - for (i = 0; i < SHMEM_BCAST_SYNC_SIZE; i += 1) { - pSync[i] = SHMEM_SYNC_VALUE; - } - if ( mpe == 0 && Verbose ) { fprintf(stderr,"%d loops\n",loops); } @@ -115,18 +109,13 @@ main(int argc, char* argv[]) shmem_barrier_all(); - shmem_broadcast64(dst, src, nLongs, 1, 0, 0, num_pes, pSync); + shmem_long_broadcast(SHMEM_TEAM_WORLD, dst, src, nLongs, 1); for(i=0; i < nLongs; i++) { - /* the root node shouldn't have the result into dst (cf specification).*/ - if (1 != mpe && dst[i] != src[i]) { + if (dst[i] != src[i]) { fprintf(stderr,"[%d] dst[%d] %ld != expected %ld\n", mpe, i, dst[i],src[i]); shmem_global_exit(1); - } else if (1 == mpe && dst[i] != 0) { - fprintf(stderr,"[%d] dst[%d] %ld != expected 0\n", - mpe, i, dst[i]); - shmem_global_exit(1); } } shmem_barrier_all(); diff --git a/test/unit/bcast_flood.c b/test/unit/bcast_flood.c index 
6331a10..8d63cec 100644 --- a/test/unit/bcast_flood.c +++ b/test/unit/bcast_flood.c @@ -55,8 +55,6 @@ static double shmemx_wtime(void) { int Verbose=0; int Serialize; -long *pSync; - #define DFLT_LOOPS 600 // downsized for 'make check' //#define DFLT_LOOPS 10000 #define N_ELEMENTS 25600 /*100 KB as ints */ @@ -64,7 +62,7 @@ long *pSync; int main(int argc, char **argv) { - int i,ps,ps_cnt=2; + int i; int *target; int *source; int me, npes, elements=N_ELEMENTS, loops=DFLT_LOOPS; @@ -101,13 +99,6 @@ main(int argc, char **argv) return 1; } break; - case 'p': - if ((ps_cnt = atoi_scaled(optarg)) <= 0) { - fprintf(stderr,"ERR: Bad pSync[] elements %d\n",loops); - shmem_finalize(); - return 1; - } - break; case 's': Serialize++; break; @@ -125,17 +116,6 @@ main(int argc, char **argv) } } - ps_cnt *= SHMEM_BCAST_SYNC_SIZE; - pSync = shmem_malloc( ps_cnt * sizeof(long) ); - if (!pSync) { - fprintf(stderr, "ERR - null pSync pointer\n"); - shmem_global_exit(1); - } - - for (i = 0; i < ps_cnt; i++) { - pSync[i] = SHMEM_SYNC_VALUE; - } - source = (int *) shmem_malloc( elements * sizeof(*source) ); if (!source) { fprintf(stderr, "ERR - null source pointer\n"); @@ -153,26 +133,22 @@ main(int argc, char **argv) } if (me==0 && Verbose) { - fprintf(stderr,"ps_cnt %d loops %d nElems %d\n", - ps_cnt,loops,elements); + fprintf(stderr,"loops %d nElems %d\n", + loops,elements); } shmem_barrier_all(); - for(time_taken = 0.0, ps = i = 0; i < loops; i++) { + for(time_taken = 0.0, i = 0; i < loops; i++) { start_time = shmemx_wtime(); - shmem_broadcast32(target, source, elements, 0, 0, 0, npes, &pSync[ps]); + shmem_int_broadcast(SHMEM_TEAM_WORLD, target, source, elements, 0); if (Serialize) shmem_barrier_all(); time_taken += (shmemx_wtime() - start_time); - if (ps_cnt > 1 ) { - ps += SHMEM_BCAST_SYNC_SIZE; - if ( ps >= ps_cnt ) ps = 0; - } } if(me == 0 && Verbose) { @@ -189,7 +165,6 @@ main(int argc, char **argv) if (Verbose > 1) fprintf(stderr,"[%d] post B1\n",me); - shmem_free(pSync); 
shmem_free(target); shmem_free(source); @@ -228,7 +203,6 @@ usage(char *pgm) " where:\n" " -l loops (%d) loop count.\n" " -e ints # of integers to broadcast\n" - " -p cnt # of pSync[] elements\n" " -v be verbose, multiple 'v' more verbose\n" " -h this text.\n", pgm,DFLT_LOOPS); diff --git a/test/unit/big_reduction.c b/test/unit/big_reduction.c index 51fd5cc..c76d343 100644 --- a/test/unit/big_reduction.c +++ b/test/unit/big_reduction.c @@ -40,8 +40,6 @@ #include #include -long pSync[SHMEM_REDUCE_SYNC_SIZE]; - #define N 128 long src[N]; @@ -50,8 +48,6 @@ long dst[N]; #define MAX(a, b) ((a) > (b)) ? (a) : (b) #define WRK_SIZE MAX(N/2+1, SHMEM_REDUCE_MIN_WRKDATA_SIZE) -long pWrk[WRK_SIZE]; - int main(int argc, char* argv[]) { @@ -74,10 +70,6 @@ main(int argc, char* argv[]) } } - for (i = 0; i < SHMEM_REDUCE_SYNC_SIZE; i += 1) { - pSync[i] = SHMEM_SYNC_VALUE; - } - shmem_init(); for (i = 0; i < N; i += 1) { @@ -85,7 +77,7 @@ main(int argc, char* argv[]) } shmem_barrier_all(); - shmem_long_max_to_all(dst, src, N, 0, 0, shmem_n_pes(), pWrk, pSync); + shmem_long_max_reduce(SHMEM_TEAM_WORLD, dst, src, N); if (Verbose) { printf("%d/%d\tdst =", shmem_my_pe(), shmem_n_pes() ); diff --git a/test/unit/bigput.c b/test/unit/bigput.c index ce37249..7ca0957 100644 --- a/test/unit/bigput.c +++ b/test/unit/bigput.c @@ -52,9 +52,6 @@ int Track; int elements = NUM_ELEMENTS; double sum_time, time_taken; -double pWrk[SHMEM_REDUCE_MIN_WRKDATA_SIZE]; -long pSync[SHMEM_REDUCE_SYNC_SIZE]; - static int atoi_scaled(char *s) { @@ -159,9 +156,6 @@ main(int argc, char **argv) } } - for(i=0; i < SHMEM_REDUCE_SYNC_SIZE; i++) - pSync[i] = SHMEM_SYNC_VALUE; - target_PE = (me+1) % npes; total_time = (double *) shmem_malloc( npes * sizeof(double) ); @@ -222,7 +216,7 @@ main(int argc, char **argv) // collect time per node. 
shmem_double_put( &total_time[me], &time_taken, 1, 0 ); - shmem_double_sum_to_all(&sum_time, &time_taken, 1, 0, 0, npes, pWrk, pSync); + shmem_double_sum_reduce(SHMEM_TEAM_WORLD, &sum_time, &time_taken, 1); shmem_barrier_all(); @@ -250,7 +244,7 @@ main(int argc, char **argv) sum_time /= (double)npes; comp_time /= (double)npes; if (sum_time != comp_time) - printf("%s: computed_time %7.5f != sum_to_all_time %7.5f)\n", + printf("%s: computed_time %7.5f != sum_reduce_time %7.5f)\n", pgm, comp_time, sum_time ); rate = ((double)bytes/(1024.0*1024.0)) / comp_time; diff --git a/test/unit/broadcast_active_set.c b/test/unit/broadcast_active_set.c index 3365367..c40eba7 100644 --- a/test/unit/broadcast_active_set.c +++ b/test/unit/broadcast_active_set.c @@ -32,15 +32,36 @@ #define NELEM 10 +#ifdef ENABLE_DEPRECATED_TESTS long bcast_psync[SHMEM_BCAST_SYNC_SIZE]; /* Note: Need to alternate psync arrays because the active set changes */ long barrier_psync0[SHMEM_BARRIER_SYNC_SIZE]; long barrier_psync1[SHMEM_BARRIER_SYNC_SIZE]; +#endif int64_t src[NELEM]; int64_t dst[NELEM]; +/* Validate broadcasted data */ +static int validate_data(int i) { + int errors = 0; + int j; + for (j = 0; j < NELEM; j++) { +#ifdef ENABLE_DEPRECATED_TESTS + int64_t expected = (shmem_my_pe() == i) ? 
i-1 : i; +#else + int64_t expected = i; +#endif + if (dst[j] != expected) { + printf("%d: Expected dst[%d] = %"PRId64", got dst[%d] = %"PRId64", iteration %d\n", + shmem_my_pe(), j, expected, j, dst[j], i); + errors++; + } + } + return errors; +} + int main(void) { int i, me, npes; @@ -56,6 +77,7 @@ int main(void) dst[i] = -1; } +#ifdef ENABLE_DEPRECATED_TESTS for (i = 0; i < SHMEM_BCAST_SYNC_SIZE; i++) bcast_psync[i] = SHMEM_SYNC_VALUE; @@ -63,6 +85,7 @@ int main(void) barrier_psync0[i] = SHMEM_SYNC_VALUE; barrier_psync1[i] = SHMEM_SYNC_VALUE; } +#endif if (me == 0) printf("Shrinking active set test\n"); @@ -71,26 +94,37 @@ int main(void) /* A total of npes tests are performed, where the active set in each test * includes PEs i..npes-1 */ +#ifdef ENABLE_DEPRECATED_TESTS for (i = 0; i <= me; i++) { - int j; - if (me == i) - printf(" + active set size %d\n", npes-i); + if (me == i) { + printf("+ active set size %d\n", npes-i); + } shmem_broadcast64(dst, src, NELEM, 0, i, 0, npes-i, bcast_psync); - /* Validate broadcasted data */ - for (j = 0; j < NELEM; j++) { - int64_t expected = (me == i) ? i-1 : i; - if (dst[j] != expected) { - printf("%d: Expected dst[%d] = %"PRId64", got dst[%d] = %"PRId64", iteration %d\n", - me, j, expected, j, dst[j], i); - errors++; - } + errors += validate_data(i); + shmem_barrier(i, 0, npes-i, (i % 2) ? barrier_psync0 : barrier_psync1); + } +#else + shmem_team_t new_team; + for (i = 0; i < npes; i++) { + + if (me == i) { + printf(" + active set size %d\n", npes-i); } - shmem_barrier(i, 0, npes-i, (i % 2) ? 
barrier_psync0 : barrier_psync1); + shmem_team_split_strided(SHMEM_TEAM_WORLD, i, 1, npes-i, NULL, 0, &new_team); + if (new_team != SHMEM_TEAM_INVALID) { + shmem_int64_broadcast(new_team, dst, src, NELEM, 0); + + errors += validate_data(i); + } + + shmem_barrier_all(); + } +#endif shmem_barrier_all(); @@ -104,24 +138,19 @@ int main(void) /* A total of npes tests are performed, where the root changes each time */ for (i = 0; i < npes; i++) { - int j; if (me == i) printf(" + root %d\n", i); +#ifdef ENABLE_DEPRECATED_TESTS shmem_broadcast64(dst, src, NELEM, i, 0, 0, npes, bcast_psync); +#else + shmem_int64_broadcast(SHMEM_TEAM_WORLD, dst, src, NELEM, i); +#endif - /* Validate broadcasted data */ - for (j = 0; j < NELEM; j++) { - int64_t expected = (me == i) ? i-1 : i; - if (dst[j] != expected) { - printf("%d: Expected dst[%d] = %"PRId64", got dst[%d] = %"PRId64", iteration %d\n", - me, j, expected, j, dst[j], i); - errors++; - } - } + errors += validate_data(i); - shmem_barrier(0, 0, npes, barrier_psync0); + shmem_barrier_all(); } shmem_finalize(); diff --git a/test/unit/c11_shmem_team_reduce.c b/test/unit/c11_shmem_team_reduce.c index 423536f..a00d6b3 100644 --- a/test/unit/c11_shmem_team_reduce.c +++ b/test/unit/c11_shmem_team_reduce.c @@ -172,81 +172,148 @@ int main(void) { } TEST_SHMEM_REDUCE(and, unsigned char); - TEST_SHMEM_REDUCE(and, short); TEST_SHMEM_REDUCE(and, unsigned short); - TEST_SHMEM_REDUCE(and, int); TEST_SHMEM_REDUCE(and, unsigned int); - TEST_SHMEM_REDUCE(and, long); TEST_SHMEM_REDUCE(and, unsigned long); - TEST_SHMEM_REDUCE(and, long long); TEST_SHMEM_REDUCE(and, unsigned long long); + TEST_SHMEM_REDUCE(and, int8_t); + TEST_SHMEM_REDUCE(and, int16_t); + TEST_SHMEM_REDUCE(and, int32_t); + TEST_SHMEM_REDUCE(and, int64_t); + TEST_SHMEM_REDUCE(and, uint8_t); + TEST_SHMEM_REDUCE(and, uint16_t); + TEST_SHMEM_REDUCE(and, uint32_t); + TEST_SHMEM_REDUCE(and, uint64_t); + TEST_SHMEM_REDUCE(and, size_t); TEST_SHMEM_REDUCE(or, unsigned char); - 
TEST_SHMEM_REDUCE(or, short); TEST_SHMEM_REDUCE(or, unsigned short); - TEST_SHMEM_REDUCE(or, int); TEST_SHMEM_REDUCE(or, unsigned int); - TEST_SHMEM_REDUCE(or, long); TEST_SHMEM_REDUCE(or, unsigned long); - TEST_SHMEM_REDUCE(or, long long); TEST_SHMEM_REDUCE(or, unsigned long long); + TEST_SHMEM_REDUCE(or, int8_t); + TEST_SHMEM_REDUCE(or, int16_t); + TEST_SHMEM_REDUCE(or, int32_t); + TEST_SHMEM_REDUCE(or, int64_t); + TEST_SHMEM_REDUCE(or, uint8_t); + TEST_SHMEM_REDUCE(or, uint16_t); + TEST_SHMEM_REDUCE(or, uint32_t); + TEST_SHMEM_REDUCE(or, uint64_t); + TEST_SHMEM_REDUCE(or, size_t); TEST_SHMEM_REDUCE(xor, unsigned char); - TEST_SHMEM_REDUCE(xor, short); TEST_SHMEM_REDUCE(xor, unsigned short); - TEST_SHMEM_REDUCE(xor, int); TEST_SHMEM_REDUCE(xor, unsigned int); - TEST_SHMEM_REDUCE(xor, long); TEST_SHMEM_REDUCE(xor, unsigned long); - TEST_SHMEM_REDUCE(xor, long long); TEST_SHMEM_REDUCE(xor, unsigned long long); + TEST_SHMEM_REDUCE(xor, int8_t); + TEST_SHMEM_REDUCE(xor, int16_t); + TEST_SHMEM_REDUCE(xor, int32_t); + TEST_SHMEM_REDUCE(xor, int64_t); + TEST_SHMEM_REDUCE(xor, uint8_t); + TEST_SHMEM_REDUCE(xor, uint16_t); + TEST_SHMEM_REDUCE(xor, uint32_t); + TEST_SHMEM_REDUCE(xor, uint64_t); + TEST_SHMEM_REDUCE(xor, size_t); + TEST_SHMEM_REDUCE(max, char); + TEST_SHMEM_REDUCE(max, signed char); TEST_SHMEM_REDUCE(max, short); - TEST_SHMEM_REDUCE(max, unsigned short); TEST_SHMEM_REDUCE(max, int); - TEST_SHMEM_REDUCE(max, unsigned int); TEST_SHMEM_REDUCE(max, long); - TEST_SHMEM_REDUCE(max, unsigned long); TEST_SHMEM_REDUCE(max, long long); + TEST_SHMEM_REDUCE(max, ptrdiff_t); + TEST_SHMEM_REDUCE(max, unsigned char); + TEST_SHMEM_REDUCE(max, unsigned short); + TEST_SHMEM_REDUCE(max, unsigned int); + TEST_SHMEM_REDUCE(max, unsigned long); TEST_SHMEM_REDUCE(max, unsigned long long); + TEST_SHMEM_REDUCE(max, int8_t); + TEST_SHMEM_REDUCE(max, int16_t); + TEST_SHMEM_REDUCE(max, int32_t); + TEST_SHMEM_REDUCE(max, int64_t); + TEST_SHMEM_REDUCE(max, uint8_t); + 
TEST_SHMEM_REDUCE(max, uint16_t); + TEST_SHMEM_REDUCE(max, uint32_t); + TEST_SHMEM_REDUCE(max, uint64_t); + TEST_SHMEM_REDUCE(max, size_t); TEST_SHMEM_REDUCE(max, float); TEST_SHMEM_REDUCE(max, double); TEST_SHMEM_REDUCE(max, long double); + TEST_SHMEM_REDUCE(min, char); + TEST_SHMEM_REDUCE(min, signed char); TEST_SHMEM_REDUCE(min, short); - TEST_SHMEM_REDUCE(min, unsigned short); TEST_SHMEM_REDUCE(min, int); - TEST_SHMEM_REDUCE(min, unsigned int); TEST_SHMEM_REDUCE(min, long); - TEST_SHMEM_REDUCE(min, unsigned long); TEST_SHMEM_REDUCE(min, long long); + TEST_SHMEM_REDUCE(min, ptrdiff_t); + TEST_SHMEM_REDUCE(min, unsigned char); + TEST_SHMEM_REDUCE(min, unsigned short); + TEST_SHMEM_REDUCE(min, unsigned int); + TEST_SHMEM_REDUCE(min, unsigned long); TEST_SHMEM_REDUCE(min, unsigned long long); + TEST_SHMEM_REDUCE(min, int8_t); + TEST_SHMEM_REDUCE(min, int16_t); + TEST_SHMEM_REDUCE(min, int32_t); + TEST_SHMEM_REDUCE(min, int64_t); + TEST_SHMEM_REDUCE(min, uint8_t); + TEST_SHMEM_REDUCE(min, uint16_t); + TEST_SHMEM_REDUCE(min, uint32_t); + TEST_SHMEM_REDUCE(min, uint64_t); + TEST_SHMEM_REDUCE(min, size_t); TEST_SHMEM_REDUCE(min, float); TEST_SHMEM_REDUCE(min, double); TEST_SHMEM_REDUCE(min, long double); + TEST_SHMEM_REDUCE(sum, char); + TEST_SHMEM_REDUCE(sum, signed char); TEST_SHMEM_REDUCE(sum, short); - TEST_SHMEM_REDUCE(sum, unsigned short); TEST_SHMEM_REDUCE(sum, int); - TEST_SHMEM_REDUCE(sum, unsigned int); TEST_SHMEM_REDUCE(sum, long); - TEST_SHMEM_REDUCE(sum, unsigned long); TEST_SHMEM_REDUCE(sum, long long); + TEST_SHMEM_REDUCE(sum, ptrdiff_t); + TEST_SHMEM_REDUCE(sum, unsigned char); + TEST_SHMEM_REDUCE(sum, unsigned short); + TEST_SHMEM_REDUCE(sum, unsigned int); + TEST_SHMEM_REDUCE(sum, unsigned long); TEST_SHMEM_REDUCE(sum, unsigned long long); + TEST_SHMEM_REDUCE(sum, int8_t); + TEST_SHMEM_REDUCE(sum, int16_t); + TEST_SHMEM_REDUCE(sum, int32_t); + TEST_SHMEM_REDUCE(sum, int64_t); + TEST_SHMEM_REDUCE(sum, uint8_t); + TEST_SHMEM_REDUCE(sum, uint16_t); + 
TEST_SHMEM_REDUCE(sum, uint32_t); + TEST_SHMEM_REDUCE(sum, uint64_t); + TEST_SHMEM_REDUCE(sum, size_t); TEST_SHMEM_REDUCE(sum, float); TEST_SHMEM_REDUCE(sum, double); TEST_SHMEM_REDUCE(sum, long double); TEST_SHMEM_REDUCE(sum, double _Complex); TEST_SHMEM_REDUCE(sum, float _Complex); + TEST_SHMEM_REDUCE(prod, char); + TEST_SHMEM_REDUCE(prod, signed char); TEST_SHMEM_REDUCE(prod, short); - TEST_SHMEM_REDUCE(prod, unsigned short); TEST_SHMEM_REDUCE(prod, int); - TEST_SHMEM_REDUCE(prod, unsigned int); TEST_SHMEM_REDUCE(prod, long); - TEST_SHMEM_REDUCE(prod, unsigned long); TEST_SHMEM_REDUCE(prod, long long); + TEST_SHMEM_REDUCE(prod, ptrdiff_t); + TEST_SHMEM_REDUCE(prod, unsigned char); + TEST_SHMEM_REDUCE(prod, unsigned short); + TEST_SHMEM_REDUCE(prod, unsigned int); + TEST_SHMEM_REDUCE(prod, unsigned long); TEST_SHMEM_REDUCE(prod, unsigned long long); + TEST_SHMEM_REDUCE(prod, int8_t); + TEST_SHMEM_REDUCE(prod, int16_t); + TEST_SHMEM_REDUCE(prod, int32_t); + TEST_SHMEM_REDUCE(prod, int64_t); + TEST_SHMEM_REDUCE(prod, uint8_t); + TEST_SHMEM_REDUCE(prod, uint16_t); + TEST_SHMEM_REDUCE(prod, uint32_t); + TEST_SHMEM_REDUCE(prod, uint64_t); + TEST_SHMEM_REDUCE(prod, size_t); TEST_SHMEM_REDUCE(prod, float); TEST_SHMEM_REDUCE(prod, double); TEST_SHMEM_REDUCE(prod, long double); diff --git a/test/unit/c11_test_shmem_put_signal.c b/test/unit/c11_test_shmem_put_signal.c new file mode 100644 index 0000000..1a68463 --- /dev/null +++ b/test/unit/c11_test_shmem_put_signal.c @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2021 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * Validate shmem_put_signal operation through blocking and non-blocking + * APIs. +*/ + +#include +#include +#include +#include + +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L + +enum op { PUT_SIGNAL = 0, PUT_SIGNAL_NBI }; + +#define TEST_SHMEM_PUT_SIGNAL(OP, USE_CTX, SIGNAL_OP, TYPE) \ + do { \ + TYPE *remote = (TYPE *) shmem_malloc(10 * sizeof(TYPE)); \ + const int mype = shmem_my_pe(); \ + const int npes = shmem_n_pes(); \ + TYPE local[10]; \ + static uint64_t sig_addr = 0; \ + for (int i = 0; i < 10; i++) \ + local[i] = (TYPE) i; \ + int atomic_op = (SIGNAL_OP == 0) ? 
SHMEM_SIGNAL_SET : SHMEM_SIGNAL_ADD; \ + shmem_barrier_all(); \ + switch (OP) { \ + case PUT_SIGNAL: \ + if (USE_CTX) { \ + if (mype == 0) { \ + shmem_put_signal(SHMEM_CTX_DEFAULT, remote, local, 10, &sig_addr, (uint64_t) 1, \ + atomic_op, (mype + 1) % npes); \ + shmem_wait_until(&sig_addr, SHMEM_CMP_EQ, 1); \ + } else { \ + shmem_wait_until(&sig_addr, SHMEM_CMP_EQ, 1); \ + shmem_put_signal(SHMEM_CTX_DEFAULT, remote, remote, 10, &sig_addr, (uint64_t) 1,\ + atomic_op, (mype + 1) % npes); \ + } \ + } \ + else { \ + if (mype == 0) { \ + shmem_put_signal(remote, local, 10, &sig_addr, (uint64_t) 1, atomic_op, \ + (mype + 1) % npes); \ + shmem_wait_until(&sig_addr, SHMEM_CMP_EQ, 1); \ + } else { \ + shmem_wait_until(&sig_addr, SHMEM_CMP_EQ, 1); \ + shmem_put_signal(remote, remote, 10, &sig_addr, (uint64_t) 1, atomic_op, \ + (mype + 1) % npes); \ + } \ + } \ + break; \ + case PUT_SIGNAL_NBI: \ + if (USE_CTX) { \ + if (mype == 0) { \ + for (int i = 0; i < npes; i++) { \ + shmem_put_signal_nbi(SHMEM_CTX_DEFAULT, remote, local, 10, &sig_addr, \ + (uint64_t) 1, atomic_op, i); \ + } \ + shmem_quiet(); \ + } \ + shmem_wait_until(&sig_addr, SHMEM_CMP_EQ, 1); \ + } else { \ + if (mype == 0) { \ + for (int i = 0; i < npes; i++) { \ + shmem_put_signal_nbi(remote, local, 10, &sig_addr, (uint64_t) 1, \ + atomic_op, i); \ + } \ + shmem_quiet(); \ + } \ + shmem_wait_until(&sig_addr, SHMEM_CMP_EQ, 1); \ + } \ + break; \ + default: \ + printf("Invalid operation (%d)\n", OP); \ + shmem_global_exit(1); \ + } \ + sig_addr = 0; \ + shmem_barrier_all(); \ + for (int i = 0; i < 10; i++) \ + if (remote[i] != (TYPE)(i)) { \ + printf("PE %i received incorrect value with " \ + "TEST_SHMEM_PUT_SIGNAL(%s, %d, %s)\n", mype, \ + #OP, (int)(USE_CTX), #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + shmem_free(remote); \ + } while (0) + +#else +#define TEST_SHMEM_PUT_SIGNAL(OP, USE_CTX, SIGNAL_OP, TYPE) + +#endif + +int main(int argc, char* argv[]) { + shmem_init(); + + int rc = EXIT_SUCCESS; + + /* Loop over 
on context usage */ + for (int i = 0; i < 2; i++) { + /* Loop over on atomic ops */ + for (int j = 0; j < 2; j++) { + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL, i, j, float); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL, i, j, double); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL, i, j, long double); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL, i, j, char); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL, i, j, signed char); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL, i, j, short); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL, i, j, int); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL, i, j, long); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL, i, j, long long); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL, i, j, unsigned char); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL, i, j, unsigned short); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL, i, j, unsigned int); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL, i, j, unsigned long); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL, i, j, unsigned long long); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL, i, j, int8_t); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL, i, j, int16_t); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL, i, j, int32_t); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL, i, j, int64_t); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL, i, j, uint8_t); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL, i, j, uint16_t); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL, i, j, uint32_t); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL, i, j, uint64_t); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL, i, j, size_t); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL, i, j, ptrdiff_t); + } + } + + /* Loop over on context usage */ + for (int i = 0; i < 2; i++) { + /* Loop over on atomic ops */ + for (int j = 0; j < 2; j++) { + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL_NBI, i, j, float); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL_NBI, i, j, double); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL_NBI, i, j, long double); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL_NBI, i, j, char); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL_NBI, i, j, signed char); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL_NBI, i, j, short); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL_NBI, i, j, int); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL_NBI, i, j, long); + 
TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL_NBI, i, j, long long); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL_NBI, i, j, unsigned char); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL_NBI, i, j, unsigned short); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL_NBI, i, j, unsigned int); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL_NBI, i, j, unsigned long); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL_NBI, i, j, unsigned long long); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL_NBI, i, j, int8_t); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL_NBI, i, j, int16_t); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL_NBI, i, j, int32_t); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL_NBI, i, j, int64_t); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL_NBI, i, j, uint8_t); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL_NBI, i, j, uint16_t); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL_NBI, i, j, uint32_t); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL_NBI, i, j, uint64_t); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL_NBI, i, j, size_t); + TEST_SHMEM_PUT_SIGNAL(PUT_SIGNAL_NBI, i, j, ptrdiff_t); + } + } + + shmem_finalize(); + return rc; +} diff --git a/test/unit/collect.c b/test/unit/collect.c index 9857073..0c350ee 100644 --- a/test/unit/collect.c +++ b/test/unit/collect.c @@ -72,7 +72,7 @@ int main(int argc, char **argv) { /* TEST: All PEs contribute their PE id */ src[0] = me; - shmem_collect32(dst, src, 1, 0, 0, npes, pSync); + shmem_int32_collect(SHMEM_TEAM_WORLD, dst, src, 1); for (i = 0; i < npes; i++) { if (dst[i] != i) { @@ -87,8 +87,10 @@ int main(int argc, char **argv) { /* TEST: Even PEs contribute their PE id */ src[0] = me; - if (me % 2 == 0) { - shmem_collect32(dst, src, 1, 0, 1, npes/2 + npes%2, pSync); + shmem_team_t new_team; + shmem_team_split_strided(SHMEM_TEAM_WORLD, 0, 2, npes/2 + npes%2, NULL, 0, &new_team); + if (new_team != SHMEM_TEAM_INVALID) { + shmem_int32_collect(new_team, dst, src, 1); for (i = 0; i < npes/2; i++) { if (dst[i] != i*2) { @@ -105,7 +107,7 @@ int main(int argc, char **argv) { for (i = 0; i < me; i++) src[i] = me+1; - shmem_collect32(dst, src, me, 0, 0, npes, pSync); + 
shmem_int32_collect(SHMEM_TEAM_WORLD, dst, src, me); int idx = 0; for (i = 0; i < npes; i++) { diff --git a/test/unit/collect_active_set.c b/test/unit/collect_active_set.c index 3286123..cd968c8 100644 --- a/test/unit/collect_active_set.c +++ b/test/unit/collect_active_set.c @@ -32,15 +32,48 @@ #define MAX_NPES 32 +#ifdef ENABLE_DEPRECATED_TESTS long collect_psync[SHMEM_COLLECT_SYNC_SIZE]; /* Note: Need to alternate psync arrays because the active set changes */ long barrier_psync0[SHMEM_BARRIER_SYNC_SIZE]; long barrier_psync1[SHMEM_BARRIER_SYNC_SIZE]; +#endif int64_t src[MAX_NPES]; int64_t dst[MAX_NPES*MAX_NPES]; +/* Validate collected data */ +static int validate_data(int i, int me, int npes) { + int idx = 0; + int errors = 0; + /* Validate destination buffer data */ + for (int j = 0; j < npes - i; j++) { + for (int k = 0; k < i+j; k++, idx++) { + if (dst[idx] != i+j) { + printf("%d: Expected dst[%d] = %d, got dst[%d] = %"PRId64", iteration %d\n", + me, idx, i+j, idx, dst[idx], i); + errors++; + } + } + } + + /* Validate unused destination buffer */ + for ( ; idx < MAX_NPES*MAX_NPES; idx++) { + if (dst[idx] != -1) { + printf("%d: Expected dst[%d] = %d, got dst[%d] = %"PRId64", iteration %d\n", + me, idx, -1, idx, dst[idx], i); + errors++; + } + } + + /* Reset for next iteration */ + for (int j = 0; j < MAX_NPES*MAX_NPES; j++) + dst[j] = -1; + + return errors; +} + int main(void) { int i, me, npes; @@ -64,6 +97,7 @@ int main(void) for (i = 0; i < MAX_NPES*MAX_NPES; i++) dst[i] = -1; +#ifdef ENABLE_DEPRECATED_TESTS for (i = 0; i < SHMEM_COLLECT_SYNC_SIZE; i++) collect_psync[i] = SHMEM_SYNC_VALUE; @@ -71,6 +105,7 @@ int main(void) barrier_psync0[i] = SHMEM_SYNC_VALUE; barrier_psync1[i] = SHMEM_SYNC_VALUE; } +#endif if (me == 0) printf("Shrinking active set test\n"); @@ -79,41 +114,30 @@ int main(void) /* A total of npes tests are performed, where the active set in each test * includes PEs i..npes-1 and each PE contributes PE ID elements */ +#ifdef 
ENABLE_DEPRECATED_TESTS for (i = 0; i <= me; i++) { - int j, k; - int idx = 0; - if (me == i) printf(" + active set size %d\n", npes-i); shmem_collect64(dst, src, me, i, 0, npes-i, collect_psync); + errors += validate_data(i, me, npes); - /* Validate destination buffer data */ - for (j = 0; j < npes - i; j++) { - for (k = 0; k < i+j; k++, idx++) { - if (dst[idx] != i+j) { - printf("%d: Expected dst[%d] = %d, got dst[%d] = %"PRId64", iteration %d\n", - me, idx, i+j, idx, dst[idx], i); - errors++; - } - } - } + shmem_barrier(i, 0, npes-i, (i % 2) ? barrier_psync0 : barrier_psync1); + } +#else + shmem_team_t new_team; + for (i = 0; i < npes; i++) { + if (me == i) + printf(" + active set size %d\n", npes-i); - /* Validate unused destination buffer */ - for ( ; idx < MAX_NPES*MAX_NPES; idx++) { - if (dst[idx] != -1) { - printf("%d: Expected dst[%d] = %d, got dst[%d] = %"PRId64", iteration %d\n", - me, idx, -1, idx, dst[idx], i); - errors++; - } + shmem_team_split_strided(SHMEM_TEAM_WORLD, i, 1, npes-i, NULL, 0, &new_team); + if (new_team != SHMEM_TEAM_INVALID) { + shmem_int64_collect(new_team, dst, src, me); + errors += validate_data(i, me, npes); } - - /* Reset for next iteration */ - for (j = 0; j < MAX_NPES*MAX_NPES; j++) - dst[j] = -1; - - shmem_barrier(i, 0, npes-i, (i % 2) ? 
barrier_psync0 : barrier_psync1); + shmem_barrier_all(); } +#endif shmem_finalize(); diff --git a/test/unit/cxx_test_shmem_atomic_add.cpp b/test/unit/cxx_test_shmem_atomic_add.cpp index 50772e8..b18e2e5 100644 --- a/test/unit/cxx_test_shmem_atomic_add.cpp +++ b/test/unit/cxx_test_shmem_atomic_add.cpp @@ -148,6 +148,19 @@ int main(int argc, char* argv[]) { TEST_SHMEM_ADD(ADD, uint64_t, uint64); TEST_SHMEM_ADD(ADD, size_t, size); TEST_SHMEM_ADD(ADD, ptrdiff_t, ptrdiff); + + TEST_SHMEM_ADD(FADD, int, int); + TEST_SHMEM_ADD(FADD, long, long); + TEST_SHMEM_ADD(FADD, long long, longlong); + TEST_SHMEM_ADD(FADD, unsigned int, uint); + TEST_SHMEM_ADD(FADD, unsigned long, ulong); + TEST_SHMEM_ADD(FADD, unsigned long long, ulonglong); + TEST_SHMEM_ADD(FADD, int32_t, int32); + TEST_SHMEM_ADD(FADD, int64_t, int64); + TEST_SHMEM_ADD(FADD, uint32_t, uint32); + TEST_SHMEM_ADD(FADD, uint64_t, uint64); + TEST_SHMEM_ADD(FADD, size_t, size); + TEST_SHMEM_ADD(FADD, ptrdiff_t, ptrdiff); #endif /* ENABLE_DEPRECATED_TESTS */ TEST_SHMEM_ADD(ATOMIC_ADD, int, int); @@ -176,19 +189,6 @@ int main(int argc, char* argv[]) { TEST_SHMEM_ADD(CTX_ATOMIC_ADD, size_t, size); TEST_SHMEM_ADD(CTX_ATOMIC_ADD, ptrdiff_t, ptrdiff); - TEST_SHMEM_ADD(FADD, int, int); - TEST_SHMEM_ADD(FADD, long, long); - TEST_SHMEM_ADD(FADD, long long, longlong); - TEST_SHMEM_ADD(FADD, unsigned int, uint); - TEST_SHMEM_ADD(FADD, unsigned long, ulong); - TEST_SHMEM_ADD(FADD, unsigned long long, ulonglong); - TEST_SHMEM_ADD(FADD, int32_t, int32); - TEST_SHMEM_ADD(FADD, int64_t, int64); - TEST_SHMEM_ADD(FADD, uint32_t, uint32); - TEST_SHMEM_ADD(FADD, uint64_t, uint64); - TEST_SHMEM_ADD(FADD, size_t, size); - TEST_SHMEM_ADD(FADD, ptrdiff_t, ptrdiff); - TEST_SHMEM_ADD(ATOMIC_FETCH_ADD, int, int); TEST_SHMEM_ADD(ATOMIC_FETCH_ADD, long, long); TEST_SHMEM_ADD(ATOMIC_FETCH_ADD, long long, longlong); diff --git a/test/unit/cxx_test_shmem_atomic_inc.cpp b/test/unit/cxx_test_shmem_atomic_inc.cpp index e0715fe..ef95d33 100644 --- 
a/test/unit/cxx_test_shmem_atomic_inc.cpp +++ b/test/unit/cxx_test_shmem_atomic_inc.cpp @@ -146,6 +146,19 @@ int main(int argc, char* argv[]) { TEST_SHMEM_INC(INC, uint64_t, uint64); TEST_SHMEM_INC(INC, size_t, size); TEST_SHMEM_INC(INC, ptrdiff_t, ptrdiff); + + TEST_SHMEM_INC(FINC, int, int); + TEST_SHMEM_INC(FINC, long, long); + TEST_SHMEM_INC(FINC, long long, longlong); + TEST_SHMEM_INC(FINC, unsigned int, uint); + TEST_SHMEM_INC(FINC, unsigned long, ulong); + TEST_SHMEM_INC(FINC, unsigned long long, ulonglong); + TEST_SHMEM_INC(FINC, int32_t, int32); + TEST_SHMEM_INC(FINC, int64_t, int64); + TEST_SHMEM_INC(FINC, uint32_t, uint32); + TEST_SHMEM_INC(FINC, uint64_t, uint64); + TEST_SHMEM_INC(FINC, size_t, size); + TEST_SHMEM_INC(FINC, ptrdiff_t, ptrdiff); #endif /* ENABLE_DEPRECATED_TESTS */ TEST_SHMEM_INC(ATOMIC_INC, int, int); @@ -174,19 +187,6 @@ int main(int argc, char* argv[]) { TEST_SHMEM_INC(CTX_ATOMIC_INC, size_t, size); TEST_SHMEM_INC(CTX_ATOMIC_INC, ptrdiff_t, ptrdiff); - TEST_SHMEM_INC(FINC, int, int); - TEST_SHMEM_INC(FINC, long, long); - TEST_SHMEM_INC(FINC, long long, longlong); - TEST_SHMEM_INC(FINC, unsigned int, uint); - TEST_SHMEM_INC(FINC, unsigned long, ulong); - TEST_SHMEM_INC(FINC, unsigned long long, ulonglong); - TEST_SHMEM_INC(FINC, int32_t, int32); - TEST_SHMEM_INC(FINC, int64_t, int64); - TEST_SHMEM_INC(FINC, uint32_t, uint32); - TEST_SHMEM_INC(FINC, uint64_t, uint64); - TEST_SHMEM_INC(FINC, size_t, size); - TEST_SHMEM_INC(FINC, ptrdiff_t, ptrdiff); - TEST_SHMEM_INC(ATOMIC_FETCH_INC, int, int); TEST_SHMEM_INC(ATOMIC_FETCH_INC, long, long); TEST_SHMEM_INC(ATOMIC_FETCH_INC, long long, longlong); diff --git a/test/unit/cxx_test_shmem_complex.cpp b/test/unit/cxx_test_shmem_complex.cpp index 0cf2f7a..dfaaee1 100644 --- a/test/unit/cxx_test_shmem_complex.cpp +++ b/test/unit/cxx_test_shmem_complex.cpp @@ -59,8 +59,8 @@ long syncArr[SHMEM_REDUCE_SYNC_SIZE]; \ memset(TYPE##_src,0,sizeof(TYPE##_src)); \ \ - 
shmem_complex##LETTER##_##OP##_to_all(TYPE##_dest,TYPE##_src,10, \ - 0,0, shmem_n_pes(), TYPE##_workData, syncArr); \ + shmem_complex##LETTER##_##OP##_reduce(SHMEM_TEAM_WORLD, \ + TYPE##_dest,TYPE##_src,10); \ \ shmem_barrier_all(); \ \ diff --git a/test/unit/fcollect64.c b/test/unit/fcollect64.c index a82aaf7..4045e27 100644 --- a/test/unit/fcollect64.c +++ b/test/unit/fcollect64.c @@ -71,7 +71,6 @@ int Verbose; long *dst; long *src; -long pSync[SHMEM_COLLECT_SYNC_SIZE]; static int atoi_scaled(char *s) @@ -169,9 +168,6 @@ main(int argc, char* argv[]) return 1; } - for (c = 0; c < SHMEM_COLLECT_SYNC_SIZE;c++) - pSync[c] = SHMEM_SYNC_VALUE; - if (Verbose && mpe == 0) fprintf(stderr,"loops(%d) nWords(%d) incr-per-loop(%d)\n", loops,nWords,nIncr); @@ -193,7 +189,7 @@ main(int argc, char* argv[]) shmem_barrier_all(); - shmem_fcollect64(dst,src,nWords,0,0,num_pes,pSync); + shmem_long_fcollect(SHMEM_TEAM_WORLD, dst,src,nWords); // Expect dst to be consecuative integers 0 ... (nLongs*num_pes)-1 for(j=0; j < (nWords*num_pes); j++) { diff --git a/test/unit/max_reduction.c b/test/unit/max_reduction.c index b500807..57cbf11 100644 --- a/test/unit/max_reduction.c +++ b/test/unit/max_reduction.c @@ -38,8 +38,6 @@ #include #include -long pSync[SHMEM_REDUCE_SYNC_SIZE]; - #define N 3 long src[N]; @@ -48,8 +46,6 @@ long dst[N]; #define MAX(a, b) ((a) > (b)) ? 
(a) : (b) #define WRK_SIZE MAX(N/2+1, SHMEM_REDUCE_MIN_WRKDATA_SIZE) -long pWrk[WRK_SIZE]; - int main(int argc, char* argv[]) { @@ -72,10 +68,6 @@ main(int argc, char* argv[]) } } - for (i = 0; i < SHMEM_REDUCE_SYNC_SIZE; i += 1) { - pSync[i] = SHMEM_SYNC_VALUE; - } - shmem_init(); for (i = 0; i < N; i += 1) { @@ -83,7 +75,7 @@ main(int argc, char* argv[]) } shmem_barrier_all(); - shmem_long_max_to_all(dst, src, N, 0, 0, shmem_n_pes(), pWrk, pSync); + shmem_long_max_reduce(SHMEM_TEAM_WORLD, dst, src, N); if (Verbose) { printf("%d/%d\tdst =", shmem_my_pe(), shmem_n_pes() ); diff --git a/test/unit/mt_membar.c b/test/unit/mt_membar.c index d5efb67..4505e77 100644 --- a/test/unit/mt_membar.c +++ b/test/unit/mt_membar.c @@ -229,7 +229,7 @@ int main(int argc, char **argv) { pthread_barrier_destroy(&fencebar); shmem_barrier_all(); - shmem_int_sum_to_all(&sum_error, &errors, 1, 0, 0, npes, pWrk, pSync); + shmem_int_sum_reduce(SHMEM_TEAM_WORLD, &sum_error, &errors, 1); shmem_finalize(); return (sum_error == 0) ? 
0 : 1; diff --git a/test/unit/nop_collectives.c b/test/unit/nop_collectives.c index f591e48..70b4604 100644 --- a/test/unit/nop_collectives.c +++ b/test/unit/nop_collectives.c @@ -38,8 +38,10 @@ long alltoalls_psync[SHMEM_ALLTOALLS_SYNC_SIZE]; int pwrk[SHMEM_REDUCE_MIN_WRKDATA_SIZE]; int main(void) { - int i; - int me, npes; + int me; + +#ifdef ENABLE_DEPRECATED_TESTS + int i, npes; for (i = 0; i < SHMEM_BCAST_SYNC_SIZE; i++) bcast_psync[i] = SHMEM_SYNC_VALUE; @@ -58,56 +60,128 @@ int main(void) { for (i = 0; i < SHMEM_REDUCE_MIN_WRKDATA_SIZE; i++) pwrk[i] = 0; +#endif shmem_init(); me = shmem_my_pe(); + +#ifdef ENABLE_DEPRECATED_TESTS npes = shmem_n_pes(); +#endif if (me == 0) printf("Testing zero length collectives\n"); if (me == 0) printf(" + broadcast\n"); +#ifdef ENABLE_DEPRECATED_TESTS shmem_broadcast32(NULL, NULL, 0, 0, 0, 0, npes, bcast_psync); +#else + shmem_int_broadcast(SHMEM_TEAM_WORLD, NULL, NULL, 0, 0); +#endif shmem_barrier_all(); +#ifdef ENABLE_DEPRECATED_TESTS shmem_broadcast64(NULL, NULL, 0, 0, 0, 0, npes, bcast_psync); +#else + shmem_long_broadcast(SHMEM_TEAM_WORLD, NULL, NULL, 0, 0); +#endif shmem_barrier_all(); if (me == 0) printf(" + collect\n"); +#ifdef ENABLE_DEPRECATED_TESTS shmem_fcollect32(NULL, NULL, 0, 0, 0, npes, collect_psync); +#else + shmem_int_fcollect(SHMEM_TEAM_WORLD, NULL, NULL, 0); +#endif shmem_barrier_all(); +#ifdef ENABLE_DEPRECATED_TESTS shmem_fcollect64(NULL, NULL, 0, 0, 0, npes, collect_psync); +#else + shmem_long_fcollect(SHMEM_TEAM_WORLD, NULL, NULL, 0); +#endif shmem_barrier_all(); +#ifdef ENABLE_DEPRECATED_TESTS shmem_collect32(NULL, NULL, 0, 0, 0, npes, collect_psync); +#else + shmem_int_collect(SHMEM_TEAM_WORLD, NULL, NULL, 0); +#endif shmem_barrier_all(); +#ifdef ENABLE_DEPRECATED_TESTS shmem_collect64(NULL, NULL, 0, 0, 0, npes, collect_psync); +#else + shmem_long_collect(SHMEM_TEAM_WORLD, NULL, NULL, 0); +#endif shmem_barrier_all(); if (me == 0) printf(" + reduction\n"); +#ifdef ENABLE_DEPRECATED_TESTS 
shmem_int_and_to_all(NULL, NULL, 0, 0, 0, npes, pwrk, reduce_psync); +#else + shmem_int_and_reduce(SHMEM_TEAM_WORLD, NULL, NULL, 0); +#endif shmem_barrier_all(); +#ifdef ENABLE_DEPRECATED_TESTS shmem_int_or_to_all(NULL, NULL, 0, 0, 0, npes, pwrk, reduce_psync); +#else + shmem_int_or_reduce(SHMEM_TEAM_WORLD, NULL, NULL, 0); +#endif shmem_barrier_all(); +#ifdef ENABLE_DEPRECATED_TESTS shmem_int_xor_to_all(NULL, NULL, 0, 0, 0, npes, pwrk, reduce_psync); +#else + shmem_int_xor_reduce(SHMEM_TEAM_WORLD, NULL, NULL, 0); +#endif shmem_barrier_all(); +#ifdef ENABLE_DEPRECATED_TESTS shmem_int_min_to_all(NULL, NULL, 0, 0, 0, npes, pwrk, reduce_psync); +#else + shmem_int_min_reduce(SHMEM_TEAM_WORLD, NULL, NULL, 0); +#endif shmem_barrier_all(); +#ifdef ENABLE_DEPRECATED_TESTS shmem_int_max_to_all(NULL, NULL, 0, 0, 0, npes, pwrk, reduce_psync); +#else + shmem_int_max_reduce(SHMEM_TEAM_WORLD, NULL, NULL, 0); +#endif shmem_barrier_all(); +#ifdef ENABLE_DEPRECATED_TESTS shmem_int_sum_to_all(NULL, NULL, 0, 0, 0, npes, pwrk, reduce_psync); +#else + shmem_int_sum_reduce(SHMEM_TEAM_WORLD, NULL, NULL, 0); +#endif shmem_barrier_all(); +#ifdef ENABLE_DEPRECATED_TESTS shmem_int_prod_to_all(NULL, NULL, 0, 0, 0, npes, pwrk, reduce_psync); +#else + shmem_int_prod_reduce(SHMEM_TEAM_WORLD, NULL, NULL, 0); +#endif shmem_barrier_all(); if (me == 0) printf(" + all-to-all\n"); +#ifdef ENABLE_DEPRECATED_TESTS shmem_alltoall32(NULL, NULL, 0, 0, 0, npes, alltoall_psync); +#else + shmem_int_alltoall(SHMEM_TEAM_WORLD, NULL, NULL, 0); +#endif shmem_barrier_all(); +#ifdef ENABLE_DEPRECATED_TESTS shmem_alltoall64(NULL, NULL, 0, 0, 0, npes, alltoall_psync); +#else + shmem_long_alltoall(SHMEM_TEAM_WORLD, NULL, NULL, 0); +#endif shmem_barrier_all(); +#ifdef ENABLE_DEPRECATED_TESTS shmem_alltoalls32(NULL, NULL, 1, 1, 0, 0, 0, npes, alltoalls_psync); +#else + shmem_int_alltoalls(SHMEM_TEAM_WORLD, NULL, NULL, 1, 1, 0); +#endif shmem_barrier_all(); +#ifdef ENABLE_DEPRECATED_TESTS shmem_alltoalls64(NULL, NULL, 1, 
1, 0, 0, 0, npes, alltoalls_psync); +#else + shmem_long_alltoalls(SHMEM_TEAM_WORLD, NULL, NULL, 1, 1, 0); +#endif if (me == 0) printf("Done\n"); diff --git a/test/unit/pingpong-short.c b/test/unit/pingpong-short.c index 7394eb3..df32b89 100644 --- a/test/unit/pingpong-short.c +++ b/test/unit/pingpong-short.c @@ -61,11 +61,6 @@ static int atoi_scaled(char *s); int output_mod = OUTPUT_MOD; int Verbose; int Slow; -long pSync0[SHMEM_BARRIER_SYNC_SIZE], - pSync1[SHMEM_BARRIER_SYNC_SIZE], - pSync2[SHMEM_BARRIER_SYNC_SIZE], - pSync3[SHMEM_BARRIER_SYNC_SIZE], - pSync4[SHMEM_BARRIER_SYNC_SIZE]; #define DFLT_NWORDS 128 #define DFLT_LOOPS 100 @@ -89,11 +84,6 @@ main(int argc, char* argv[]) DataType *wp; long work_sz; - for(j=0; j < SHMEM_BARRIER_SYNC_SIZE; j++) { - pSync0[j] = pSync1[j] = pSync2[j] = pSync3[j] = - pSync4[j] = SHMEM_SYNC_VALUE; - } - shmem_init(); my_pe = shmem_my_pe(); nProcs = shmem_n_pes(); @@ -200,7 +190,7 @@ main(int argc, char* argv[]) if ( Verbose && (j==0 || (j % output_mod) == 0) ) fprintf(stderr,"[%d] +(%d)\n", shmem_my_pe(),j); #endif - shmem_barrier(0, 0, nProcs, pSync0); + shmem_barrier_all(); if ( my_pe == 0 ) { int p; for(p=1; p < nProcs; p++) @@ -220,7 +210,7 @@ main(int argc, char* argv[]) if ( Verbose && (j==0 || (j % output_mod) == 0) ) fprintf(stderr,"[%d] -(%d)\n", my_pe,j); #endif - shmem_barrier(0, 0, nProcs, pSync1); + shmem_barrier_all(); RDprintf("Workers[1 ... %d] verify Target data put by my_pe 0\n", nWorkers); @@ -241,7 +231,7 @@ main(int argc, char* argv[]) else /* clear results buffer, workers will put here */ memset(work, 0, work_sz); - shmem_barrier(0, 0, nProcs, pSync2); + shmem_barrier_all(); RDprintf("Workers[1 ... 
%d] put Target data to PE0 work " "vector\n",nWorkers); @@ -264,7 +254,7 @@ main(int argc, char* argv[]) } } - shmem_barrier(0, 0, nProcs, pSync3); + shmem_barrier_all(); if ( my_pe == 0 ) { RDprintf("Loop(%d) PE0 verifing work data.\n",j); @@ -283,7 +273,7 @@ main(int argc, char* argv[]) break; } } - shmem_barrier(0, 0, nProcs, pSync4); + shmem_barrier_all(); if (loops > 1) { RDfprintf(stderr,"."); diff --git a/test/unit/pingpong.c b/test/unit/pingpong.c index f476137..8f6efff 100644 --- a/test/unit/pingpong.c +++ b/test/unit/pingpong.c @@ -61,12 +61,6 @@ int output_mod = OUTPUT_MOD; int Verbose; int Slow; -long pSync0[SHMEM_BARRIER_SYNC_SIZE], - pSync1[SHMEM_BARRIER_SYNC_SIZE], - pSync2[SHMEM_BARRIER_SYNC_SIZE], - pSync3[SHMEM_BARRIER_SYNC_SIZE], - pSync4[SHMEM_BARRIER_SYNC_SIZE]; - #define DFLT_NWORDS 128 #define DFLT_LOOPS 10 @@ -86,11 +80,6 @@ main(int argc, char* argv[]) char *prog_name; long *wp,work_sz; - for(j=0; j < SHMEM_BARRIER_SYNC_SIZE; j++) { - pSync0[j] = pSync1[j] = pSync2[j] = pSync3[j] = - pSync4[j] = SHMEM_SYNC_VALUE; - } - shmem_init(); my_pe = shmem_my_pe(); nProcs = shmem_n_pes(); @@ -198,7 +187,7 @@ main(int argc, char* argv[]) if ( Verbose && (j==0 || (j % output_mod) == 0) ) fprintf(stderr,"[%d] +(%d)\n", my_pe,j); #endif - shmem_barrier(0, 0, nProcs, pSync0); + shmem_barrier_all(); if ( my_pe == 0 ) { int p; for(p=1; p < nProcs; p++) @@ -218,7 +207,7 @@ main(int argc, char* argv[]) if ( Verbose && (j==0 || (j % output_mod) == 0) ) fprintf(stderr,"[%d] -(%d)\n", shmem_my_pe(),j); #endif - shmem_barrier(0, 0, nProcs, pSync1); + shmem_barrier_all(); RDprintf("Workers[1 ... %d] verify Target data put by proc0\n", nWorkers); @@ -239,7 +228,7 @@ main(int argc, char* argv[]) else /* clear results buffer, workers will put here */ memset(work, 0, work_sz); - shmem_barrier(0, 0, nProcs, pSync2); + shmem_barrier_all(); RDprintf("Workers[1 ... 
%d] put Target data to PE0 work " "vector\n",nWorkers); @@ -262,7 +251,7 @@ main(int argc, char* argv[]) } } - shmem_barrier(0, 0, nProcs, pSync3); + shmem_barrier_all(); if ( my_pe == 0 ) { RDprintf("Loop(%d) PE0 verifing work data.\n",j); @@ -283,7 +272,7 @@ main(int argc, char* argv[]) break; } } - shmem_barrier(0, 0, nProcs, pSync4); + shmem_barrier_all(); #if _DEBUG if (loops > 1) { Rfprintf(stderr,"."); diff --git a/test/unit/reduce_active_set.c b/test/unit/reduce_active_set.c index 1a05788..d5767e8 100644 --- a/test/unit/reduce_active_set.c +++ b/test/unit/reduce_active_set.c @@ -30,16 +30,46 @@ #define NELEM 10 +#ifdef ENABLE_DEPRECATED_TESTS long max_psync[SHMEM_REDUCE_SYNC_SIZE]; long min_psync[SHMEM_REDUCE_SYNC_SIZE]; long min_pwrk[NELEM/2 + SHMEM_REDUCE_MIN_WRKDATA_SIZE]; long max_pwrk[NELEM/2 + SHMEM_REDUCE_MIN_WRKDATA_SIZE]; +#endif long src[NELEM]; long dst_max[NELEM]; long dst_min[NELEM]; +static int validate_max(int i, int me, int npes) { + int errors = 0; + /* Validate reduced max data */ + for (int j = 0; j < NELEM; j++) { + long expected = npes-1; + if (dst_max[j] != expected) { + printf("%d: Max expected dst_max[%d] = %ld, got dst_max[%d] = %ld, iteration %d\n", + me, j, expected, j, dst_max[j], i); + errors++; + } + } + return errors; +} + +static int validate_min(int i, int me, int npes) { + int errors = 0; + /* Validate reduced min data */ + for (int j = 0; j < NELEM; j++) { + long expected = i; + if (dst_min[j] != expected) { + printf("%d: Min expected dst_min[%d] = %ld, got dst_min[%d] = %ld, iteration %d\n", + me, j, expected, j, dst_min[j], i); + errors++; + } + } + return errors; +} + int main(void) { int i, me, npes; @@ -56,10 +86,12 @@ int main(void) dst_min[i] = -1; } +#ifdef ENABLE_DEPRECATED_TESTS for (i = 0; i < SHMEM_REDUCE_SYNC_SIZE; i++) { max_psync[i] = SHMEM_SYNC_VALUE; min_psync[i] = SHMEM_SYNC_VALUE; } +#endif if (me == 0) printf("Shrinking active set test\n"); @@ -68,37 +100,37 @@ int main(void) /* A total of npes tests 
are performed, where the active set in each test * includes PEs i..npes-1 */ +#ifdef ENABLE_DEPRECATED_TESTS for (i = 0; i <= me; i++) { - int j; if (me == i) printf(" + PE_start=%d, logPE_stride=0, PE_size=%d\n", i, npes-i); shmem_long_max_to_all(dst_max, src, NELEM, i, 0, npes-i, max_pwrk, max_psync); - - /* Validate reduced data */ - for (j = 0; j < NELEM; j++) { - long expected = npes-1; - if (dst_max[j] != expected) { - printf("%d: Max expected dst_max[%d] = %ld, got dst_max[%d] = %ld, iteration %d\n", - me, j, expected, j, dst_max[j], i); - errors++; - } - } + errors += validate_max(i, me, npes); shmem_long_min_to_all(dst_min, src, NELEM, i, 0, npes-i, min_pwrk, min_psync); + errors += validate_min(i, me, npes); - /* Validate reduced data */ - for (j = 0; j < NELEM; j++) { - long expected = i; - if (dst_min[j] != expected) { - printf("%d: Min expected dst_min[%d] = %ld, got dst_min[%d] = %ld, iteration %d\n", - me, j, expected, j, dst_min[j], i); - errors++; - } - } + } +#else + shmem_team_t new_team; + for (i = 0; i < npes; i++) { + if (me == i) + printf(" + PE_start=%d, PE_stride=1, PE_size=%d\n", i, npes-i); + + shmem_team_split_strided(SHMEM_TEAM_WORLD, i, 1, npes-i, NULL, 0, &new_team); + if (new_team != SHMEM_TEAM_INVALID) { + shmem_long_max_reduce(new_team, dst_max, src, NELEM); + errors += validate_max(i, me, npes); + + shmem_long_min_reduce(new_team, dst_min, src, NELEM); + errors += validate_min(i, me, npes); + } } +#endif + shmem_finalize(); diff --git a/test/unit/reduce_in_place.c b/test/unit/reduce_in_place.c index cca93bd..66ac5d2 100644 --- a/test/unit/reduce_in_place.c +++ b/test/unit/reduce_in_place.c @@ -30,10 +30,6 @@ #define NELEM 10 -long psync[SHMEM_REDUCE_SYNC_SIZE]; - -long pwrk[NELEM/2 + SHMEM_REDUCE_MIN_WRKDATA_SIZE]; - long src[NELEM]; int main(void) @@ -49,12 +45,9 @@ int main(void) for (int i = 0; i < NELEM; i++) src[i] = me; - for (int i = 0; i < SHMEM_REDUCE_SYNC_SIZE; i++) - psync[i] = SHMEM_SYNC_VALUE; - shmem_barrier_all(); - 
shmem_long_max_to_all(src, src, NELEM, 0, 0, npes, pwrk, psync); + shmem_long_max_reduce(SHMEM_TEAM_WORLD, src, src, NELEM); /* Validate reduced data */ for (int j = 0; j < NELEM; j++) { diff --git a/test/unit/repeated_barriers.c b/test/unit/repeated_barriers.c index b15b265..5584ada 100644 --- a/test/unit/repeated_barriers.c +++ b/test/unit/repeated_barriers.c @@ -30,8 +30,10 @@ #define NREPS 50 +#ifdef ENABLE_DEPRECATED_TESTS long barrier_psync0[SHMEM_BARRIER_SYNC_SIZE]; long barrier_psync1[SHMEM_BARRIER_SYNC_SIZE]; +#endif int main(void) { @@ -42,15 +44,18 @@ int main(void) me = shmem_my_pe(); npes = shmem_n_pes(); +#ifdef ENABLE_DEPRECATED_TESTS for (i = 0; i < SHMEM_BARRIER_SYNC_SIZE; i++) { barrier_psync0[i] = SHMEM_SYNC_VALUE; barrier_psync1[i] = SHMEM_SYNC_VALUE; } +#endif shmem_barrier_all(); /* A total of npes tests are performed, where the active set in each test * includes PEs i..npes-1 */ +#ifdef ENABLE_DEPRECATED_TESTS for (i = 0; i <= me; i++) { int j; @@ -61,6 +66,24 @@ int main(void) for (j = 0; j < NREPS; j++) shmem_barrier(i, 0, npes-i, (i % 2) ? 
barrier_psync0 : barrier_psync1); } +#else + shmem_team_t new_team; + for (i = 0; i < npes; i++) { + int j; + + if (me == i) + printf(" + iteration %d\n", i); + + /* Test that quiet + sync can be called repeatedly*/ + shmem_team_split_strided(SHMEM_TEAM_WORLD, i, 1, npes-i, NULL, 0, &new_team); + if (new_team != SHMEM_TEAM_INVALID) { + for (j = 0; j < NREPS; j++) { + shmem_quiet(); + shmem_team_sync(new_team); + } + } + } +#endif shmem_finalize(); diff --git a/test/unit/self_collectives.c b/test/unit/self_collectives.c index 5fb09f7..2c1347b 100644 --- a/test/unit/self_collectives.c +++ b/test/unit/self_collectives.c @@ -82,98 +82,206 @@ int main(void) { /* Note: Broadcast does not modify the output buffer at the root */ if (me == 0) printf(" + broadcast\n"); +#ifndef ENABLE_DEPRECATED_TESTS + /* Set up active set team (start=me, stride=1, size=1) for all tests*/ + shmem_team_t new_team; + shmem_team_split_strided(SHMEM_TEAM_WORLD, me, 1, 1, NULL, 0, &new_team); +#endif + in_32 = me; out_32 = -1; +#ifdef ENABLE_DEPRECATED_TESTS shmem_broadcast32(&in_32, &out_32, 1, 0, me, 0, 1, bcast_psync); CHECK("shmem_broadcast32", -1, out_32); +#else + if (new_team != SHMEM_TEAM_INVALID) + shmem_int32_broadcast(new_team, &in_32, &out_32, 1, 0); + CHECK("shmem_int32_broadcast", -1, out_32); +#endif shmem_barrier_all(); in_64 = me; out_64 = -1; +#ifdef ENABLE_DEPRECATED_TESTS shmem_broadcast64(&in_64, &out_64, 1, 0, me, 0, 1, bcast_psync); CHECK("shmem_broadcast64", -1, out_64); +#else + if (new_team != SHMEM_TEAM_INVALID) + shmem_int64_broadcast(new_team, &in_64, &out_64, 1, 0); + CHECK("shmem_int64_broadcast", -1, out_64); +#endif shmem_barrier_all(); /* Collect */ if (me == 0) printf(" + collect\n"); in_32 = me; out_32 = -1; +#ifdef ENABLE_DEPRECATED_TESTS shmem_fcollect32(&in_32, &out_32, 1, me, 0, 1, collect_psync); CHECK("shmem_fcollect32", in_32, out_32); +#else + if (new_team != SHMEM_TEAM_INVALID) + shmem_int32_fcollect(new_team, &in_32, &out_32, 1); + 
CHECK("shmem_int32_fcollect", in_32, out_32); +#endif shmem_barrier_all(); in_64 = me; out_64 = -1; +#ifdef ENABLE_DEPRECATED_TESTS shmem_fcollect64(&in_64, &out_64, 1, me, 0, 1, collect_psync); CHECK("shmem_fcollect64", in_64, out_64); +#else + if (new_team != SHMEM_TEAM_INVALID) + shmem_int64_fcollect(new_team, &in_64, &out_64, 1); + CHECK("shmem_int64_fcollect", in_64, out_64); +#endif shmem_barrier_all(); in_32 = me; out_32 = -1; +#ifdef ENABLE_DEPRECATED_TESTS shmem_collect32(&in_32, &out_32, 1, me, 0, 1, collect_psync); CHECK("shmem_collect32", in_32, out_32); +#else + if (new_team != SHMEM_TEAM_INVALID) + shmem_int32_collect(new_team, &in_32, &out_32, 1); + CHECK("shmem_int32_collect", in_32, out_32); +#endif shmem_barrier_all(); in_64 = me; out_64 = -1; +#ifdef ENABLE_DEPRECATED_TESTS shmem_collect64(&in_64, &out_64, 1, me, 0, 1, collect_psync); CHECK("shmem_collect64", in_64, out_64); +#else + if (new_team != SHMEM_TEAM_INVALID) + shmem_int64_collect(new_team, &in_64, &out_64, 1); + CHECK("shmem_int64_collect", in_64, out_64); +#endif shmem_barrier_all(); /* Reduction */ if (me == 0) printf(" + reduction\n"); in = me; out = -1; +#ifdef ENABLE_DEPRECATED_TESTS shmem_int_and_to_all(&in, &out, 1, me, 0, 1, pwrk, reduce_psync); CHECK("shmem_int_and_to_all", in, out); +#else + if (new_team != SHMEM_TEAM_INVALID) + shmem_int_and_reduce(new_team, &in, &out, 1); + CHECK("shmem_int_and_reduce", in, out); +#endif shmem_barrier_all(); in = me; out = -1; +#ifdef ENABLE_DEPRECATED_TESTS shmem_int_or_to_all(&in, &out, 1, me, 0, 1, pwrk, reduce_psync); CHECK("shmem_int_or_to_all", in, out); +#else + if (new_team != SHMEM_TEAM_INVALID) + shmem_int_or_reduce(new_team, &in, &out, 1); + CHECK("shmem_int_or_reduce", in, out); +#endif shmem_barrier_all(); in = me; out = -1; +#ifdef ENABLE_DEPRECATED_TESTS shmem_int_xor_to_all(&in, &out, 1, me, 0, 1, pwrk, reduce_psync); CHECK("shmem_int_xor_to_all", in, out); +#else + if (new_team != SHMEM_TEAM_INVALID) + 
shmem_int_xor_reduce(new_team, &in, &out, 1); + CHECK("shmem_int_xor_reduce", in, out); +#endif shmem_barrier_all(); in = me; out = -1; +#ifdef ENABLE_DEPRECATED_TESTS shmem_int_min_to_all(&in, &out, 1, me, 0, 1, pwrk, reduce_psync); CHECK("shmem_int_min_to_all", in, out); +#else + if (new_team != SHMEM_TEAM_INVALID) + shmem_int_min_reduce(new_team, &in, &out, 1); + CHECK("shmem_int_min_reduce", in, out); +#endif shmem_barrier_all(); in = me; out = -1; +#ifdef ENABLE_DEPRECATED_TESTS shmem_int_max_to_all(&in, &out, 1, me, 0, 1, pwrk, reduce_psync); CHECK("shmem_int_max_to_all", in, out); +#else + if (new_team != SHMEM_TEAM_INVALID) + shmem_int_max_reduce(new_team, &in, &out, 1); + CHECK("shmem_int_max_reduce", in, out); +#endif shmem_barrier_all(); in = me; out = -1; +#ifdef ENABLE_DEPRECATED_TESTS shmem_int_sum_to_all(&in, &out, 1, me, 0, 1, pwrk, reduce_psync); CHECK("shmem_int_sum_to_all", in, out); +#else + if (new_team != SHMEM_TEAM_INVALID) + shmem_int_sum_reduce(new_team, &in, &out, 1); + CHECK("shmem_int_sum_reduce", in, out); +#endif shmem_barrier_all(); in = me; out = -1; +#ifdef ENABLE_DEPRECATED_TESTS shmem_int_prod_to_all(&in, &out, 1, me, 0, 1, pwrk, reduce_psync); CHECK("shmem_int_prod_to_all", in, out); +#else + if (new_team != SHMEM_TEAM_INVALID) + shmem_int_prod_reduce(new_team, &in, &out, 1); + CHECK("shmem_int_prod_reduce", in, out); +#endif shmem_barrier_all(); /* All-to-all */ if (me == 0) printf(" + all-to-all\n"); in_32 = me; out_32 = -1; +#ifdef ENABLE_DEPRECATED_TESTS shmem_alltoall32(&in_32, &out_32, 1, me, 0, 1, alltoall_psync); CHECK("shmem_alltoall32", in_32, out_32); +#else + if (new_team != SHMEM_TEAM_INVALID) + shmem_int32_alltoall(new_team, &in_32, &out_32, 1); + CHECK("shmem_int32_alltoall", in_32, out_32); +#endif shmem_barrier_all(); in_64 = me; out_64 = -1; +#ifdef ENABLE_DEPRECATED_TESTS shmem_alltoall64(&in_64, &out_64, 1, me, 0, 1, alltoall_psync); CHECK("shmem_alltoall64", in_64, out_64); +#else + if (new_team != 
SHMEM_TEAM_INVALID) + shmem_int64_alltoall(new_team, &in_64, &out_64, 1); + CHECK("shmem_int64_alltoall", in_64, out_64); +#endif shmem_barrier_all(); in_32 = me; out_32 = -1; +#ifdef ENABLE_DEPRECATED_TESTS shmem_alltoalls32(&in_32, &out_32, 1, 1, 1, me, 0, 1, alltoalls_psync); CHECK("shmem_alltoalls32", in_32, out_32); +#else + if (new_team != SHMEM_TEAM_INVALID) + shmem_int32_alltoalls(new_team, &in_32, &out_32, 1, 1, 1); + CHECK("shmem_int32_alltoalls", in_32, out_32); +#endif shmem_barrier_all(); in_64 = me; out_64 = -1; +#ifdef ENABLE_DEPRECATED_TESTS shmem_alltoalls64(&in_64, &out_64, 1, 1, 1, me, 0, 1, alltoalls_psync); CHECK("shmem_alltoalls64", in_64, out_64); +#else + if (new_team != SHMEM_TEAM_INVALID) + shmem_int64_alltoalls(new_team, &in_64, &out_64, 1, 1, 1); + CHECK("shmem_int64_alltoalls", in_64, out_64); +#endif shmem_barrier_all(); if (me == 0) printf("Done\n"); diff --git a/test/unit/shmem_malloc_with_hints.c b/test/unit/shmem_malloc_with_hints.c index e7176b6..5cf142b 100644 --- a/test/unit/shmem_malloc_with_hints.c +++ b/test/unit/shmem_malloc_with_hints.c @@ -37,10 +37,6 @@ #define SHMEM_MALLOC_INVALID_HINT ~(SHMEM_MALLOC_ATOMICS_REMOTE) -long pSync[SHMEM_ALLTOALL_SYNC_SIZE]; -int pWrk[WRK_SIZE]; - - static int sumtoall_with_malloc_hint(long hint, int mype, int npes) { int failed = 0; @@ -59,7 +55,7 @@ static int sumtoall_with_malloc_hint(long hint, int mype, int npes) } shmem_barrier_all(); - shmem_int_sum_to_all(dst, src, N, 0, 0, npes, pWrk, pSync); + shmem_int_sum_reduce(SHMEM_TEAM_WORLD, dst, src, N); if (mype == 0) { for (i = 0; i < N; i++) { @@ -77,7 +73,7 @@ static int sumtoall_with_malloc_hint(long hint, int mype, int npes) int main(int argc, char **argv) { - int npes, i, mype; + int npes, mype; int passed = 0; int fail = 0; @@ -86,9 +82,6 @@ int main(int argc, char **argv) { npes = shmem_n_pes(); mype = shmem_my_pe(); - for (i = 0; i < SHMEM_ALLTOALL_SYNC_SIZE; i++) - pSync[i] = SHMEM_SYNC_VALUE; - passed = 
sumtoall_with_malloc_hint(0, mype, npes); passed += sumtoall_with_malloc_hint(SHMEM_MALLOC_ATOMICS_REMOTE, mype, npes); passed += sumtoall_with_malloc_hint(SHMEM_MALLOC_SIGNAL_REMOTE, mype, npes); diff --git a/test/unit/shmem_team_get_config.c b/test/unit/shmem_team_get_config.c new file mode 100644 index 0000000..10c6c6a --- /dev/null +++ b/test/unit/shmem_team_get_config.c @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2021 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#include <stdio.h>
+#include <shmem.h>
+/* Checks shmem_team_get_config() on the world team, an invalid team, and a split team; returns 1 on any error. */
+int main(void)
+{
+    int my_pe, npes, ret, errors = 0;
+    shmem_team_t even_team;
+    shmem_team_config_t config;
+    shmem_team_config_t new_config = { 0 };  /* zeroed so the failure message never prints an indeterminate value */
+
+    shmem_init();
+    my_pe = shmem_my_pe();
+    npes = shmem_n_pes();
+
+    /* Querying the predefined world team is expected to succeed. */
+    ret = shmem_team_get_config(SHMEM_TEAM_WORLD, SHMEM_TEAM_NUM_CONTEXTS, &config);
+
+    if (ret != 0) {
+        printf("PE %d: unexpected return value (%d)\n", my_pe, ret);
+        ++errors;
+    }
+    /* Querying SHMEM_TEAM_INVALID is expected to fail with a nonzero return. */
+    ret = shmem_team_get_config(SHMEM_TEAM_INVALID, 0, &config);
+
+    if (ret == 0) {
+        printf("PE %d: unexpected (SHMEM_TEAM_INVALID) return value (%d)\n", my_pe, ret);
+        ++errors;
+    }
+    /* Split off PEs 0, 2, 4, ... and ask for three contexts on the new team. */
+    config.num_contexts = 3;
+    /* NOTE(review): the config mask passed below is 0 although num_contexts == 3 is expected back; confirm SHMEM_TEAM_NUM_CONTEXTS is not intended. */
+    shmem_team_split_strided(SHMEM_TEAM_WORLD, 0, 2, ((npes-1)/2)+1, &config, 0, &even_team);
+
+    ret = shmem_team_get_config(even_team, SHMEM_TEAM_NUM_CONTEXTS, &new_config);
+    /* Members of the new team must read back the requested context count. */
+    if (shmem_team_my_pe(even_team) != -1 &&
+        (new_config.num_contexts != 3 || ret != 0)) {
+        printf("PE %d: unexpected num_contexts (%d) or return value (%d)\n", my_pe, new_config.num_contexts, ret);
+        ++errors;
+    }
+    /* PEs that are not members of the new team must get a nonzero return. */
+    if (shmem_team_my_pe(even_team) == -1 && ret == 0) {
+        printf("PE %d: unexpected return value from non-team members (%d)\n", my_pe, ret);
+        ++errors;
+    }
+
+    shmem_finalize();
+    return errors != 0;
+}
diff --git a/test/unit/shmem_team_reduce.c b/test/unit/shmem_team_reduce.c
new file mode 100644
index 0000000..69cbc69
--- /dev/null
+++ b/test/unit/shmem_team_reduce.c
@@ -0,0 +1,319 @@
+/*
+ * This test program is derived from a unit test created by Nick Park.
+ * The original unit test is a work of the U.S. Government and is not subject
+ * to copyright protection in the United States. Foreign copyrights may
+ * apply.
+ *
+ * Copyright (c) 2021 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <math.h>
+#include <complex.h>
+#include <shmem.h>
+
+#define MAX_NPES 32
+/* Reduction operations; each enumerator name is also the token pasted into the reduce routine name. */
+enum op { and = 0, or, xor, max, min, sum, prod };
+/* Absolute tolerance applied when comparing floating-point reduction results. */
+const double FLOATING_POINT_TOLERANCE = 1e-6;
+/* Reduces npes elements of src into dest over SHMEM_TEAM_WORLD and stores the status in ret. */
+#define REDUCTION(OP, TYPE) \
+    do { ret = shmem_##TYPE##_##OP##_reduce(SHMEM_TEAM_WORLD, dest, src, npes); } while (0)
+/* True when X is float, double, long double, float _Complex or double _Complex. */
+#define is_floating_point(X) _Generic((X), \
+    float: true, \
+    double: true, \
+    long double: true, \
+    float _Complex: true, \
+    double _Complex: true, \
+    default: false \
+)
+/* Sets every element of src to 1. */
+#define INIT_SRC_BUFFER(TYPE) \
+    do { \
+        for (int i = 0; i < MAX_NPES; i++) { \
+            src[i] = (TYPE)1ULL; \
+        } \
+    } while (0)
+/* Exact comparison of the first npes elements of dest with CORRECT_VAL; sets rc on mismatch. */
+#define CHECK_DEST_BUFFER(OP, TYPE, CORRECT_VAL) \
+    do { \
+        for (int i = 0; i < npes; i++) { \
+            if (dest[i] != (TYPE)(CORRECT_VAL)) { \
+                printf("PE %i received incorrect value with " \
+                       "TEST_SHMEM_REDUCE(%s, %s)\n", mype, #OP, #TYPE); \
+                rc = EXIT_FAILURE; \
+            } \
+        } \
+    } while (0)
+/* Compares real and imaginary parts of dest with CORRECT_VAL within TOLERANCE; sets rc on mismatch. */
+#define CHECK_DEST_BUFFER_FP(OP, TYPE, CORRECT_VAL, TOLERANCE) \
+    do { \
+        for (int i = 0; i < npes; i++) { \
+            if (fabsl(creal(dest[i]) - creal((TYPE)(CORRECT_VAL))) > (TOLERANCE)) { \
+                printf("PE %i received incorrect real value with " \
+                       "TEST_SHMEM_REDUCE(%s, %s)\n", mype, #OP, #TYPE); \
+                rc = EXIT_FAILURE; \
+            } \
+            if (fabsl(cimag(dest[i]) - cimag((TYPE)(CORRECT_VAL))) > (TOLERANCE)) { \
+                printf("PE %i received incorrect imaginary value with " \
+                       "TEST_SHMEM_REDUCE(%s, %s)\n", mype, #OP, #TYPE); \
+                rc = EXIT_FAILURE; \
+            } \
+        } \
+    } while (0)
+/* Runs one reduction for OP on TYPE and checks dest; uses mype, npes and rc from the enclosing scope. */
+#define TEST_SHMEM_REDUCE(OP, TYPENAME, TYPE) \
+    do { \
+        static TYPE src[MAX_NPES]; \
+        static TYPE dest[MAX_NPES]; \
+        int ret; \
+        const bool floating_point_val = is_floating_point((TYPE)0); \
+        \
+        INIT_SRC_BUFFER(TYPE); \
+        \
+        REDUCTION(OP, TYPENAME); \
+        \
+        if (ret != 0) { \
+            printf("Reduction returned non-zero value (%i) on PE (%i) with " \
+                   "TEST_SHMEM_REDUCE(%s, %s)\n", ret, mype, #OP, #TYPE); \
+            rc = EXIT_FAILURE; \
+        } \
+        \
+        shmem_barrier_all(); \
+        /* src is all ones: and/or/max/min/prod expect 1, xor expects npes % 2, sum expects npes. */ \
+        switch (OP) { \
+            case and: \
+                CHECK_DEST_BUFFER(OP, TYPE, 1ULL); \
+                break; \
+            case or: \
+                CHECK_DEST_BUFFER(OP, TYPE, 1ULL); \
+                break; \
+            case xor: \
+                CHECK_DEST_BUFFER(OP, TYPE, (TYPE)(npes % 2 ? 1ULL : 0ULL)); \
+                break; \
+            case max: \
+                if (floating_point_val) \
+                    CHECK_DEST_BUFFER_FP(OP, TYPE, 1ULL, FLOATING_POINT_TOLERANCE); \
+                else \
+                    CHECK_DEST_BUFFER(OP, TYPE, 1ULL); \
+                break; \
+            case min: \
+                if (floating_point_val) \
+                    CHECK_DEST_BUFFER_FP(OP, TYPE, 1ULL, FLOATING_POINT_TOLERANCE); \
+                else \
+                    CHECK_DEST_BUFFER(OP, TYPE, 1ULL); \
+                break; \
+            case sum: \
+                if (floating_point_val) \
+                    CHECK_DEST_BUFFER_FP(OP, TYPE, npes, FLOATING_POINT_TOLERANCE); \
+                else \
+                    CHECK_DEST_BUFFER(OP, TYPE, npes); \
+                break; \
+            case prod: \
+                if (floating_point_val) \
+                    CHECK_DEST_BUFFER_FP(OP, TYPE, 1ULL, FLOATING_POINT_TOLERANCE); \
+                else \
+                    CHECK_DEST_BUFFER(OP, TYPE, 1ULL); \
+                break; \
+            default: \
+                printf("Invalid operation (%d)\n", OP); \
+                shmem_global_exit(1); \
+        } \
+    } while (0)
+
+/* Runs the reduction check for each operation/type pair listed below; returns EXIT_FAILURE if any check failed. */
+int main(void) {
+
+    shmem_init();
+
+    int rc = EXIT_SUCCESS;
+
+    const int mype = shmem_my_pe();
+    const int npes = shmem_n_pes();
+    /* The static src/dest buffers hold MAX_NPES elements and npes elements are reduced. */
+    if (npes > MAX_NPES) {
+        if (mype == 0)
+            fprintf(stderr, "ERR - Requires no more than %d PEs\n", MAX_NPES);
+        shmem_global_exit(1);
+    }
+
+    TEST_SHMEM_REDUCE(and, uchar, unsigned char);
+    TEST_SHMEM_REDUCE(and, ushort, unsigned short);
+    TEST_SHMEM_REDUCE(and, uint, unsigned int);
+    TEST_SHMEM_REDUCE(and, ulong, unsigned long);
+    TEST_SHMEM_REDUCE(and, ulonglong, unsigned long long);
+    TEST_SHMEM_REDUCE(and, int8, int8_t);
+    TEST_SHMEM_REDUCE(and, int16, int16_t);
+    TEST_SHMEM_REDUCE(and, int32, int32_t);
+    TEST_SHMEM_REDUCE(and, int64, int64_t);
+    TEST_SHMEM_REDUCE(and, uint8, uint8_t);
+    TEST_SHMEM_REDUCE(and, uint16, uint16_t);
+    TEST_SHMEM_REDUCE(and, uint32, uint32_t);
+    TEST_SHMEM_REDUCE(and, uint64, uint64_t);
+    TEST_SHMEM_REDUCE(and, size, size_t);
+
+    TEST_SHMEM_REDUCE(or, uchar, unsigned char);
+    TEST_SHMEM_REDUCE(or, ushort, unsigned short);
+    TEST_SHMEM_REDUCE(or, uint, unsigned int);
+    TEST_SHMEM_REDUCE(or, ulong, unsigned long);
+    TEST_SHMEM_REDUCE(or, ulonglong, unsigned long long);
+    TEST_SHMEM_REDUCE(or, int8, int8_t);
+    TEST_SHMEM_REDUCE(or, int16, int16_t);
+    TEST_SHMEM_REDUCE(or, int32, int32_t);
+    TEST_SHMEM_REDUCE(or, int64, int64_t);
+    TEST_SHMEM_REDUCE(or, uint8, uint8_t);
+    TEST_SHMEM_REDUCE(or, uint16, uint16_t);
+    TEST_SHMEM_REDUCE(or, uint32, uint32_t);
+    TEST_SHMEM_REDUCE(or, uint64, uint64_t);
+    TEST_SHMEM_REDUCE(or, size, size_t);
+
+    TEST_SHMEM_REDUCE(xor, uchar, unsigned char);
+    TEST_SHMEM_REDUCE(xor, ushort, unsigned short);
+    TEST_SHMEM_REDUCE(xor, uint, unsigned int);
+    TEST_SHMEM_REDUCE(xor, ulong, unsigned long);
+    TEST_SHMEM_REDUCE(xor, ulonglong, unsigned long long);
+    TEST_SHMEM_REDUCE(xor, int8, int8_t);
+    TEST_SHMEM_REDUCE(xor, int16, int16_t);
+    TEST_SHMEM_REDUCE(xor, int32, int32_t);
+    TEST_SHMEM_REDUCE(xor, int64, int64_t);
+    TEST_SHMEM_REDUCE(xor, uint8, uint8_t);
+    TEST_SHMEM_REDUCE(xor, uint16, uint16_t);
+    TEST_SHMEM_REDUCE(xor, uint32, uint32_t);
+    TEST_SHMEM_REDUCE(xor, uint64, uint64_t);
+    TEST_SHMEM_REDUCE(xor, size, size_t);
+
+    TEST_SHMEM_REDUCE(max, char, char);
+    TEST_SHMEM_REDUCE(max, schar, signed char);
+    TEST_SHMEM_REDUCE(max, short, short);
+    TEST_SHMEM_REDUCE(max, int, int);
+    TEST_SHMEM_REDUCE(max, long, long);
+    TEST_SHMEM_REDUCE(max, longlong, long long);
+    TEST_SHMEM_REDUCE(max, ptrdiff, ptrdiff_t);
+    TEST_SHMEM_REDUCE(max, uchar, unsigned char);
+    TEST_SHMEM_REDUCE(max, ushort, unsigned short);
+    TEST_SHMEM_REDUCE(max, uint, unsigned int);
+    TEST_SHMEM_REDUCE(max, ulong, unsigned long);
+    TEST_SHMEM_REDUCE(max, ulonglong, unsigned long long);
+    TEST_SHMEM_REDUCE(max, int8, int8_t);
+    TEST_SHMEM_REDUCE(max, int16, int16_t);
+    TEST_SHMEM_REDUCE(max, int32, int32_t);
+    TEST_SHMEM_REDUCE(max, int64, int64_t);
+    TEST_SHMEM_REDUCE(max, uint8, uint8_t);
+    TEST_SHMEM_REDUCE(max, uint16, uint16_t);
+    TEST_SHMEM_REDUCE(max, uint32, uint32_t);
+    TEST_SHMEM_REDUCE(max, uint64, uint64_t);
+    TEST_SHMEM_REDUCE(max, size, size_t);
+    TEST_SHMEM_REDUCE(max, float, float);
+    TEST_SHMEM_REDUCE(max, double, double);
+    TEST_SHMEM_REDUCE(max, longdouble, long double);
+
+    TEST_SHMEM_REDUCE(min, char, char);
+    TEST_SHMEM_REDUCE(min, schar, signed char);
+    TEST_SHMEM_REDUCE(min, short, short);
+    TEST_SHMEM_REDUCE(min, int, int);
+    TEST_SHMEM_REDUCE(min, long, long);
+    TEST_SHMEM_REDUCE(min, longlong, long long);
+    TEST_SHMEM_REDUCE(min, ptrdiff, ptrdiff_t);
+    TEST_SHMEM_REDUCE(min, uchar, unsigned char);
+    TEST_SHMEM_REDUCE(min, ushort, unsigned short);
+    TEST_SHMEM_REDUCE(min, uint, unsigned int);
+    TEST_SHMEM_REDUCE(min, ulong, unsigned long);
+    TEST_SHMEM_REDUCE(min, ulonglong, unsigned long long);
+    TEST_SHMEM_REDUCE(min, int8, int8_t);
+    TEST_SHMEM_REDUCE(min, int16, int16_t);
+    TEST_SHMEM_REDUCE(min, int32, int32_t);
+    TEST_SHMEM_REDUCE(min, int64, int64_t);
+    TEST_SHMEM_REDUCE(min, uint8, uint8_t);
+    TEST_SHMEM_REDUCE(min, uint16, uint16_t);
+    TEST_SHMEM_REDUCE(min, uint32, uint32_t);
+    TEST_SHMEM_REDUCE(min, uint64, uint64_t);
+    TEST_SHMEM_REDUCE(min, size, size_t);
+    TEST_SHMEM_REDUCE(min, float, float);
+    TEST_SHMEM_REDUCE(min, double, double);
+    TEST_SHMEM_REDUCE(min, longdouble, long double);
+
+    TEST_SHMEM_REDUCE(sum, char, char);
+    TEST_SHMEM_REDUCE(sum, schar, signed char);
+    TEST_SHMEM_REDUCE(sum, short, short);
+    TEST_SHMEM_REDUCE(sum, int, int);
+    TEST_SHMEM_REDUCE(sum, long, long);
+    TEST_SHMEM_REDUCE(sum, longlong, long long);
+    TEST_SHMEM_REDUCE(sum, ptrdiff, ptrdiff_t);
+    TEST_SHMEM_REDUCE(sum, uchar, unsigned char);
+    TEST_SHMEM_REDUCE(sum, ushort, unsigned short);
+    TEST_SHMEM_REDUCE(sum, uint, unsigned int);
+    TEST_SHMEM_REDUCE(sum, ulong, unsigned long);
+    TEST_SHMEM_REDUCE(sum, ulonglong, unsigned long long);
+    TEST_SHMEM_REDUCE(sum, int8, int8_t);
+    TEST_SHMEM_REDUCE(sum, int16, int16_t);
+    TEST_SHMEM_REDUCE(sum, int32, int32_t);
+    TEST_SHMEM_REDUCE(sum, int64, int64_t);
+    TEST_SHMEM_REDUCE(sum, uint8, uint8_t);
+    TEST_SHMEM_REDUCE(sum, uint16, uint16_t);
+    TEST_SHMEM_REDUCE(sum, uint32, uint32_t);
+    TEST_SHMEM_REDUCE(sum, uint64, uint64_t);
+    TEST_SHMEM_REDUCE(sum, size, size_t);
+    TEST_SHMEM_REDUCE(sum, float, float);
+    TEST_SHMEM_REDUCE(sum, double, double);
+    TEST_SHMEM_REDUCE(sum, longdouble, long double);
+    TEST_SHMEM_REDUCE(sum, complexd, double _Complex);
+    TEST_SHMEM_REDUCE(sum, complexf, float _Complex);
+
+    TEST_SHMEM_REDUCE(prod, char, char);
+    TEST_SHMEM_REDUCE(prod, schar, signed char);
+    TEST_SHMEM_REDUCE(prod, short, short);
+    TEST_SHMEM_REDUCE(prod, int, int);
+    TEST_SHMEM_REDUCE(prod, long, long);
+    TEST_SHMEM_REDUCE(prod, longlong, long long);
+    TEST_SHMEM_REDUCE(prod, ptrdiff, ptrdiff_t);
+    TEST_SHMEM_REDUCE(prod, uchar, unsigned char);
+    TEST_SHMEM_REDUCE(prod, ushort, unsigned short);
+    TEST_SHMEM_REDUCE(prod, uint, unsigned int);
+    TEST_SHMEM_REDUCE(prod, ulong, unsigned long);
+    TEST_SHMEM_REDUCE(prod, ulonglong, unsigned long long);
+    TEST_SHMEM_REDUCE(prod, int8, int8_t);
+    TEST_SHMEM_REDUCE(prod, int16, int16_t);
+    TEST_SHMEM_REDUCE(prod, int32, int32_t);
+    TEST_SHMEM_REDUCE(prod, int64, int64_t);
+    TEST_SHMEM_REDUCE(prod, uint8, uint8_t);
+    TEST_SHMEM_REDUCE(prod, uint16, uint16_t);
+    TEST_SHMEM_REDUCE(prod, uint32, uint32_t);
+    TEST_SHMEM_REDUCE(prod, uint64, uint64_t);
+    TEST_SHMEM_REDUCE(prod, size, size_t);
+    TEST_SHMEM_REDUCE(prod, float, float);
+    TEST_SHMEM_REDUCE(prod, double, double);
+    TEST_SHMEM_REDUCE(prod, longdouble, long double);
+    TEST_SHMEM_REDUCE(prod, complexd, double _Complex);
+    TEST_SHMEM_REDUCE(prod, complexf, float _Complex);
+
+    shmem_finalize();
+    return rc;
+}
diff --git a/test/unit/signal_wait_until.c b/test/unit/signal_wait_until.c
new file mode 100644
index 0000000..8fee385
--- /dev/null
+++ b/test/unit/signal_wait_until.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2021 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+/*
+ * Validate signal_wait_until operation using blocking put_signal
+*/
+
+#include <stdio.h>
+#include <stdint.h>
+#include <shmem.h>
+
+#define MSG_SZ 10
+/* Each PE puts a message with a signal to every PE, then waits on its own signal word; returns the error count. */
+int main(int argc, char *argv[])
+{
+    long source[MSG_SZ];
+    long *target;
+    int me, npes, i;
+    int errors = 0;
+
+    static uint64_t sig_addr = 0;  /* signal word updated by the put_signal calls below */
+
+    shmem_init();
+
+    me = shmem_my_pe();
+    npes = shmem_n_pes();
+
+    for (i = 0; i < MSG_SZ; i++)
+        source[i] = i;
+
+    target = shmem_calloc(MSG_SZ, sizeof *target);
+    if (!target) {
+        fprintf(stderr, "Failed to allocate target pointer\n");
+        shmem_global_exit(1);
+    }
+
+    shmem_barrier_all();
+    /* Phase 1: add 1 to the signal word of every PE, this PE included. */
+    for (i = 0; i < npes; i++) {
+        shmem_long_put_signal(target, source, MSG_SZ, &sig_addr, 1, SHMEM_SIGNAL_ADD, i);
+    }
+    /* The signal starts at 0 and receives one increment per PE, so it should never exceed npes. */
+    uint64_t retval = shmem_signal_wait_until(&sig_addr, SHMEM_CMP_LE, npes);
+
+    if (retval > (uint64_t) npes)
+        errors++;
+
+    retval = shmem_signal_wait_until(&sig_addr, SHMEM_CMP_EQ, npes);
+    if (retval != (uint64_t) npes)
+        errors++;
+
+    retval = shmem_signal_wait_until(&sig_addr, SHMEM_CMP_LT, npes + 1);
+    if (retval != (uint64_t) npes)
+        errors++;
+    /* Check the payload that accompanied the signals. */
+    for (i = 0; i < MSG_SZ; i++) {
+        if (target[i] != source[i]) {
+            fprintf(stderr, "%10d: target[%d] = %ld not matching %ld with SHMEM_SIGNAL_ADD\n",
+                    me, i, target[i], source[i]);
+            errors++;
+        }
+    }
+    /* Clear the payload buffer before the second phase. */
+    for (i = 0; i < MSG_SZ; i++)
+        target[i] = 0;
+
+    shmem_barrier_all();
+    /* Phase 2: set the signal word of every PE to npes + 1. */
+    for (i = 0; i < npes; i++) {
+        shmem_long_put_signal(target, source, MSG_SZ, &sig_addr, npes + 1, SHMEM_SIGNAL_SET, i);
+    }
+    /* The signal still holds npes from phase 1, so ">= npes" accepts either the old or the new value. */
+    retval = shmem_signal_wait_until(&sig_addr, SHMEM_CMP_GE, npes);
+    if (retval < (uint64_t) npes)
+        errors++;
+
+    retval = shmem_signal_wait_until(&sig_addr, SHMEM_CMP_EQ, npes + 1);
+    if (retval != (uint64_t) npes + 1)
+        errors++;
+
+    shmem_barrier_all();
+    /* Check the payload again after the barrier. */
+    for (i = 0; i < MSG_SZ; i++) {
+        if (target[i] != source[i]) {
+            fprintf(stderr, "%10d: target[%d] = %ld not matching %ld with SHMEM_SIGNAL_SET\n",
+                    me, i, target[i], source[i]);
+            errors++;
+        }
+    }
+
+    shmem_free(target);
+    shmem_finalize();
+
+    return errors;
+}
diff --git a/test/unit/sync-size.c b/test/unit/sync-size.c index 8af6ea3..090cdf7 100644 --- a/test/unit/sync-size.c +++ b/test/unit/sync-size.c @@ -25,8 +25,6 @@ * SOFTWARE. */ -/* Test various collectives using the same pSync array of SHMEM_SYNC_SIZE */ - #include #include #include @@ -35,35 +33,18 @@ #define N 3 #define MAX(A,B) ((A) > (B)) ? (A) : (B) -long pSync[SHMEM_SYNC_SIZE]; -long pWrk[MAX(N/2 + 1, SHMEM_REDUCE_MIN_WRKDATA_SIZE)]; - long src[N]; long dst[N]; int main(int argc, char* argv[]) { - int i, j, me, npes, long_is_32; + int i, j, me, npes; int errors = 0; - for (i = 0; i < SHMEM_SYNC_SIZE; i++) { - pSync[i] = SHMEM_SYNC_VALUE; - pSync[i] = SHMEM_SYNC_VALUE; - } - shmem_init(); me = shmem_my_pe(); npes = shmem_n_pes(); - if (sizeof(long) == 4) { - long_is_32 = 1; - } else if (sizeof(long) == 8) { - long_is_32 = 0; - } else { - printf("Error: sizeof(long) == %zu, must be either 4 or 8\n", sizeof(long)); - shmem_global_exit(1); - } - for (i = 0; i < N; i += 1) { src[i] = me; dst[i] = -1; @@ -71,15 +52,11 @@ int main(int argc, char* argv[]) { /* Barrier */ - shmem_barrier(0, 0, npes, pSync); shmem_barrier_all(); /* Broadcast */ - if (long_is_32) - shmem_broadcast32(dst, src, N, 0, 0, 0, npes, pSync); - else - shmem_broadcast64(dst, src, N, 0, 0, 0, npes, pSync); + shmem_long_broadcast(SHMEM_TEAM_WORLD, dst, src, N, 0); for (i = 0; i < N && me > 0; i++) { if (dst[i] != 0) { @@ -94,10 +71,7 @@ int main(int argc, char* argv[]) { long *dst_all = shmem_malloc(npes * N * sizeof(long)); - if (long_is_32) - shmem_fcollect32(dst_all, src, N, 0, 0, npes, pSync); - else - shmem_fcollect64(dst_all, src, N, 0, 0, npes, pSync); + shmem_long_fcollect(SHMEM_TEAM_WORLD, dst_all, src, N); for (i = 0; i < npes; i++) { for (j = 0; j < N; j++) { @@ -113,7 +87,7 @@ int main(int argc, char* argv[]) { /* Reduction */ - shmem_long_max_to_all(dst, src, N, 0, 0, npes, pWrk, pSync); + shmem_long_max_reduce(SHMEM_TEAM_WORLD, dst, src, N); for (i = 0; i < N; i++) { if (dst[i] != 
npes-1) { diff --git a/test/unit/to_all.c b/test/unit/to_all.c index 6bb49ae..74e1414 100644 --- a/test/unit/to_all.c +++ b/test/unit/to_all.c @@ -35,8 +35,6 @@ * short, int, long, float, double, long double, long long. * Point being numerous SHMEM atomics and synchronizations in flight. * From OpenSHMEM_specification_v1.0-final doc: -* The pWrk and pSync arrays on all PEs in the active set must not be -* in use from a prior call to a collective OpenSHMEM routine. * * frank @ SystemFabric Works identified an interesting overflow issue in the * prod_to_all test. In the presence of slightly larger PE counts (>=14), @@ -44,8 +42,7 @@ * The short and int both wrap correctly and are both uniformly wrong...uniformly * being the salient point. float, double and long double all suffer from * floating point rounding errors, hence the FP test results are ignored -* (assumed to pass)when FP rounding is encountered. FP*_prod_to_all() calls are -* still made so as not to upset the pSync ordering. +* (assumed to pass)when FP rounding is encountered. * * usage: to_all {-amopsSv|h} * where: @@ -71,34 +68,31 @@ #define Rfprintf if (shmem_my_pe() == 0) fprintf #define Vprintf if (Verbose > 1) printf -int sum_to_all(int me, int npes); -int and_to_all(int me, int npes); -int min_to_all(int me, int npes); -int max_to_all(int me, int npes); -int prod_to_all(int me, int npes); -int or_to_all(int me, int npes); -int xor_to_all(int me, int npes); +int sum_reduce(int me, int npes); +int and_reduce(int me, int npes); +int min_reduce(int me, int npes); +int max_reduce(int me, int npes); +int prod_reduce(int me, int npes); +int or_reduce(int me, int npes); +int xor_reduce(int me, int npes); int Verbose; int Serialize; int Min, And, Sum, Prod, Or, Xor; int Passed; -long pSync[SHMEM_REDUCE_SYNC_SIZE]; -long pSync1[SHMEM_REDUCE_SYNC_SIZE]; - #define N 128 #define MAX(a, b) ((a) > (b)) ? 
(a) : (b) #define WRK_SIZE MAX(N/2+1, SHMEM_REDUCE_MIN_WRKDATA_SIZE) -short src0[N], dst0[N], pWrk0[WRK_SIZE]; -int src1[N], dst1[N], pWrk1[WRK_SIZE]; -long src2[N], dst2[N], pWrk2[WRK_SIZE]; -float src3[N], dst3[N], pWrk3[WRK_SIZE]; -double src4[N], dst4[N], pWrk4[WRK_SIZE]; -long double src5[N], dst5[N], pWrk5[WRK_SIZE]; -long long src6[N], dst6[N], pWrk6[WRK_SIZE]; +short src0[N], dst0[N]; +int src1[N], dst1[N]; +long src2[N], dst2[N]; +float src3[N], dst3[N]; +double src4[N], dst4[N]; +long double src5[N], dst5[N]; +long long src6[N], dst6[N]; short expected_result0; int expected_result1; @@ -111,7 +105,7 @@ long long expected_result6; int ok[7]; int -max_to_all(int me, int npes) +max_reduce(int me, int npes) { int i, j, pass=0; @@ -123,13 +117,13 @@ max_to_all(int me, int npes) } shmem_barrier_all(); - shmem_short_max_to_all( dst0, src0, N, 0, 0, npes, pWrk0, pSync); - shmem_int_max_to_all( dst1, src1, N, 0, 0, npes, pWrk1, pSync1); - shmem_long_max_to_all( dst2, src2, N, 0, 0, npes, pWrk2, pSync); - shmem_float_max_to_all( dst3, src3, N, 0, 0, npes, pWrk3, pSync1); - shmem_double_max_to_all( dst4, src4, N, 0, 0, npes, pWrk4, pSync); - shmem_longdouble_max_to_all(dst5, src5, N, 0, 0, npes, pWrk5, pSync1); - shmem_longlong_max_to_all( dst6, src6, N, 0, 0, npes, pWrk6, pSync); + shmem_short_max_reduce( SHMEM_TEAM_WORLD, dst0, src0, N); + shmem_int_max_reduce( SHMEM_TEAM_WORLD, dst1, src1, N); + shmem_long_max_reduce( SHMEM_TEAM_WORLD, dst2, src2, N); + shmem_float_max_reduce( SHMEM_TEAM_WORLD, dst3, src3, N); + shmem_double_max_reduce( SHMEM_TEAM_WORLD, dst4, src4, N); + shmem_longdouble_max_reduce(SHMEM_TEAM_WORLD, dst5, src5, N); + shmem_longlong_max_reduce( SHMEM_TEAM_WORLD, dst6, src6, N); if (me == 0) { for (i = 0,j=-1; i < N; i++,j++) { @@ -143,52 +137,52 @@ max_to_all(int me, int npes) } if(ok[0]==1){ - printf("Reduction operation shmem_short_max_to_all: Failed\n"); + printf("Reduction operation shmem_short_max_reduce: Failed\n"); } else{ - 
Vprintf("Reduction operation shmem_short_max_to_all: Passed\n"); + Vprintf("Reduction operation shmem_short_max_reduce: Passed\n"); pass++; } if(ok[1]==1){ - printf("Reduction operation shmem_int_max_to_all: Failed\n"); + printf("Reduction operation shmem_int_max_reduce: Failed\n"); } else{ - Vprintf("Reduction operation shmem_int_max_to_all: Passed\n"); + Vprintf("Reduction operation shmem_int_max_reduce: Passed\n"); pass++; } if(ok[2]==1){ - printf("Reduction operation shmem_long_max_to_all: Failed\n"); + printf("Reduction operation shmem_long_max_reduce: Failed\n"); } else{ - Vprintf("Reduction operation shmem_long_max_to_all: Passed\n"); + Vprintf("Reduction operation shmem_long_max_reduce: Passed\n"); pass++; } if(ok[3]==1){ - printf("Reduction operation shmem_float_max_to_all: Failed\n"); + printf("Reduction operation shmem_float_max_reduce: Failed\n"); } else{ - Vprintf("Reduction operation shmem_float_max_to_all: Passed\n"); + Vprintf("Reduction operation shmem_float_max_reduce: Passed\n"); pass++; } if(ok[4]==1){ - printf("Reduction operation shmem_double_max_to_all: Failed\n"); + printf("Reduction operation shmem_double_max_reduce: Failed\n"); } else{ - Vprintf("Reduction operation shmem_double_max_to_all: Passed\n"); + Vprintf("Reduction operation shmem_double_max_reduce: Passed\n"); pass++; } if(ok[5]==1){ - printf("Reduction operation shmem_longdouble_max_to_all: Failed\n"); + printf("Reduction operation shmem_longdouble_max_reduce: Failed\n"); } else{ - Vprintf("Reduction operation shmem_longdouble_max_to_all: Passed\n"); + Vprintf("Reduction operation shmem_longdouble_max_reduce: Passed\n"); pass++; } if(ok[6]==1){ - printf("Reduction operation shmem_longlong_max_to_all: Failed\n"); + printf("Reduction operation shmem_longlong_max_reduce: Failed\n"); } else{ - Vprintf("Reduction operation shmem_longlong_max_to_all: Passed\n"); + Vprintf("Reduction operation shmem_longlong_max_reduce: Passed\n"); pass++; } Vprintf("\n"); @@ -199,7 +193,7 @@ 
max_to_all(int me, int npes) } int -min_to_all(int me, int npes) +min_reduce(int me, int npes) { int i, pass=0; @@ -219,13 +213,13 @@ min_to_all(int me, int npes) shmem_barrier_all(); - shmem_short_min_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync); - shmem_int_min_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1); - shmem_long_min_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync); - shmem_float_min_to_all(dst3, src3, N, 0, 0, npes, pWrk3, pSync1); - shmem_double_min_to_all(dst4, src4, N, 0, 0, npes, pWrk4, pSync); - shmem_longdouble_min_to_all(dst5, src5, N, 0, 0, npes, pWrk5, pSync1); - shmem_longlong_min_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync); + shmem_short_min_reduce( SHMEM_TEAM_WORLD, dst0, src0, N); + shmem_int_min_reduce( SHMEM_TEAM_WORLD, dst1, src1, N); + shmem_long_min_reduce( SHMEM_TEAM_WORLD, dst2, src2, N); + shmem_float_min_reduce( SHMEM_TEAM_WORLD, dst3, src3, N); + shmem_double_min_reduce( SHMEM_TEAM_WORLD, dst4, src4, N); + shmem_longdouble_min_reduce(SHMEM_TEAM_WORLD, dst5, src5, N); + shmem_longlong_min_reduce( SHMEM_TEAM_WORLD, dst6, src6, N); if(me == 0) { for (i = 0; i < N; i++) { @@ -238,52 +232,52 @@ min_to_all(int me, int npes) if(dst6[i] != i) ok[6] = 1; } if(ok[0]==1){ - printf("Reduction operation shmem_short_min_to_all: Failed\n"); + printf("Reduction operation shmem_short_min_reduce: Failed\n"); } else{ - Vprintf("Reduction operation shmem_short_min_to_all: Passed\n"); + Vprintf("Reduction operation shmem_short_min_reduce: Passed\n"); pass++; } if(ok[1]==1){ - printf("Reduction operation shmem_int_min_to_all: Failed\n"); + printf("Reduction operation shmem_int_min_reduce: Failed\n"); } else{ - Vprintf("Reduction operation shmem_int_min_to_all: Passed\n"); + Vprintf("Reduction operation shmem_int_min_reduce: Passed\n"); pass++; } if(ok[2]==1){ - printf("Reduction operation shmem_long_min_to_all: Failed\n"); + printf("Reduction operation shmem_long_min_reduce: Failed\n"); } else{ - Vprintf("Reduction operation 
shmem_long_min_to_all: Passed\n"); + Vprintf("Reduction operation shmem_long_min_reduce: Passed\n"); pass++; } if(ok[3]==1){ - printf("Reduction operation shmem_float_min_to_all: Failed\n"); + printf("Reduction operation shmem_float_min_reduce: Failed\n"); } else{ - Vprintf("Reduction operation shmem_float_min_to_all: Passed\n"); + Vprintf("Reduction operation shmem_float_min_reduce: Passed\n"); pass++; } if(ok[4]==1){ - printf("Reduction operation shmem_double_min_to_all: Failed\n"); + printf("Reduction operation shmem_double_min_reduce: Failed\n"); } else{ - Vprintf("Reduction operation shmem_double_min_to_all: Passed\n"); + Vprintf("Reduction operation shmem_double_min_reduce: Passed\n"); pass++; } if(ok[5]==1){ - printf("Reduction operation shmem_longdouble_min_to_all: Failed\n"); + printf("Reduction operation shmem_longdouble_min_reduce: Failed\n"); } else{ - Vprintf("Reduction operation shmem_longdouble_min_to_all: Passed\n"); + Vprintf("Reduction operation shmem_longdouble_min_reduce: Passed\n"); pass++; } if(ok[6]==1){ - printf("Reduction operation shmem_longlong_min_to_all: Failed\n"); + printf("Reduction operation shmem_longlong_min_reduce: Failed\n"); } else{ - Vprintf("Reduction operation shmem_longlong_min_to_all: Passed\n"); + Vprintf("Reduction operation shmem_longlong_min_reduce: Passed\n"); pass++; } Vprintf("\n"); @@ -295,7 +289,7 @@ min_to_all(int me, int npes) int -sum_to_all(int me, int npes) +sum_reduce(int me, int npes) { int i, pass=0; @@ -314,13 +308,13 @@ sum_to_all(int me, int npes) shmem_barrier_all(); - shmem_short_sum_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync); - shmem_int_sum_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1); - shmem_long_sum_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync); - shmem_float_sum_to_all(dst3, src3, N, 0, 0, npes, pWrk3, pSync1); - shmem_double_sum_to_all(dst4, src4, N, 0, 0, npes, pWrk4, pSync); - shmem_longdouble_sum_to_all(dst5, src5, N, 0, 0, npes, pWrk5, pSync1); - shmem_longlong_sum_to_all(dst6, 
src6, N, 0, 0, npes, pWrk6, pSync); + shmem_short_sum_reduce( SHMEM_TEAM_WORLD, dst0, src0, N); + shmem_int_sum_reduce( SHMEM_TEAM_WORLD, dst1, src1, N); + shmem_long_sum_reduce( SHMEM_TEAM_WORLD, dst2, src2, N); + shmem_float_sum_reduce( SHMEM_TEAM_WORLD, dst3, src3, N); + shmem_double_sum_reduce( SHMEM_TEAM_WORLD, dst4, src4, N); + shmem_longdouble_sum_reduce(SHMEM_TEAM_WORLD, dst5, src5, N); + shmem_longlong_sum_reduce( SHMEM_TEAM_WORLD, dst6, src6, N); if(me == 0) { for (i = 0; i < N; i++) { @@ -333,52 +327,52 @@ sum_to_all(int me, int npes) if(dst6[i] != (long long) (npes * (npes-1)/2)) ok[6] = 1; } if(ok[0]==1){ - printf("Reduction operation shmem_short_sum_to_all: Failed\n"); + printf("Reduction operation shmem_short_sum_reduce: Failed\n"); } else{ - Vprintf("Reduction operation shmem_short_sum_to_all: Passed\n"); + Vprintf("Reduction operation shmem_short_sum_reduce: Passed\n"); pass++; } if(ok[1]==1){ - printf("Reduction operation shmem_int_sum_to_all: Failed\n"); + printf("Reduction operation shmem_int_sum_reduce: Failed\n"); } else{ - Vprintf("Reduction operation shmem_int_sum_to_all: Passed\n"); + Vprintf("Reduction operation shmem_int_sum_reduce: Passed\n"); pass++; } if(ok[2]==1){ - printf("Reduction operation shmem_long_sum_to_all: Failed\n"); + printf("Reduction operation shmem_long_sum_reduce: Failed\n"); } else{ - Vprintf("Reduction operation shmem_long_sum_to_all: Passed\n"); + Vprintf("Reduction operation shmem_long_sum_reduce: Passed\n"); pass++; } if(ok[3]==1){ - printf("Reduction operation shmem_float_sum_to_all: Failed\n"); + printf("Reduction operation shmem_float_sum_reduce: Failed\n"); } else{ - Vprintf("Reduction operation shmem_float_sum_to_all: Passed\n"); + Vprintf("Reduction operation shmem_float_sum_reduce: Passed\n"); pass++; } if(ok[4]==1){ - printf("Reduction operation shmem_double_sum_to_all: Failed\n"); + printf("Reduction operation shmem_double_sum_reduce: Failed\n"); } else{ - Vprintf("Reduction operation 
shmem_double_sum_to_all: Passed\n"); + Vprintf("Reduction operation shmem_double_sum_reduce: Passed\n"); pass++; } if(ok[5]==1){ - printf("Reduction operation shmem_longdouble_sum_to_all: Failed\n"); + printf("Reduction operation shmem_longdouble_sum_reduce: Failed\n"); } else{ - Vprintf("Reduction operation shmem_longdouble_sum_to_all: Passed\n"); + Vprintf("Reduction operation shmem_longdouble_sum_reduce: Passed\n"); pass++; } if(ok[6]==1){ - printf("Reduction operation shmem_longlong_sum_to_all: Failed\n"); + printf("Reduction operation shmem_longlong_sum_reduce: Failed\n"); } else{ - Vprintf("Reduction operation shmem_longlong_sum_to_all: Passed\n"); + Vprintf("Reduction operation shmem_longlong_sum_reduce: Passed\n"); pass++; } Vprintf("\n"); fflush(stdout); @@ -390,7 +384,7 @@ sum_to_all(int me, int npes) int -and_to_all(int me, int num_pes) +and_reduce(int me, int num_pes) { int i, pass=0; @@ -403,10 +397,10 @@ and_to_all(int me, int num_pes) shmem_barrier_all(); - shmem_short_and_to_all(dst0, src0, N, 0, 0, num_pes, pWrk0, pSync); - shmem_int_and_to_all(dst1, src1, N, 0, 0, num_pes, pWrk1, pSync1); - shmem_long_and_to_all(dst2, src2, N, 0, 0, num_pes, pWrk2, pSync); - shmem_longlong_and_to_all(dst6, src6, N, 0, 0, num_pes, pWrk6, pSync1); + shmem_short_and_reduce( SHMEM_TEAM_WORLD, dst0, src0, N); + shmem_int_and_reduce( SHMEM_TEAM_WORLD, dst1, src1, N); + shmem_long_and_reduce( SHMEM_TEAM_WORLD, dst2, src2, N); + shmem_longlong_and_reduce(SHMEM_TEAM_WORLD, dst6, src6, N); if (me==0) { for (i = 0; i < N; i++) { @@ -417,31 +411,31 @@ and_to_all(int me, int num_pes) } if(ok[0]==1){ - printf("Reduction operation shmem_short_and_to_all: Failed\n"); + printf("Reduction operation shmem_short_and_reduce: Failed\n"); } else{ - Vprintf("Reduction operation shmem_short_and_to_all: Passed\n"); + Vprintf("Reduction operation shmem_short_and_reduce: Passed\n"); pass++; } if(ok[1]==1){ - printf("Reduction operation shmem_int_and_to_all: Failed\n"); + printf("Reduction 
operation shmem_int_and_reduce: Failed\n"); } else{ - Vprintf("Reduction operation shmem_int_and_to_all: Passed\n"); + Vprintf("Reduction operation shmem_int_and_reduce: Passed\n"); pass++; } if(ok[2]==1){ - printf("Reduction operation shmem_long_and_to_all: Failed\n"); + printf("Reduction operation shmem_long_and_reduce: Failed\n"); } else{ - Vprintf("Reduction operation shmem_long_and_to_all: Passed\n"); + Vprintf("Reduction operation shmem_long_and_reduce: Passed\n"); pass++; } if(ok[3]==1){ - printf("Reduction operation shmem_longlong_and_to_all: Failed\n"); + printf("Reduction operation shmem_longlong_and_reduce: Failed\n"); } else{ - Vprintf("Reduction operation shmem_longlong_and_to_all: Passed\n"); + Vprintf("Reduction operation shmem_longlong_and_reduce: Passed\n"); pass++; } Vprintf("\n"); fflush(stdout); @@ -453,7 +447,7 @@ and_to_all(int me, int num_pes) int -prod_to_all(int me, int npes) +prod_reduce(int me, int npes) { int i, pass=0; int float_rounding_err=0; @@ -501,13 +495,13 @@ prod_to_all(int me, int npes) shmem_barrier_all(); - shmem_short_prod_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync); - shmem_int_prod_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1); - shmem_long_prod_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync); - shmem_float_prod_to_all(dst3, src3, N, 0, 0, npes, pWrk3, pSync1); - shmem_double_prod_to_all(dst4, src4, N, 0, 0, npes, pWrk4, pSync); - shmem_longdouble_prod_to_all(dst5, src5, N, 0, 0, npes, pWrk5, pSync1); - shmem_longlong_prod_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync); + shmem_short_prod_reduce( SHMEM_TEAM_WORLD, dst0, src0, N); + shmem_int_prod_reduce( SHMEM_TEAM_WORLD, dst1, src1, N); + shmem_long_prod_reduce( SHMEM_TEAM_WORLD, dst2, src2, N); + shmem_float_prod_reduce( SHMEM_TEAM_WORLD, dst3, src3, N); + shmem_double_prod_reduce( SHMEM_TEAM_WORLD, dst4, src4, N); + shmem_longdouble_prod_reduce(SHMEM_TEAM_WORLD, dst5, src5, N); + shmem_longlong_prod_reduce( SHMEM_TEAM_WORLD, dst6, src6, N); if(me == 0) { for (i 
= 0; i < N; i++) { @@ -529,66 +523,66 @@ prod_to_all(int me, int npes) } if(ok[0]==1) - printf("Reduction operation shmem_short_prod_to_all: Failed\n"); + printf("Reduction operation shmem_short_prod_reduce: Failed\n"); else { - Vprintf("Reduction operation shmem_short_prod_to_all: Passed\n"); + Vprintf("Reduction operation shmem_short_prod_reduce: Passed\n"); pass++; } if(ok[1]==1) - printf("Reduction operation shmem_int_prod_to_all: Failed\n"); + printf("Reduction operation shmem_int_prod_reduce: Failed\n"); else { - Vprintf("Reduction operation shmem_int_prod_to_all: Passed\n"); + Vprintf("Reduction operation shmem_int_prod_reduce: Passed\n"); pass++; } if(ok[2]==1) - printf("Reduction operation shmem_long_prod_to_all: Failed\n"); + printf("Reduction operation shmem_long_prod_reduce: Failed\n"); else { - Vprintf("Reduction operation shmem_long_prod_to_all: Passed\n"); + Vprintf("Reduction operation shmem_long_prod_reduce: Passed\n"); pass++; } if(ok[3]==1) - printf("Reduction operation shmem_float_prod_to_all: Failed\n"); + printf("Reduction operation shmem_float_prod_reduce: Failed\n"); else { if (float_rounding_err) { - Vprintf("Reduction operation shmem_float_prod_to_all: skipped due to float rounding error\n"); + Vprintf("Reduction operation shmem_float_prod_reduce: skipped due to float rounding error\n"); } else { - Vprintf("Reduction operation shmem_float_prod_to_all: Passed\n"); + Vprintf("Reduction operation shmem_float_prod_reduce: Passed\n"); } pass++; } if(ok[4]==1) - printf("Reduction operation shmem_double_prod_to_all: Failed\n"); + printf("Reduction operation shmem_double_prod_reduce: Failed\n"); else { if (double_rounding_err) { - Vprintf("Reduction operation shmem_double_prod_to_all: skipped due to double rounding error\n"); + Vprintf("Reduction operation shmem_double_prod_reduce: skipped due to double rounding error\n"); } else { - Vprintf("Reduction operation shmem_double_prod_to_all: Passed\n"); + Vprintf("Reduction operation 
shmem_double_prod_reduce: Passed\n"); } pass++; } if(ok[5]==1) - printf("Reduction operation shmem_longdouble_prod_to_all: Failed\n"); + printf("Reduction operation shmem_longdouble_prod_reduce: Failed\n"); else { if (double_rounding_err) { - Vprintf("Reduction operation shmem_longdouble_prod_to_all: skipped due to long double rounding error\n"); + Vprintf("Reduction operation shmem_longdouble_prod_reduce: skipped due to long double rounding error\n"); } else { - Vprintf("Reduction operation shmem_longdouble_prod_to_all: Passed\n"); + Vprintf("Reduction operation shmem_longdouble_prod_reduce: Passed\n"); } pass++; } if(ok[6]==1) - printf("Reduction operation shmem_longlong_prod_to_all: Failed\n"); + printf("Reduction operation shmem_longlong_prod_reduce: Failed\n"); else { - Vprintf("Reduction operation shmem_longlong_prod_to_all: Passed\n"); + Vprintf("Reduction operation shmem_longlong_prod_reduce: Passed\n"); pass++; } Vprintf("\n"); @@ -600,7 +594,7 @@ prod_to_all(int me, int npes) int -or_to_all(int me, int npes) +or_reduce(int me, int npes) { int i, pass=0; @@ -616,10 +610,10 @@ or_to_all(int me, int npes) shmem_barrier_all(); - shmem_short_or_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync); - shmem_int_or_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1); - shmem_long_or_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync); - shmem_longlong_or_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync1); + shmem_short_or_reduce( SHMEM_TEAM_WORLD, dst0, src0, N); + shmem_int_or_reduce( SHMEM_TEAM_WORLD, dst1, src1, N); + shmem_long_or_reduce( SHMEM_TEAM_WORLD, dst2, src2, N); + shmem_longlong_or_reduce(SHMEM_TEAM_WORLD, dst6, src6, N); if (me==0) { for (i = 0; i < N; i++) { @@ -632,30 +626,30 @@ or_to_all(int me, int npes) } if(ok[0]==1) - printf("Reduction operation shmem_short_or_to_all: Failed\n"); + printf("Reduction operation shmem_short_or_reduce: Failed\n"); else { - Vprintf("Reduction operation shmem_short_or_to_all: Passed\n"); + Vprintf("Reduction operation 
shmem_short_or_reduce: Passed\n"); pass++; } if(ok[1]==1) - printf("Reduction operation shmem_int_or_to_all: Failed\n"); + printf("Reduction operation shmem_int_or_reduce: Failed\n"); else { - Vprintf("Reduction operation shmem_int_or_to_all: Passed\n"); + Vprintf("Reduction operation shmem_int_or_reduce: Passed\n"); pass++; } if(ok[2]==1) - printf("Reduction operation shmem_long_or_to_all: Failed\n"); + printf("Reduction operation shmem_long_or_reduce: Failed\n"); else { - Vprintf("Reduction operation shmem_long_or_to_all: Passed\n"); + Vprintf("Reduction operation shmem_long_or_reduce: Passed\n"); pass++; } if(ok[6]==1) - printf("Reduction operation shmem_longlong_or_to_all: Failed\n"); + printf("Reduction operation shmem_longlong_or_reduce: Failed\n"); else { - Vprintf("Reduction operation shmem_longlong_or_to_all: Passed\n"); + Vprintf("Reduction operation shmem_longlong_or_reduce: Passed\n"); pass++; } Vprintf("\n"); @@ -667,7 +661,7 @@ or_to_all(int me, int npes) int -xor_to_all(int me, int npes) +xor_reduce(int me, int npes) { int i, pass=0; int expected_result = ((int)(npes/2) % 2); @@ -684,10 +678,10 @@ xor_to_all(int me, int npes) shmem_barrier_all(); - shmem_short_xor_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync); - shmem_int_xor_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1); - shmem_long_xor_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync); - shmem_longlong_xor_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync1); + shmem_short_xor_reduce( SHMEM_TEAM_WORLD, dst0, src0, N); + shmem_int_xor_reduce( SHMEM_TEAM_WORLD, dst1, src1, N); + shmem_long_xor_reduce( SHMEM_TEAM_WORLD, dst2, src2, N); + shmem_longlong_xor_reduce(SHMEM_TEAM_WORLD, dst6, src6, N); if (me==0) { for (i = 0; i < N; i++) { @@ -698,30 +692,30 @@ xor_to_all(int me, int npes) } if(ok[0]==1) - printf("Reduction operation shmem_short_xor_to_all: Failed\n"); + printf("Reduction operation shmem_short_xor_reduce: Failed\n"); else { - Vprintf("Reduction operation shmem_short_xor_to_all: Passed\n"); 
+ Vprintf("Reduction operation shmem_short_xor_reduce: Passed\n"); pass++; } if(ok[1]==1) - printf("Reduction operation shmem_int_xor_to_all: Failed\n"); + printf("Reduction operation shmem_int_xor_reduce: Failed\n"); else { - Vprintf("Reduction operation shmem_int_xor_to_all: Passed\n"); + Vprintf("Reduction operation shmem_int_xor_reduce: Passed\n"); pass++; } if(ok[2]==1) - printf("Reduction operation shmem_long_xor_to_all: Failed\n"); + printf("Reduction operation shmem_long_xor_reduce: Failed\n"); else { - Vprintf("Reduction operation shmem_long_xor_to_all: Passed\n"); + Vprintf("Reduction operation shmem_long_xor_reduce: Passed\n"); pass++; } if(ok[6]==1) - printf("Reduction operation shmem_longlong_xor_to_all: Failed\n"); + printf("Reduction operation shmem_longlong_xor_reduce: Failed\n"); else { - Vprintf("Reduction operation shmem_longlong_xor_to_all: Passed\n"); + Vprintf("Reduction operation shmem_longlong_xor_reduce: Passed\n"); pass++; } @@ -736,7 +730,7 @@ xor_to_all(int me, int npes) int main(int argc, char* argv[]) { - int c, i, mype, num_pes, tests, passed; + int c, mype, num_pes, tests, passed; char *pgm; shmem_init(); @@ -752,22 +746,22 @@ main(int argc, char* argv[]) while((c=getopt(argc,argv,"ampsSoxhv")) != -1) { switch(c) { case 'a': - And++; // do not run and_to_all + And++; // do not run and_reduce break; case 'm': - Min++; // do not run min_to_all + Min++; // do not run min_reduce break; case 'o': - Or++; // do not run or_to_all + Or++; // do not run or_reduce break; case 'p': - Prod++; // do not run prod_to_all + Prod++; // do not run prod_reduce break; case 's': - Sum++; // do not run sum_to_all + Sum++; // do not run sum_reduce break; case 'x': - Xor++; // do not run xor_to_all + Xor++; // do not run xor_reduce break; case 'S': Serialize++; @@ -783,45 +777,40 @@ main(int argc, char* argv[]) } } - for (i = 0; i < SHMEM_REDUCE_SYNC_SIZE; i++) { - pSync[i] = SHMEM_SYNC_VALUE; - pSync1[i] = SHMEM_SYNC_VALUE; - } - tests = passed = 0; 
shmem_barrier_all(); - passed += max_to_all(mype, num_pes); + passed += max_reduce(mype, num_pes); tests++; if (!Min) { - passed += min_to_all(mype, num_pes); + passed += min_reduce(mype, num_pes); tests++; } if (!Sum) { - passed += sum_to_all(mype, num_pes); + passed += sum_reduce(mype, num_pes); tests++; } if (!And) { - passed += and_to_all(mype, num_pes); + passed += and_reduce(mype, num_pes); tests++; } if (!Prod) { - passed += prod_to_all(mype, num_pes); + passed += prod_reduce(mype, num_pes); tests++; } if (!Or) { - passed += or_to_all(mype, num_pes); + passed += or_reduce(mype, num_pes); tests++; } if (!Xor) { - passed += xor_to_all(mype, num_pes); + passed += xor_reduce(mype, num_pes); tests++; } diff --git a/test/unit/waituntil.c b/test/unit/waituntil.c index 0188d68..66ffd20 100644 --- a/test/unit/waituntil.c +++ b/test/unit/waituntil.c @@ -88,6 +88,8 @@ main(int argc, char* argv[]) for(pe=1; pe < num_pes; pe++) SHM_PUT(target, target, 10, pe); + shmem_fence(); + for(pe=1; pe < num_pes; pe++) /* put 10 elements into target on PE 1 */ SHM_PUT(target, source, 10, pe);