From bbe5c9c47d8c91a05a8941850fc007c98283b83d Mon Sep 17 00:00:00 2001 From: Lawrence Stewart Date: Thu, 22 Aug 2024 09:24:36 -0400 Subject: [PATCH 01/72] remove duplicate text --- content/shmem_test_all_vector.tex | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/content/shmem_test_all_vector.tex b/content/shmem_test_all_vector.tex index bad49c3a..429e4366 100644 --- a/content/shmem_test_all_vector.tex +++ b/content/shmem_test_all_vector.tex @@ -46,8 +46,7 @@ conditions. This routine compares each element of the \VAR{ivars} array in the test set with each respective value in \VAR{cmp\_values} according to the comparison operator \VAR{cmp} at the - calling \ac{PE}. If \VAR{nelems} is 0, the test set is empty and this - routine returns 1. + calling \ac{PE}. The optional \VAR{status} is a mask array of length \VAR{nelems} where each element corresponds to the respective element in \VAR{ivars} and indicates whether From 560dc193486175a410c07468507d8afadb16ecac Mon Sep 17 00:00:00 2001 From: "Rahman, Md" Date: Tue, 20 Aug 2024 21:53:17 -0500 Subject: [PATCH 02/72] Minor updates to the text --- content/shmem_calloc.tex | 2 +- content/shmem_finalize.tex | 2 +- content/shmem_global_exit.tex | 2 +- content/shmem_init.tex | 2 ++ content/shmem_malloc_hints.tex | 4 ++-- content/shmem_ptr.tex | 2 +- content/shmem_team_ptr.tex | 2 +- content/threads_intro.tex | 2 +- main_spec.tex | 4 ++-- 9 files changed, 12 insertions(+), 10 deletions(-) diff --git a/content/shmem_calloc.tex b/content/shmem_calloc.tex index fc19de40..f0241bed 100644 --- a/content/shmem_calloc.tex +++ b/content/shmem_calloc.tex @@ -1,5 +1,5 @@ \apisummary{ - Allocate a zeroed block of symmetric memory. + Collectively allocate a zeroed block of symmetric memory. 
} \begin{apidefinition} diff --git a/content/shmem_finalize.tex b/content/shmem_finalize.tex index 5496e9bf..7702ec84 100644 --- a/content/shmem_finalize.tex +++ b/content/shmem_finalize.tex @@ -44,7 +44,7 @@ All processes that represent the \acp{PE} will still exist after the call to \FUNC{shmem\_finalize} returns, but they will no longer have access - to resources that have been released. + to \openshmem library resources that have been released. } \apireturnvalues{ diff --git a/content/shmem_global_exit.tex b/content/shmem_global_exit.tex index f3e49092..ced34b0c 100644 --- a/content/shmem_global_exit.tex +++ b/content/shmem_global_exit.tex @@ -48,7 +48,7 @@ terminate regardless of their current execution state. While I/O must be flushed for standard language I/O calls from \CorCpp, it is implementation dependent as to how I/O done by other means (e.g., third - party I/O libraries) is handled. Similarly, resources are released + party I/O libraries) are handled. Similarly, resources are released according to \CorCpp standard language requirements, but this may not include all resources allocated for the \openshmem program. However, a quality implementation will make a best effort to flush all I/O and clean diff --git a/content/shmem_init.tex b/content/shmem_init.tex index 6bfe2e1b..12427009 100644 --- a/content/shmem_init.tex +++ b/content/shmem_init.tex @@ -33,6 +33,7 @@ None. } +\begin{DeprecateBlock} \apinotes{ As of \openshmem[1.2], the use of \FUNC{start\_pes} has been deprecated and calls to it should be replaced with calls to \FUNC{shmem\_init}. @@ -43,6 +44,7 @@ case of \FUNC{start\_pes}, any subsequent calls to \FUNC{start\_pes} after the first one results in a no-op. 
} +\end{DeprecateBlock} \begin{apiexamples} diff --git a/content/shmem_malloc_hints.tex b/content/shmem_malloc_hints.tex index ef4cbfc2..c1ab840f 100644 --- a/content/shmem_malloc_hints.tex +++ b/content/shmem_malloc_hints.tex @@ -18,8 +18,8 @@ \apidescription{ - The \FUNC{shmem\_malloc\_with\_hints} routine, like \FUNC{shmem\_malloc}, - is a collective operation on the world team that returns a pointer to a block of at least + The \FUNC{shmem\_malloc\_with\_hints} routine, like \FUNC{shmem\_malloc}, is a collective operation + on the world team that returns a pointer to a block of at least \VAR{size} bytes, which shall be suitably aligned so that it may be assigned to a pointer to any type of object. This space is allocated from the symmetric heap (similar to \FUNC{shmem\_malloc}). When the \VAR{size} is zero, diff --git a/content/shmem_ptr.tex b/content/shmem_ptr.tex index f5c4d7e9..c9976f88 100644 --- a/content/shmem_ptr.tex +++ b/content/shmem_ptr.tex @@ -24,7 +24,7 @@ of an \openshmem routine that requires a symmetric address results in undefined behavior. - The \FUNC{shmem\_ptr} routine can provide an efficient means to accomplish + The \FUNC{shmem\_ptr} routine can provide efficient means to accomplish communication, for example when a sequence of reads and writes to a data object on a remote \ac{PE} does not match the access pattern provided in an \openshmem data transfer routine like \FUNC{shmem\_put} or diff --git a/content/shmem_team_ptr.tex b/content/shmem_team_ptr.tex index af158c31..c1c5fb54 100644 --- a/content/shmem_team_ptr.tex +++ b/content/shmem_team_ptr.tex @@ -25,7 +25,7 @@ an \openshmem routine that requires a symmetric address results in undefined behavior. 
- The \FUNC{shmem\_team\_ptr} routine can provide an efficient means to accomplish + The \FUNC{shmem\_team\_ptr} routine can provide efficient means to accomplish communication, for example when a sequence of reads and writes to a data object on a remote \ac{PE} does not match the access pattern provided in an \openshmem data transfer routine like \FUNC{shmem\_put} or diff --git a/content/threads_intro.tex b/content/threads_intro.tex index 59f134d0..3aa329c6 100644 --- a/content/threads_intro.tex +++ b/content/threads_intro.tex @@ -30,7 +30,7 @@ \begin{enumerate} \item In the \CONST{SHMEM\_THREAD\_FUNNELED}, \CONST{SHMEM\_THREAD\_SERIALIZED}, and -\CONST{SHMEM\_THREAD\_MULTIPLE} thread levels, the \FUNC{shmem\_init} and +\CONST{SHMEM\_THREAD\_MULTIPLE} thread levels, the \FUNC{shmem\_init\_thread} and \FUNC{shmem\_finalize} calls must be invoked by the same thread. \item diff --git a/main_spec.tex b/main_spec.tex index cfc3f8ae..03e10779 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -41,8 +41,8 @@ \section{Environment Variables }\label{subsec:environment_variables} \section{OpenSHMEM Library \acs{API}}\label{sec:openshmem_library_api} \subsection{Library Setup, Exit, and Query Routines} -The library setup and query interfaces that initialize and monitor the parallel -environment of the \acp{PE}. +This section specifies the library setup, exit, and query interfaces that initialize, +finalize, and monitor the parallel environment of the \acp{PE}, respectively. 
\subsubsection{\textbf{SHMEM\_INIT}}\label{subsec:shmem_init} \input{content/shmem_init} From 3cb46cf97ec97fd8a4aa3dbd513b9a1fa0e52815 Mon Sep 17 00:00:00 2001 From: Brian Smith Date: Wed, 21 Aug 2024 09:33:14 -0400 Subject: [PATCH 03/72] tweak to wording for shmem_init for repeatable calls --- content/shmem_init.tex | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/content/shmem_init.tex b/content/shmem_init.tex index 12427009..c3cec70c 100644 --- a/content/shmem_init.tex +++ b/content/shmem_init.tex @@ -39,9 +39,9 @@ deprecated and calls to it should be replaced with calls to \FUNC{shmem\_init}. While support for \FUNC{start\_pes} is still required in \openshmem libraries, users are encouraged to use \FUNC{shmem\_init}. An important difference between - \FUNC{shmem\_init} and \FUNC{start\_pes} is that multiple calls to - \FUNC{shmem\_init} within a program results in undefined behavior, while in the - case of \FUNC{start\_pes}, any subsequent calls to \FUNC{start\_pes} after the + \FUNC{shmem\_init} and \FUNC{start\_pes} is that every call to + \FUNC{shmem\_init} within a program must be matched with a call to \FUNC{shmem\_finalize}, + while in the case of \FUNC{start\_pes}, any subsequent calls to \FUNC{start\_pes} after the first one results in a no-op. } \end{DeprecateBlock} From bdd5da0c443b28eca7e823689aa9887a56ba5757 Mon Sep 17 00:00:00 2001 From: Brian Smith Date: Wed, 21 Aug 2024 09:33:42 -0400 Subject: [PATCH 04/72] Tweaks to the discussion of implicit barriers in shmem_malloc/shmem_malloc_hints --- content/shmem_malloc.tex | 6 ++++-- content/shmem_malloc_hints.tex | 11 +++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/content/shmem_malloc.tex b/content/shmem_malloc.tex index 6b0b176f..142f902c 100644 --- a/content/shmem_malloc.tex +++ b/content/shmem_malloc.tex @@ -23,8 +23,10 @@ \FUNC{malloc}, which allocates from the private heap).
When \VAR{size} is zero, the \FUNC{shmem\_malloc} routine performs no action and returns a null pointer; otherwise, - \FUNC{shmem\_malloc} calls a barrier on exit. - + \FUNC{shmem\_malloc} calls a procedure that is semantically equivalent + to \FUNC{shmem\_barrier\_all} on exit. This ensures that all \acp{PE} participate + in the memory allocation, and that the memory on other \acp{PE} can be used as soon as the local + \ac{PE} returns. The value of the \VAR{size} argument must be identical on all \acp{PE}; otherwise, the behavior is undefined. } diff --git a/content/shmem_malloc_hints.tex b/content/shmem_malloc_hints.tex index c1ab840f..cf7230f6 100644 --- a/content/shmem_malloc_hints.tex +++ b/content/shmem_malloc_hints.tex @@ -37,14 +37,13 @@ The \FUNC{shmem\_malloc\_with\_hints} routine is provided so that multiple \acp{PE} in a program can allocate symmetric, remotely accessible memory blocks. When no action is performed, these - routines return without performing a barrier. Otherwise, the routine will call a procedure that is semantically equivalent to \FUNC{shmem\_barrier\_all} on exit. - This ensures that all \acp{PE} participate in the memory allocation, and that the memory on other - \acp{PE} can be used as soon as the local \ac{PE} returns. The implicit barrier performed by this routine will quiet the - default context. It is the user's responsibility to ensure that no communication operations involving the given memory block are pending on - other contexts prior to calling the \FUNC{shmem\_free} and \FUNC{shmem\_realloc} routines. + routines return without performing a barrier. Otherwise, the routine will call a procedure that is semantically equivalent to \FUNC{shmem\_barrier\_all} on exit, similar to the behavior of \FUNC{shmem\_malloc}. + % Why are these two sentences here? should they go in the shmem_malloc text instead if they are relevant?
BES + % It is the user's responsibility to ensure that no communication operations involving the given memory block are pending on + %other contexts prior to calling the \FUNC{shmem\_free} and \FUNC{shmem\_realloc} routines. The user is also responsible for calling these routines with identical argument(s) on all \acp{PE}; if differing \VAR{size}, or \VAR{hints} arguments are used, the behavior of the call - and any subsequent \openshmem calls is undefined. + is undefined. } \apireturnvalues{ From 74148b3e55adb1e6eb7935d05ab7317f41b8f4b0 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Mon, 26 Aug 2024 19:41:44 -0400 Subject: [PATCH 05/72] Clean up and flesh out AMO section of overview list Add atomic fetch/swap to the list, make it clear CAS is conditional, and unify some language ("a PE" -> "the PE", "returns with" -> "returns", "that symmetric data" -> "the symmetric data") --- content/programming_model_overview.tex | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/content/programming_model_overview.tex b/content/programming_model_overview.tex index a76c99de..534058fe 100644 --- a/content/programming_model_overview.tex +++ b/content/programming_model_overview.tex @@ -81,9 +81,12 @@ \item \textbf{\acfp{AMO}} \begin{enumerate} - \item \OPR{Swap}: The \ac{PE} initiating the swap gets the old value of a - symmetric data object from a remote \ac{PE} and copies a new value to - that symmetric data object on the remote \ac{PE}. + \item \OPR{Fetch}: The \ac{PE} initiating the fetch returns the value of the + symmetric data object on the remote \ac{PE}. + \item \OPR{Set}: The \ac{PE} initiating the set copies a new value to the + symmetric data object on the remote \ac{PE}. + \item \OPR{Swap}: The \ac{PE} initiating the swap copies a new value to the + symmetric data object on the remote \ac{PE} and returns the old value. 
\item \OPR{Increment}: The \ac{PE} initiating the increment adds 1 to the symmetric data object on the remote \ac{PE}. \item \OPR{Add}: The \ac{PE} initiating the add specifies the value to be added @@ -91,14 +94,14 @@ \item \OPR{Bitwise Operations}: The \ac{PE} initiating the bitwise operation specifies the operand value to the bitwise operation to be performed on the symmetric data object on the remote \ac{PE}. - \item \OPR{Compare and Swap}: The \ac{PE} initiating the swap gets the old value - of the symmetric data object based on a value to be compared and copies a - new value to the symmetric data object on the remote \ac{PE}. - \item \OPR{Fetch and Increment}: The \ac{PE} initiating the increment adds 1 to - the symmetric data object on the remote \ac{PE} and returns with the old + \item \OPR{Compare and Swap}: The \ac{PE} initiating the compare and swap + conditionally copies a new value to the symmetric data object on the + remote \ac{PE} and returns the old value. + \item \OPR{Fetch and Increment}: The \ac{PE} initiating the increment adds 1 + to the symmetric data object on the remote \ac{PE} and returns the old value. \item \OPR{Fetch and Add}: The \ac{PE} initiating the add specifies the value to - be added to the symmetric data object on the remote \ac{PE} and returns with + be added to the symmetric data object on the remote \ac{PE} and returns the old value. \item \OPR{Fetch and Bitwise Operations}: The \ac{PE} initiating the bitwise operation specifies the operand value to the bitwise operation to be From 6d0655bd4bd8ac9419dc4f4b1bf000caea7bb1c8 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Tue, 27 Aug 2024 07:56:58 -0400 Subject: [PATCH 06/72] Clean up and add new routines to collective section of the overview list Add scan, avoid active set language, don't say broadcast avoids copying to self since the teams based version does do that. 
--- content/programming_model_overview.tex | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/content/programming_model_overview.tex b/content/programming_model_overview.tex index 534058fe..6c8cf8a6 100644 --- a/content/programming_model_overview.tex +++ b/content/programming_model_overview.tex @@ -138,7 +138,7 @@ \begin{enumerate} \item \OPR{Broadcast}: The \VAR{root} \ac{PE} specifies a symmetric data object to be copied to a symmetric data object on one or more remote - \acp{PE} (not including itself). + \acp{PE}. \item \OPR{Collection}: All \acp{PE} participating in the routine get the result of concatenated symmetric objects contributed by each of the \acp{PE} in another symmetric data object. @@ -146,8 +146,11 @@ of an associative binary routine over elements of the specified symmetric data object on another symmetric data object. \item \OPR{All-to-All}: All \acp{PE} participating in the routine exchange - a fixed amount of contiguous or strided data with all other \acp{PE} - in the active set. + a fixed amount of contiguous or strided data with all other participating + \acp{PE}. + \item \OPR{Scan}: All \acp{PE} participating in the routine perform an + inclusive or exclusive prefix sum over elements of the specified + symmetric data object. \end{enumerate} \item \textbf{Mutual Exclusion} From 17b6eeec3fb90f34c9607ae5cc56375940064f2f Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Tue, 27 Aug 2024 08:27:38 -0400 Subject: [PATCH 07/72] Note allocation routines are collective in overview list This makes the collective nature more explicit to match language in the allocation section. 
--- content/programming_model_overview.tex | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/content/programming_model_overview.tex b/content/programming_model_overview.tex index 6c8cf8a6..1fc4d508 100644 --- a/content/programming_model_overview.tex +++ b/content/programming_model_overview.tex @@ -43,11 +43,11 @@ \item \textbf{Symmetric Data Object Management} \begin{enumerate} - \item \OPR{Allocation}: All executing \acp{PE} must participate in the + \item \OPR{Allocation}: All executing \acp{PE} must collectively participate in the allocation of a symmetric data object with identical arguments. - \item \OPR{Deallocation}: All executing \acp{PE} must participate in the + \item \OPR{Deallocation}: All executing \acp{PE} must collectively participate in the deallocation of the same symmetric data object with identical arguments. - \item \OPR{Reallocation}: All executing \acp{PE} must participate in the + \item \OPR{Reallocation}: All executing \acp{PE} must collectively participate in the reallocation of the same symmetric data object with identical arguments. \end{enumerate} From 68b4caf620021ceff3a2b4a52fb2fd18f44a2957 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Tue, 27 Aug 2024 08:27:59 -0400 Subject: [PATCH 08/72] Clean up and add new routines to the signaling section of the overview list Update put signal to match the language of a regular put more closely and add signal set/add/fetch. 
--- content/programming_model_overview.tex | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/content/programming_model_overview.tex b/content/programming_model_overview.tex index 1fc4d508..d95d1778 100644 --- a/content/programming_model_overview.tex +++ b/content/programming_model_overview.tex @@ -111,9 +111,16 @@ \item \textbf{Signaling Operations} \begin{enumerate} - \item \OPR{Signaling Put}: The \source{} data is copied to the symmetric - object on the remote \ac{PE} and a flag on the remote \ac{PE} is subsequently - updated to signal completion. + \item \OPR{Put Signal}: The local \ac{PE} specifies the \source{} data object + to be copied to the symmetric data object on the remote \ac{PE} and + another symmetric data object on the remote \ac{PE} is subsequently + updated to signal completion. + \item \OPR{Signal Add}: The local \ac{PE} specifies a value to be added to + the symmetric data object on the remote \ac{PE}. + \item \OPR{Signal Set}: The local \ac{PE} specifies a value to be copied to + the symmetric data object on the remote \ac{PE}. + \item \OPR{Signal Fetch}: The local \ac{PE} returns the value of a local data + object. \end{enumerate} \item \textbf{Synchronization and Ordering} From 83d8bd609d7557b257071045f9c92354353710b8 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Tue, 27 Aug 2024 09:20:35 -0400 Subject: [PATCH 09/72] Add sessions to overview list --- content/programming_model_overview.tex | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/content/programming_model_overview.tex b/content/programming_model_overview.tex index d95d1778..6d53ece1 100644 --- a/content/programming_model_overview.tex +++ b/content/programming_model_overview.tex @@ -123,6 +123,13 @@ object. 
\end{enumerate} +\item \textbf{Session Management} +\begin{enumerate} + \item \OPR{Sessions}: Sessions are a mechanism for the application to inform + the implementation about an upcoming sequence of operations that exhibit + a pattern that may be suitable for runtime optimization. +\end{enumerate} + \item \textbf{Synchronization and Ordering} \begin{enumerate} \item \OPR{Fence}: The \ac{PE} calling fence ensures ordering of From b2e630041080fb42ad42f281cf1667fe13c11054 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Tue, 27 Aug 2024 10:45:05 -0400 Subject: [PATCH 10/72] Add shmem_team_ptr to memory model text about getting local pointer --- content/memory_model.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/memory_model.tex b/content/memory_model.tex index 20f46b37..58cf9777 100644 --- a/content/memory_model.tex +++ b/content/memory_model.tex @@ -71,7 +71,7 @@ \subsection{Pointers to Symmetric Objects}\label{subsec:pointers_to_symmetric_ob The ``mem'' interfaces (e.g., \FUNC{shmem\_putmem}) have no alignment requirements. -The \FUNC{shmem\_ptr} routine allows the programmer to query a {\em local +The \FUNC{shmem\_ptr} and \FUNC{shmem\_team\_ptr} routines allow the application to query a {\em local address} to a remotely accessible data object at a specified \ac{PE}. The resulting pointer is valid for direct memory access; however, providing this address as an argument of an \openshmem routine that requires a symmetric From 91658e897238a1cf16dd506921cb24e9b9566c30 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Tue, 27 Aug 2024 10:49:35 -0400 Subject: [PATCH 11/72] Update atomic example 3 to avoid deprecated active-set collective Switch example 3 from `shmem_int_sum_to_all` to `shmem_int_sum_reduce` since the former is deprecated. 
--- example_code/amo_scenario_3.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/example_code/amo_scenario_3.c b/example_code/amo_scenario_3.c index 93586779..2091b09f 100644 --- a/example_code/amo_scenario_3.c +++ b/example_code/amo_scenario_3.c @@ -1,19 +1,14 @@ #include int main(void) { - static long psync[SHMEM_REDUCE_SYNC_SIZE]; - static int pwrk[SHMEM_REDUCE_MIN_WRKDATA_SIZE]; static int x = 0, y = 0; - for (int i = 0; i < SHMEM_REDUCE_SYNC_SIZE; i++) - psync[i] = SHMEM_SYNC_VALUE; - shmem_init(); shmem_int_atomic_inc(&x, (shmem_my_pe() + 1) % shmem_n_pes()); /* Undefined behavior: The following reduction operation performs accesses to * symmetric variable 'x' that are concurrent with previously issued atomic * increment operations on the same variable. */ - shmem_int_sum_to_all(&y, &x, 1, 0, 0, shmem_n_pes(), pwrk, psync); + shmem_int_sum_reduce(SHMEM_TEAM_WORLD, &y, &x, 1); shmem_finalize(); return 0; From 84271957051b3334186fdf712126bec729c9f86c Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Tue, 27 Aug 2024 11:57:00 -0400 Subject: [PATCH 12/72] Add session constants to the library constant table Add `SHMEM_CTX_SESSION_TOTAL_OPS` and `SHMEM_CTX_SESSION_BATCH` to the library constant table (text based `SHMEM_TEAM_NUM_CONTEXTS` and `SHMEM_CTX_NOSTORE` respectively since they have similar uses) --- content/library_constants.tex | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/content/library_constants.tex b/content/library_constants.tex index 0a0194de..b0edb9cf 100644 --- a/content/library_constants.tex +++ b/content/library_constants.tex @@ -84,6 +84,19 @@ See Section~\ref{subsec:shmem_ctx_create} for more detail about its use. \tabularnewline \hline %% +\LibConstDecl{SHMEM\_CTX\_SESSION\_TOTAL\_OPS} & +The bitwise flag which specifies that a session start routine should use the +\VAR{total\_ops} member of the provided \CTYPE{shmem\_ctx\_session\_config\_t} +configuration parameter as a hint. 
See Section~\ref{subsec:shmem_ctx_session_config_t} +for more detail about its use. +\tabularnewline \hline +%% +\LibConstDecl{SHMEM\_CTX\_SESSION\_BATCH} & +The session start option which specifies that operations in the given session +are latency tolerant and may be candidates for batching. See +Section~\ref{subsec:shmem_ctx_session_start} for more detail about its use. +\tabularnewline \hline +%% \LibConstDecl{SHMEM\_SIGNAL\_SET} & An integer constant expression corresponding to the signal update set operation. See Section~\ref{subsec:shmem_put_signal} and From 634db0883fc668ad6168ece0005e603161a3357c Mon Sep 17 00:00:00 2001 From: "Rahman, Md" Date: Tue, 27 Aug 2024 20:49:28 -0500 Subject: [PATCH 13/72] PR 514 content: signal set add update --- content/shmem_signal_add.tex | 6 +-- content/shmem_signal_set.tex | 6 +-- content/shmem_signal_wait_until.tex | 2 +- content/signaling.tex | 57 ++++++++++++++++++++++++++++ main_spec.tex | 59 +---------------------------- 5 files changed, 65 insertions(+), 65 deletions(-) create mode 100644 content/signaling.tex diff --git a/content/shmem_signal_add.tex b/content/shmem_signal_add.tex index 272b03a6..362fbeef 100644 --- a/content/shmem_signal_add.tex +++ b/content/shmem_signal_add.tex @@ -5,12 +5,12 @@ \begin{apidefinition} \begin{C11synopsis} -void @\FuncDecl{shmem\_signal\_add}@(shmem_ctx_t ctx, const uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_signal\_add}@(shmem_ctx_t ctx, uint64_t *sig_addr, uint64_t signal, int pe); \end{C11synopsis} \begin{Csynopsis} -void @\FuncDecl{shmem\_signal\_add}@(const uint64_t *sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_ctx\_signal\_add}@(shmem_ctx_t ctx, const uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_signal\_add}@(uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_ctx\_signal\_add}@(shmem_ctx_t ctx, uint64_t *sig_addr, uint64_t signal, int pe); \end{Csynopsis} \begin{apiarguments} diff --git
a/content/shmem_signal_set.tex b/content/shmem_signal_set.tex index 00c70345..d0eefbfd 100644 --- a/content/shmem_signal_set.tex +++ b/content/shmem_signal_set.tex @@ -5,12 +5,12 @@ \begin{apidefinition} \begin{C11synopsis} -void @\FuncDecl{shmem\_signal\_set}@(shmem_ctx_t ctx, const uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_signal\_set}@(shmem_ctx_t ctx, uint64_t *sig_addr, uint64_t signal, int pe); \end{C11synopsis} \begin{Csynopsis} -void @\FuncDecl{shmem\_signal\_set}@(const uint64_t *sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_ctx\_signal\_set}@(shmem_ctx_t ctx, const uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_signal\_set}@(uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_ctx\_signal\_set}@(shmem_ctx_t ctx, uint64_t *sig_addr, uint64_t signal, int pe); \end{Csynopsis} \begin{apiarguments} diff --git a/content/shmem_signal_wait_until.tex b/content/shmem_signal_wait_until.tex index 5d93ec7f..564c5a77 100644 --- a/content/shmem_signal_wait_until.tex +++ b/content/shmem_signal_wait_until.tex @@ -11,7 +11,7 @@ \begin{apiarguments} -\apiargument{IN}{sig\_addr}{Local address of the source signal variable.} +\apiargument{IN}{sig\_addr}{Local address of the remotely accessible source signal variable.} \apiargument{IN}{cmp}{The comparison operator that compares \VAR{sig\_addr} with \VAR{cmp\_value}.} \apiargument{IN}{cmp\_value}{The value against which the object pointed to diff --git a/content/signaling.tex b/content/signaling.tex new file mode 100644 index 00000000..bd04940b --- /dev/null +++ b/content/signaling.tex @@ -0,0 +1,57 @@ +This section specifies the OpenSHMEM support for \OPR{put-with-signal}, +nonblocking \OPR{put-with-signal}, and \OPR{signal-\{add, fetch, set\}} routines. 
The +put-with-signal routines provide a method for copying data from a contiguous +local data object to a data object on a specified \ac{PE} and subsequently +updating a remote flag to signal completion. +The signal-add and signal-set routines provide methods for updating +the signal object without the associated data transfer of a +put-with-signal operation. +The signal-fetch routine provides support for reading a local signal value. + +\openshmem \OPR{put-with-signal} and \OPR{signal-\{add, set\}} +routines specified in this section have two +variants. In one of the variants, the context handle, \VAR{ctx}, is explicitly +passed as an argument. In this variant, the operation is performed on the +specified context. If the context handle \VAR{ctx} does not correspond to a +valid context, the behavior is undefined. In the other variant, the context +handle is not explicitly passed and thus, the operations are performed on the +default context. + +\subsubsection{Atomicity Guarantees for Signaling Operations} +\label{subsec:signal_atomicity} +All signaling operations put-with-signal, nonblocking put-with-signal, and +signal-\{add, fetch, set\} are performed on a signal data object, a remotely accessible +symmetric object of type \VAR{uint64\_t}. A signal operator in the +put-with-signal routine is an \openshmem library constant that determines the +type of update to be performed as a signal on the signal data object. 
+ +All signaling operations on the signal data object complete as if performed +atomically with respect to the following: +\begin{itemize} + \item other blocking or nonblocking variant of the put-with-signal routine + that updates the signal data object using the same signal update operator; + \item signal-add routine when the put-with-signal routine uses the + \LibConstRef{SHMEM\_SIGNAL\_ADD} signal operator; + \item signal-set routine when the put-with-signal routine uses the + \LibConstRef{SHMEM\_SIGNAL\_SET} signal operator; + \item signal-fetch routine that fetches the signal data object; and + \item any point-to-point synchronization routine that accesses the signal + data object. +\end{itemize} + +\subsubsection{Available Signal Operators} +\label{subsec:signal_operator} + +With the atomicity guarantees as described in +Section~\ref{subsec:signal_atomicity}, the following options can be used as a +signal operator. + + \apitablerow{\LibConstRef{SHMEM\_SIGNAL\_SET}}{An update to signal data + object is an atomic set operation. It writes an unsigned 64-bit value as a + signal into the signal data object on a remote \VAR{PE} as an atomic + operation.} + + \apitablerow{\LibConstRef{SHMEM\_SIGNAL\_ADD}}{An update to signal data + object is an atomic add operation. It adds an unsigned 64-bit value as a + signal into the signal data object on a remote \VAR{PE} as an atomic + operation.} diff --git a/main_spec.tex b/main_spec.tex index cfc3f8ae..fe83dcb5 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -306,64 +306,7 @@ \subsubsubsection{\textbf{SHMEM\_ATOMIC\_FETCH\_XOR\_NBI}} \subsection{Signaling Operations}\label{sec:shmem_signal} -This section specifies the OpenSHMEM support for \OPR{put-with-signal}, -nonblocking \OPR{put-with-signal}, and \OPR{signal-\{add, fetch, set\}} routines. 
The -put-with-signal routines provide a method for copying data from a contiguous -local data object to a data object on a specified \ac{PE} and subsequently -updating a remote flag to signal completion. -The signal-add and signal-set routines provide methods for updating -the signal object without the associated data transfer of a -put-with-signal operation. -The signal-fetch routine provides support for reading a local signal value. - -\openshmem \OPR{put-with-signal} and \OPR{signal-\{add, set\}} -routines specified in this section have two -variants. In one of the variants, the context handle, \VAR{ctx}, is explicitly -passed as an argument. In this variant, the operation is performed on the -specified context. If the context handle \VAR{ctx} does not correspond to a -valid context, the behavior is undefined. In the other variant, the context -handle is not explicitly passed and thus, the operations are performed on the -default context. - -\subsubsection{Atomicity Guarantees for Signaling Operations} -\label{subsec:signal_atomicity} -All signaling operations put-with-signal, nonblocking put-with-signal, and -signal-\{add, fetch, set\} are performed on a signal data object, a remotely accessible -symmetric object of type \VAR{uint64\_t}. A signal operator in the -put-with-signal routine is an \openshmem library constant that determines the -type of update to be performed as a signal on the signal data object. 
- -All signaling operations on the signal data object complete as if performed -atomically with respect to the following: -\begin{itemize} - \item other blocking or nonblocking variant of the put-with-signal routine - that updates the signal data object using the same signal update operator; - \item signal-add routine when the put-with-signal routine uses the - \LibConstRef{SHMEM\_SIGNAL\_ADD} signal operator; - \item signal-set routine when the put-with-signal routine uses the - \LibConstRef{SHMEM\_SIGNAL\_SET} signal operator; - \item signal-fetch routine that fetches the signal data object; and - \item any point-to-point synchronization routine that accesses the signal - data object. -\end{itemize} - -\subsubsection{Available Signal Operators} -\label{subsec:signal_operator} - -With the atomicity guarantees as described in -Section~\ref{subsec:signal_atomicity}, the following options can be used as a -signal operator. - - \apitablerow{\LibConstRef{SHMEM\_SIGNAL\_SET}}{An update to signal data - object is an atomic set operation. It writes an unsigned 64-bit value as a - signal into the signal data object on a remote \VAR{PE} as an atomic - operation.} - - \apitablerow{\LibConstRef{SHMEM\_SIGNAL\_ADD}}{An update to signal data - object is an atomic add operation. 
It adds an unsigned 64-bit value as a - signal into the signal data object on a remote \VAR{PE} as an atomic - operation.} - +\input{content/signaling.tex} \subsubsection{\textbf{SHMEM\_PUT\_SIGNAL}}\label{subsec:shmem_put_signal} \input{content/shmem_put_signal.tex} From 096f92e0804eaa024cdfacad9048908a03c01ee5 Mon Sep 17 00:00:00 2001 From: "Rahman, Md" Date: Tue, 27 Aug 2024 21:14:55 -0500 Subject: [PATCH 14/72] Minor update reverted to match master --- content/shmem_malloc_hints.tex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/content/shmem_malloc_hints.tex b/content/shmem_malloc_hints.tex index cf7230f6..82313c06 100644 --- a/content/shmem_malloc_hints.tex +++ b/content/shmem_malloc_hints.tex @@ -18,8 +18,8 @@ \apidescription{ - The \FUNC{shmem\_malloc\_with\_hints} routine, like \FUNC{shmem\_malloc}, is a collective operation - on the world team that returns a pointer to a block of at least + The \FUNC{shmem\_malloc\_with\_hints} routine, like \FUNC{shmem\_malloc}, + is a collective operation on the world team that returns a pointer to a block of at least \VAR{size} bytes, which shall be suitably aligned so that it may be assigned to a pointer to any type of object. This space is allocated from the symmetric heap (similar to \FUNC{shmem\_malloc}). When the \VAR{size} is zero, From d1e22fb714dcb21a466e4f92be4bcb9ff9e834d6 Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Thu, 29 Aug 2024 06:02:39 -0400 Subject: [PATCH 15/72] [Annex C] Remove trapped and replace with detect --- content/backmatter.tex | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/content/backmatter.tex b/content/backmatter.tex index 16e7ffcc..9f6a19a3 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -151,9 +151,9 @@ \chapter{Undefined Behavior in OpenSHMEM}\label{sec:undefined} \tabularnewline \hline Use of non-symmetric variables & Some routines require remotely accessible -variables to perform their function. 
For example, a \PUT{} to a non-symmetric variable may -be trapped where possible and the library may abort the program. Another -implementation may choose to continue execution with or without a warning. +variables to perform their function. For example, an \openshmem library may detect a \PUT{} to a non-symmetric variable +and choose to abort the program. +However, another implementation may choose to continue execution with or without a warning. \tabularnewline \hline Non-symmetric allocation of symmetric memory & The symmetric memory management routines are From e31215b7e49f12315cce686c1535cea5121fbf99 Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Thu, 29 Aug 2024 06:17:07 -0400 Subject: [PATCH 16/72] [Appendix D 1.4] Add shmem_my_pe as a standard way to get process ID --- content/interoperability.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/interoperability.tex b/content/interoperability.tex index bb9ed5a1..7347ee87 100644 --- a/content/interoperability.tex +++ b/content/interoperability.tex @@ -119,7 +119,7 @@ \subsection{Mapping Process Identification Numbers} This feature, however, may be provided by only some of the \openshmem and \ac{MPI} implementations (e.g., if both environments share the same underlying process manager) and is not portably guaranteed. A portable program should always -use the standard functions in each model, namely, \FUNC{shmem\_team\_my\_pe} in \openshmem +use the standard functions in each model, namely, \FUNC{shmem\_team\_my\_pe} or \FUNC{shmem\_my\_pe} in \openshmem and \FUNC{MPI\_Comm\_rank} in \ac{MPI}, to query the process identification numbers in each communication environment and manage the mapping of identifiers in the program when necessary.
From 71aa81adcc7d312322331716e208c95a83df2d8a Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Thu, 29 Aug 2024 07:31:26 -0400 Subject: [PATCH 17/72] [ChangeLog] Add inclusive and exlcusive scan --- content/backmatter.tex | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/content/backmatter.tex b/content/backmatter.tex index 16e7ffcc..89d3d426 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -655,6 +655,10 @@ \section{Version 1.6} The following list describes the specific changes in \openshmem[1.6]: \begin{itemize} % +\item Added an inclusive (\FUNC{shmem\_sum\_inscan}) and exclusive +(\FUNC{shmem\_sum\_exscan}) collective summation operation. +\ChangelogRef{subsec:shmem_scan} +% \item Added support for initialization and finalization routines to be called multiple times, and added an initialization status query API \FUNC{shmem\_query\_initialized}. From ac518923c3e21fd91c784a35054d8049c5cb21d6 Mon Sep 17 00:00:00 2001 From: Brandon Potter Date: Thu, 29 Aug 2024 09:25:14 -0500 Subject: [PATCH 18/72] profiler: remove whitespace at end of line --- content/profiling_interface.tex | 94 ++++++++++++++++----------------- content/shmem_pcontrol.tex | 22 ++++---- 2 files changed, 58 insertions(+), 58 deletions(-) diff --git a/content/profiling_interface.tex b/content/profiling_interface.tex index e50ab8d7..51a50909 100644 --- a/content/profiling_interface.tex +++ b/content/profiling_interface.tex @@ -1,74 +1,74 @@ -The objective of the \openshmem profiling interface is to ensure an -easy and flexible usage model for profiling (and other similar) -tool developers to interface their codes into \openshmem -implementations on different platforms. Since \openshmem is a -machine-independent standard with different implementations, it is -unreasonable to expect that the authors and developers of profiling -tools for \openshmem will have access to the source code that -implements \openshmem on any particular machine. 
It is, therefore, -necessary to provide a mechanism by which the implementors of such -tools can collect whatever performance information they wish +The objective of the \openshmem profiling interface is to ensure an +easy and flexible usage model for profiling (and other similar) +tool developers to interface their codes into \openshmem +implementations on different platforms. Since \openshmem is a +machine-independent standard with different implementations, it is +unreasonable to expect that the authors and developers of profiling +tools for \openshmem will have access to the source code that +implements \openshmem on any particular machine. It is, therefore, +necessary to provide a mechanism by which the implementors of such +tools can collect whatever performance information they wish \emph{without} access to the underlying implementation. -The \openshmem profiling interface places the following requirements -on implementations. +The \openshmem profiling interface places the following requirements +on implementations. \begin{enumerate} -\item An \openshmem implementation must provide a mechanism through -which all of the \openshmem defined functions may be accessible -with a name shift. This requires an alternate -entry point name, with the prefix \FUNC{pshmem\_} for each -\openshmem function. For \openshmem inlined functions (e.g., macros), -it is also required that the \FUNC{pshmem\_} version is supplied -although it is not possible to replace the \FUNC{shmem\_} version +\item An \openshmem implementation must provide a mechanism through +which all of the \openshmem defined functions may be accessible +with a name shift. This requires an alternate +entry point name, with the prefix \FUNC{pshmem\_} for each +\openshmem function. For \openshmem inlined functions (e.g., macros), +it is also required that the \FUNC{pshmem\_} version is supplied +although it is not possible to replace the \FUNC{shmem\_} version with a user-defined version at link time. 
-\item It must be ensured that the \openshmem functions that are not -replaced as above, may still be linked into an executable image -without causing name clashes. -\item Documentation of the implementation of different language -bindings of the \openshmem interface must indicate if they -are layered on top of each other. Using this documentation, -developers can determine whether they need to implement the -profile interface for each binding or not. For example, it must -be noted that the \openshmem \Cstd[11] type-generic interfaces for +\item It must be ensured that the \openshmem functions that are not +replaced as above, may still be linked into an executable image +without causing name clashes. +\item Documentation of the implementation of different language +bindings of the \openshmem interface must indicate if they +are layered on top of each other. Using this documentation, +developers can determine whether they need to implement the +profile interface for each binding or not. For example, it must +be noted that the \openshmem \Cstd[11] type-generic interfaces for different \ac{RMA} and \ac{AMO} operations cannot have any equivalent -\FUNC{pshmem\_} interfaces because the \Cstd[11] type-generic +\FUNC{pshmem\_} interfaces because the \Cstd[11] type-generic interfaces are implemented as macros. -\item In the case where the implementation of different \ac{API} -feature sets is implemented through a layered approach using -``wrapper'' functions, the wrapper functions must be kept separate -from the rest of the library. This requirement allows the developers -to extract these functions from the original \openshmem library -and add them into the profiling library without bringing along any +\item In the case where the implementation of different \ac{API} +feature sets is implemented through a layered approach using +``wrapper'' functions, the wrapper functions must be kept separate +from the rest of the library. 
This requirement allows the developers +to extract these functions from the original \openshmem library +and add them into the profiling library without bringing along any other code. -\item A no-op routine, \FUNC{shmem\_pcontrol}, must be provided +\item A no-op routine, \FUNC{shmem\_pcontrol}, must be provided in the \openshmem library. -\item It must be ensured that any \openshmem types or constants that are +\item It must be ensured that any \openshmem types or constants that are needed by the \FUNC{pshmem\_} interfaces are defined in \HEADER{pshmem.h}. \end{enumerate} -Provided that an \openshmem implementation meets these requirements, -it is possible for the implementor of the profiling system -to intercept the \openshmem calls that are made by the user -program. The information required can be collected before and after -calling the underlying \openshmem implementation through the name -shifted entry points. +Provided that an \openshmem implementation meets these requirements, +it is possible for the implementor of the profiling system +to intercept the \openshmem calls that are made by the user +program. The information required can be collected before and after +calling the underlying \openshmem implementation through the name +shifted entry points. \subsection{Control of Profiling} \label{sec:pshmem_control_profile} -Any user code must be able to control the profiler dynamically -during runtime. Generally, this capability is used for the +Any user code must be able to control the profiler dynamically +during runtime. Generally, this capability is used for the purposes of \begin{itemize} -\item Enabling and disabling of profiling based on the current +\item Enabling and disabling of profiling based on the current state of the execution and calculation, \item Flushing of the trace buffers at noncritical execution regions, \item Adding user events to a trace file. 
\end{itemize} -These functionalities can be achieved through the usage of +These functionalities can be achieved through the usage of \FUNC{shmem\_pcontrol}. \subsubsection{\textbf{SHMEM\_PCONTROL}}\label{subsec:shmem_pcontrol} diff --git a/content/shmem_pcontrol.tex b/content/shmem_pcontrol.tex index a20f61e4..79abbbd7 100644 --- a/content/shmem_pcontrol.tex +++ b/content/shmem_pcontrol.tex @@ -15,7 +15,7 @@ \end{apiarguments} \apidescription{ - \FUNC{shmem\_pcontrol} sets the profiling level and any other + \FUNC{shmem\_pcontrol} sets the profiling level and any other library defined effects through additional arguments. \openshmem libraries make no use of this routine and simply return immediately to the user code. } @@ -25,26 +25,26 @@ } \apinotes{ - Since \openshmem has no control of the implementation of the profiling code, - it is impossible to precisely specify the semantics that will be provided by - calls to \FUNC{shmem\_pcontrol}. This vagueness extends to the number of - arguments to the function and their datatypes. However, to provide some - level of portability of user codes to different profiling libraries, the + Since \openshmem has no control of the implementation of the profiling code, + it is impossible to precisely specify the semantics that will be provided by + calls to \FUNC{shmem\_pcontrol}. This vagueness extends to the number of + arguments to the function and their datatypes. However, to provide some + level of portability of user codes to different profiling libraries, the following \VAR{level} values are recommended. \begin{itemize} \item \texttt{level <= 0} Profiling is disabled. \item \texttt{level == 1} Profiling is enabled at the default level of detail. - \item \texttt{level == 2} Profiling is enabled and profile buffers are + \item \texttt{level == 2} Profiling is enabled and profile buffers are flushed if available. 
- \item \texttt{level > 2} Profiling is enabled with profile library defined + \item \texttt{level > 2} Profiling is enabled with profile library defined effects and additional arguments. \end{itemize} - The default state after \FUNC{shmem\_init} is recommended to have profiling + The default state after \FUNC{shmem\_init} is recommended to have profiling enabled at the default level of detail (\texttt{level == 1}). This allows users - to link with a profiling library and to obtain profile output without - having to modify the user-level source code. + to link with a profiling library and to obtain profile output without + having to modify the user-level source code. } \end{apidefinition} From b0e7115bbc8728bd783dff47fc6760fd22840297 Mon Sep 17 00:00:00 2001 From: "Rahman, Md" Date: Thu, 29 Aug 2024 10:19:26 -0500 Subject: [PATCH 19/72] Minor text edits --- content/shmem_init.tex | 2 +- content/shmem_malloc_hints.tex | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/content/shmem_init.tex b/content/shmem_init.tex index c3cec70c..f2d5e4c2 100644 --- a/content/shmem_init.tex +++ b/content/shmem_init.tex @@ -41,7 +41,7 @@ users are encouraged to use \FUNC{shmem\_init}. An important difference between \FUNC{shmem\_init} and \FUNC{start\_pes} is that every call to \FUNC{shmem\_init} within a program must be matched with a call to \FUNC{shmem\_finalize}. - while in the case of \FUNC{start\_pes}, any subsequent calls to \FUNC{start\_pes} after the + In the case of \FUNC{start\_pes}, any subsequent calls to \FUNC{start\_pes} after the first one results in a no-op. } \end{DeprecateBlock} diff --git a/content/shmem_malloc_hints.tex b/content/shmem_malloc_hints.tex index 82313c06..bca3c1a4 100644 --- a/content/shmem_malloc_hints.tex +++ b/content/shmem_malloc_hints.tex @@ -27,7 +27,7 @@ In addition to the \VAR{size} argument, the \VAR{hints} argument is provided by the user. 
The \VAR{hints} describes the expected manner in which the \openshmem program may use the allocated memory. - The valid usage hints are described in Table~\ref{usagehints}. Multiple hints may be requested by combining them with a bitwise \CONST{OR} operation. + The valid usage of hints are described in Table~\ref{usagehints}. Multiple hints may be requested by combining them with a bitwise \CONST{OR} operation. A zero option can be given if no options are requested. The information provided by the \VAR{hints} is used to optimize for performance by the implementation. @@ -37,11 +37,9 @@ The \FUNC{shmem\_malloc\_with\_hints} routine is provided so that multiple \acp{PE} in a program can allocate symmetric, remotely accessible memory blocks. When no action is performed, these - routines return without performing a barrier. Otherwise, the routine will call a procedure that is semantically equivalent to \FUNC{shmem\_barrier\_all} on exit, similar to the behavior of \FUNC{shmem\_malloc}.. - % Why are these two sentences here? should the go in the shmem_malloc text instead if they are relevant? BES - % It is the user's responsibility to ensure that no communication operations involving the given memory block are pending on - %other contexts prior to calling the \FUNC{shmem\_free} and \FUNC{shmem\_realloc} routines. - The user is also responsible for calling these routines with identical argument(s) on all + routines return without performing a barrier. Otherwise, the routine will call a procedure that is + semantically equivalent to \FUNC{shmem\_barrier\_all} on exit, similar to the behavior of \FUNC{shmem\_malloc}. + The user is responsible for calling this routine with identical argument(s) on all \acp{PE}; if differing \VAR{size}, or \VAR{hints} arguments are used, the behavior of the call is undefined. 
} From 930bbda5177cdc03c0ec29ae8b53893ea84fbd81 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Wed, 28 Aug 2024 12:39:00 -0400 Subject: [PATCH 20/72] backmatter: Add an "Errata" section w/ 1.5 entries --- content/backmatter.tex | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/content/backmatter.tex b/content/backmatter.tex index 89d3d426..9fdbe1b6 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -1272,4 +1272,41 @@ \section{Version 1.1} % \end{itemize} +\chapter{Errata}\label{sec:errata} + +The \openshmem specification may occasionally include errata that are +discovered after the release of new versions. +These errata can range from typographical mistakes to more significant +technical inaccuracies that may affect the implementation or understanding of +the \openshmem API semantics. +This Errata section reports these issues with the goal to maintain the +integrity and utility of the OpenSHMEM specification. + +The sections below documents all known errata, their corresponding corrections, +and the affected version of the OpenSHMEM specification. +It serves as a historical record of the changes made and assists users and +implementers with applying the necessary corrections. + +\section{Version 1.5} + +\begin{itemize} + \item Clarified that \FUNC{shmem\_test\_all} and + \FUNC{shmem\_test\_all\_vector} routines return 1 when the test set is empty + (\href{https://github.com/openshmem-org/specification/pull/466}{\#466}). + \item Clarified that \FUNC{shmem\_team\_split\_strided} and + \FUNC{shmem\_team\_split\_2d} return nonzero when the parent team is + \LibConstRef{SHMEM\_TEAM\_INVALID} + (\href{https://github.com/openshmem-org/specification/pull/461}{\#461}). 
+ \item Corrected the \VAR{level} argument's recommended value in API notes for + \FUNC{shmem\_pcontrol} to indicate that the value should be greater than 2 to enable + profiling with profile library defined effects and additional arguments + (\href{https://github.com/openshmem-org/specification/pull/480}{\#480}). + \item Clarified that fence operations only guarantee ordering for operations + that are performed on the same context + (\href{https://github.com/openshmem-org/specification/pull/496}{\#496}). + \item Clarified that \VAR{source} and \VAR{dest} arrays must be the same + across \acp{PE} in \openshmem reductions + (\href{https://github.com/openshmem-org/specification/pull/490}{\#490}). +\end{itemize} + %end of setlength command that was started in frontmatter.tex From 5e3bc96b495281fa9063106cb6be8d6698edce3a Mon Sep 17 00:00:00 2001 From: David Ozog Date: Wed, 28 Aug 2024 12:51:17 -0400 Subject: [PATCH 21/72] Update content/backmatter.tex --- content/backmatter.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/backmatter.tex b/content/backmatter.tex index 9fdbe1b6..877305ac 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -1282,7 +1282,7 @@ \chapter{Errata}\label{sec:errata} This Errata section reports these issues with the goal to maintain the integrity and utility of the OpenSHMEM specification. -The sections below documents all known errata, their corresponding corrections, +The sections below document all known errata, their corresponding corrections, and the affected version of the OpenSHMEM specification. It serves as a historical record of the changes made and assists users and implementers with applying the necessary corrections. 
From 47feb0dfe0755ef97c57d6988b1d98b1ddaa0d83 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Thu, 29 Aug 2024 12:08:06 -0400 Subject: [PATCH 22/72] Update content/backmatter.tex Co-authored-by: James Dinan --- content/backmatter.tex | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/content/backmatter.tex b/content/backmatter.tex index 877305ac..ccdf8058 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -1282,9 +1282,12 @@ \chapter{Errata}\label{sec:errata} This Errata section reports these issues with the goal to maintain the integrity and utility of the OpenSHMEM specification. -The sections below document all known errata, their corresponding corrections, -and the affected version of the OpenSHMEM specification. -It serves as a historical record of the changes made and assists users and +Errors or ambiguities in the \openshmem specification may be discovered after +publication. Errata, or corrections, are included in the +the sections below indicating the version of the OpenSHMEM specification +that required the correction or clarification. These corrections have been applied +to all subsequent versions of the specification and this section +serves as a historical record of the changes made to assist users and implementers with applying the necessary corrections. 
\section{Version 1.5} From 2ae28b47f8cd94ce0eba36d46eba772d79460784 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Thu, 29 Aug 2024 12:13:06 -0400 Subject: [PATCH 23/72] backmatter: changelogs to enumerate, rm git links --- content/backmatter.tex | 61 +++++++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 24 deletions(-) diff --git a/content/backmatter.tex b/content/backmatter.tex index ccdf8058..78ad1186 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -648,12 +648,13 @@ \subsection{Table~\ref{p2psynctypes}: point-to-point synchronization types} \chapter{Changes to this Document}\label{sec:changelog} \section{Version 1.6} +\label{changelog:v1.6} Major changes in \openshmem[1.6] include the addition of the new \FUNC{shmem\_team\_ptr}, \FUNC{shmem\_ibget}, and \FUNC{shmem\_ibput} functions. The following list describes the specific changes in \openshmem[1.6]: -\begin{itemize} +\begin{enumerate} % \item Added an inclusive (\FUNC{shmem\_sum\_inscan}) and exclusive (\FUNC{shmem\_sum\_exscan}) collective summation operation. @@ -673,7 +674,7 @@ \section{Version 1.6} \ChangelogRef{subsec:shmem_signal_add, subsec:shmem_signal_set}% % \item Clarified that \OPR{Fence} operations only guarantee ordering for - operations that are performed on the same context. + operations that are performed on the same context. \label{changelog:fence_ctx} \ChangelogRef{subsec:shmem_fence}% % \item Added a team-based pointer query routine: @@ -682,12 +683,12 @@ \section{Version 1.6} % \item Clarified that \FUNC{shmem\_team\_split\_strided} and \FUNC{shmem\_team\_split\_strided} return a nonzero value when the parent - team compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID}. + team compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID}. 
\label{changelog:split_strided_2d} \ChangelogRef{subsec:shmem_team_split_strided, subsec:shmem_team_split_2d}% % \item Removed \openshmem[1.5] Table 9, which was an incomplete duplicate of \openshmem[1.5] Table 10, and clarified the types, names, and supporting - operations for team-based reductions. + operations for team-based reductions. \label{changelog:reduction_table} \ChangelogRef{teamreducetypes}% % \item Added the session routines, \FUNC{shmem\_ctx\_session\_start} and @@ -710,7 +711,7 @@ \section{Version 1.6} \item Corrected the level argument's recommended value in API notes for \FUNC{shmem\_pcontrol} to indicate that the value should be greater than 2 to enable profiling with profile library defined effects and - additional arguments. + additional arguments. \label{changelog:pcontrol} \ChangelogRef{subsec:shmem_pcontrol} % \item Clarified that \FUNC{shmem\_team\_get\_config} returns the current @@ -726,7 +727,15 @@ \section{Version 1.6} stride argument is 0 or negative. \ChangelogRef{subsec:shmem_team_split_strided} % -\end{itemize} +\item Clarified that \FUNC{shmem\_test\_all} and \FUNC{shmem\_test\_all\_vector} + routines return 1 when the test set is empty. \label{changelog:test_all} +\ChangelogRef{subsec:shmem_test_all,subsec:shmem_test_all_vector}% +% +\item Clarified that \VAR{source} and \VAR{dest} arrays must be the same + across \acp{PE} in \openshmem reductions \label{changelog:reduction_args} +\ChangelogRef{subsec:shmem_reductions} +% +\end{enumerate} \section{Version 1.5} Major changes in \openshmem[1.5] include the addition of new team-based @@ -736,7 +745,7 @@ \section{Version 1.5} interface, and the removal of the entire \Fortran \ac{API}. The following list describes the specific changes in \openshmem[1.5]: -\begin{itemize} +\begin{enumerate} % \item Removed \FUNC{SHMEM\_CACHE}. \ChangelogRef{dep:shmem_cache}% @@ -887,7 +896,7 @@ \section{Version 1.5} \item Clarified the atomicity guarantees of the \openshmem memory model. 
\ChangelogRef{subsec:amo_guarantees}% % -\end{itemize} +\end{enumerate} \section{Version 1.4} Major changes in \openshmem[1.4] include @@ -902,7 +911,7 @@ \section{Version 1.4} and \Cstd[11] type-generic interfaces for point-to-point synchronization. The following list describes the specific changes in \openshmem[1.4]: -\begin{itemize} +\begin{enumerate} % \item New communication management \ac{API}, including \FUNC{shmem\_ctx\_create}; \FUNC{shmem\_ctx\_destroy}; and additional \ac{RMA}, \ac{AMO}, and memory ordering @@ -1022,7 +1031,7 @@ \section{Version 1.4} \item Clarified that complex-typed reductions in C are optionally supported. \ChangelogRef{subsec:shmem_reductions}% % -\end{itemize} +\end{enumerate} @@ -1035,7 +1044,7 @@ \section{Version 1.3} and \Cstd[11] type-generic interfaces for \ac{RMA} and \ac{AMO} operations. The following list describes the specific changes in \openshmem[1.3]: -\begin{itemize} +\begin{enumerate} % \item Clarified implementation of \acp{PE} as threads. % @@ -1076,7 +1085,7 @@ \section{Version 1.3} \item Deprecation of \FUNC{SHMEM\_CACHE}. \ChangelogRef{dep:shmem_cache}% % -\end{itemize} +\end{enumerate} @@ -1091,7 +1100,7 @@ \section{Version 1.2} and clarifications to several \ac{API} descriptions. The following list describes the specific changes in \openshmem[1.2]: -\begin{itemize} +\begin{enumerate} % \item Added specification of \VAR{pSync} initialization for all routines that use it. % @@ -1147,7 +1156,7 @@ \section{Version 1.2} support across versions of the \openshmem Specification. \ChangelogRef{sec:dep}% % -\end{itemize} +\end{enumerate} @@ -1161,7 +1170,7 @@ \section{Version 1.1} and general readabilty and usability improvements to the document structure. The following list describes the specific changes in \openshmem[1.1]: -\begin{itemize} +\begin{enumerate} % \item Clarifications of the completion semantics of memory synchronization interfaces. 
@@ -1270,7 +1279,7 @@ \section{Version 1.1} \item Name changes for UV and ICE for \ac{SGI} systems. \ChangelogRef{sec:openshmem_history}% % -\end{itemize} +\end{enumerate} \chapter{Errata}\label{sec:errata} @@ -1292,24 +1301,28 @@ \chapter{Errata}\label{sec:errata} \section{Version 1.5} -\begin{itemize} +\begin{enumerate} \item Clarified that \FUNC{shmem\_test\_all} and \FUNC{shmem\_test\_all\_vector} routines return 1 when the test set is empty - (\href{https://github.com/openshmem-org/specification/pull/466}{\#466}). + (\ref{changelog:v1.6}.\ref{changelog:test_all}). \item Clarified that \FUNC{shmem\_team\_split\_strided} and \FUNC{shmem\_team\_split\_2d} return nonzero when the parent team is \LibConstRef{SHMEM\_TEAM\_INVALID} - (\href{https://github.com/openshmem-org/specification/pull/461}{\#461}). + (\ref{changelog:v1.6}.\ref{changelog:split_strided_2d}). \item Corrected the \VAR{level} argument's recommended value in API notes for \FUNC{shmem\_pcontrol} to indicate that the value should be greater than 2 to enable profiling with profile library defined effects and additional arguments - (\href{https://github.com/openshmem-org/specification/pull/480}{\#480}). - \item Clarified that fence operations only guarantee ordering for operations + (\ref{changelog:v1.6}.\ref{changelog:pcontrol}). + \item Clarified that \OPR{Fence} operations only guarantee ordering for operations that are performed on the same context - (\href{https://github.com/openshmem-org/specification/pull/496}{\#496}). + (\ref{changelog:v1.6}.\ref{changelog:fence_ctx}). \item Clarified that \VAR{source} and \VAR{dest} arrays must be the same across \acp{PE} in \openshmem reductions - (\href{https://github.com/openshmem-org/specification/pull/490}{\#490}). -\end{itemize} + (\ref{changelog:v1.6}.\ref{changelog:reduction_args}). 
+ \item Removed \openshmem[1.5] Table 9, which was an incomplete duplicate of + \openshmem[1.5] Table 10, and clarified the types, names, and supporting + operations for team-based reductions + (\ref{changelog:v1.6}.\ref{changelog:reduction_table}). +\end{enumerate} %end of setlength command that was started in frontmatter.tex From 07da7b367c130abc4f55a56647344ced75ea596b Mon Sep 17 00:00:00 2001 From: David Ozog Date: Thu, 29 Aug 2024 12:13:55 -0400 Subject: [PATCH 24/72] backmatter: code review errata section corrections --- content/backmatter.tex | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/content/backmatter.tex b/content/backmatter.tex index 78ad1186..b0672b41 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -1283,21 +1283,14 @@ \section{Version 1.1} \chapter{Errata}\label{sec:errata} -The \openshmem specification may occasionally include errata that are -discovered after the release of new versions. -These errata can range from typographical mistakes to more significant -technical inaccuracies that may affect the implementation or understanding of -the \openshmem API semantics. -This Errata section reports these issues with the goal to maintain the -integrity and utility of the OpenSHMEM specification. - Errors or ambiguities in the \openshmem specification may be discovered after -publication. Errata, or corrections, are included in the -the sections below indicating the version of the OpenSHMEM specification -that required the correction or clarification. These corrections have been applied -to all subsequent versions of the specification and this section -serves as a historical record of the changes made to assist users and -implementers with applying the necessary corrections. +publication. +Errata, or corrections, are included in the sections below indicating the +version of the OpenSHMEM specification that required the correction or +clarification.
+These corrections have been applied to all subsequent versions of the +specification and this section serves as a historical record of the changes +made to assist users and implementers with applying the necessary corrections. \section{Version 1.5} From a0bc82ec34487bcf90c59a1fec2d2aab062e5b21 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Thu, 29 Aug 2024 12:29:12 -0400 Subject: [PATCH 25/72] back: Add Errata changelog, reorder for clarity --- content/backmatter.tex | 74 ++++++++++++++++++++++++------------------ 1 file changed, 42 insertions(+), 32 deletions(-) diff --git a/content/backmatter.tex b/content/backmatter.tex index b0672b41..256c6ae5 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -673,24 +673,10 @@ \section{Version 1.6} update a remote flag without associated data transfer of a put-with-signal operation. \ChangelogRef{subsec:shmem_signal_add, subsec:shmem_signal_set}% % -\item Clarified that \OPR{Fence} operations only guarantee ordering for - operations that are performed on the same context. \label{changelog:fence_ctx} -\ChangelogRef{subsec:shmem_fence}% -% \item Added a team-based pointer query routine: \FUNC{shmem\_team\_ptr}. \ChangelogRef{subsec:shmem_team_ptr}% % -\item Clarified that \FUNC{shmem\_team\_split\_strided} and - \FUNC{shmem\_team\_split\_strided} return a nonzero value when the parent - team compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID}. \label{changelog:split_strided_2d} -\ChangelogRef{subsec:shmem_team_split_strided, subsec:shmem_team_split_2d}% -% -\item Removed \openshmem[1.5] Table 9, which was an incomplete duplicate of - \openshmem[1.5] Table 10, and clarified the types, names, and supporting - operations for team-based reductions. \label{changelog:reduction_table} -\ChangelogRef{teamreducetypes}% -% \item Added the session routines, \FUNC{shmem\_ctx\_session\_start} and \FUNC{shmem\_ctx\_session\_stop}, which allow users to pass hints to the \openshmem library to apply runtime optimizations. 
@@ -708,11 +694,6 @@ \section{Version 1.6} the world team. \ChangelogRef{subsec:shmem_malloc, subsec:shmem_free, subsec:shmem_realloc, subsec:shmem_align, subsec:shmmallochint, subsec:shmem_calloc}% -\item Corrected the level argument's recommended value in API notes for - \FUNC{shmem\_pcontrol} to indicate that the value should be greater than - 2 to enable profiling with profile library defined effects and - additional arguments. \label{changelog:pcontrol} -\ChangelogRef{subsec:shmem_pcontrol} % \item Clarified that \FUNC{shmem\_team\_get\_config} returns the current configuration values, which may differ from the values assigned at the @@ -727,14 +708,39 @@ \section{Version 1.6} stride argument is 0 or negative. \ChangelogRef{subsec:shmem_team_split_strided} % -\item Clarified that \FUNC{shmem\_test\_all} and \FUNC{shmem\_test\_all\_vector} - routines return 1 when the test set is empty. \label{changelog:test_all} -\ChangelogRef{subsec:shmem_test_all,subsec:shmem_test_all_vector}% +\item Added a new Errata Section~\ref{sec:errata} that indicates errors or ambiguities in the + \openshmem specification and the version that required correction or clarification. +\ChangelogRef{sec:errata} +% +\item Removed \openshmem[1.5] Table 9, which was an incomplete duplicate of + \openshmem[1.5] Table 10, and clarified the types, names, and supporting + operations for team-based reductions. \label{changelog:reduction_table} +\ChangelogRef{teamreducetypes}% % \item Clarified that \VAR{source} and \VAR{dest} arrays must be the same across \acp{PE} in \openshmem reductions \label{changelog:reduction_args} \ChangelogRef{subsec:shmem_reductions} % +\item Clarified that \OPR{Fence} operations only guarantee ordering for + operations that are performed on the same context. \label{changelog:fence_ctx} +\ChangelogRef{subsec:shmem_fence}% +% +\item Clarified that \FUNC{shmem\_test\_all} and \FUNC{shmem\_test\_all\_vector} + routines return 1 when the test set is empty. 
\label{changelog:test_all} +\ChangelogRef{subsec:shmem_test_all,subsec:shmem_test_all_vector}% +% +\item Clarified that \FUNC{shmem\_team\_split\_strided} and + \FUNC{shmem\_team\_split\_2d} return a nonzero value when the parent + team compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID}. \label{changelog:split_strided_2d} +\ChangelogRef{subsec:shmem_team_split_strided, subsec:shmem_team_split_2d}% +% +\item Corrected the level argument's recommended value in API notes for + \FUNC{shmem\_pcontrol} to indicate that the value should be greater than + 2 to enable profiling with profile library defined effects and + additional arguments. \label{changelog:pcontrol} +\ChangelogRef{subsec:shmem_pcontrol} +% + \end{enumerate} \section{Version 1.5} @@ -1291,10 +1297,24 @@ \chapter{Errata}\label{sec:errata} These corrections have been applied to all subsequent versions of the specification and this section serves as a historical record of the changes made to assist users and implementers with applying the necessary corrections. +Errata that result in a change to the specification are also included in +Annex~\ref{sec:changelog}. +For an implementation to comply with a particular version of \openshmem, it +must account for all errata associated with that version as indicated below. \section{Version 1.5} \begin{enumerate} + \item Removed \openshmem[1.5] Table 9, which was an incomplete duplicate of + \openshmem[1.5] Table 10, and clarified the types, names, and supporting + operations for team-based reductions + (\ref{changelog:v1.6}.\ref{changelog:reduction_table}). + \item Clarified that \VAR{source} and \VAR{dest} arrays must be the same + across \acp{PE} in \openshmem reductions + (\ref{changelog:v1.6}.\ref{changelog:reduction_args}). + \item Clarified that \OPR{Fence} operations only guarantee ordering for operations + that are performed on the same context + (\ref{changelog:v1.6}.\ref{changelog:fence_ctx}). 
\item Clarified that \FUNC{shmem\_test\_all} and \FUNC{shmem\_test\_all\_vector} routines return 1 when the test set is empty (\ref{changelog:v1.6}.\ref{changelog:test_all}). @@ -1306,16 +1326,6 @@ \section{Version 1.5} \FUNC{shmem\_pcontrol} to indicate that the value should be greater than 2 to enable profiling with profile library defined effects and additional arguments (\ref{changelog:v1.6}.\ref{changelog:pcontrol}). - \item Clarified that \OPR{Fence} operations only guarantee ordering for operations - that are performed on the same context - (\ref{changelog:v1.6}.\ref{changelog:fence_ctx}). - \item Clarified that \VAR{source} and \VAR{dest} arrays must be the same - across \acp{PE} in \openshmem reductions - (\ref{changelog:v1.6}.\ref{changelog:reduction_args}). - \item Removed \openshmem[1.5] Table 9, which was an incomplete duplicate of - \openshmem[1.5] Table 10, and clarified the types, names, and supporting - operations for team-based reductions - (\ref{changelog:v1.6}.\ref{changelog:reduction_table}). \end{enumerate} %end of setlength command that was started in frontmatter.tex From 25bb62d5b613d46a4169adb7ed0f4535011435ff Mon Sep 17 00:00:00 2001 From: "Rahman, Md" Date: Thu, 29 Aug 2024 11:48:17 -0500 Subject: [PATCH 26/72] Clarification of query_initialized --- content/shmem_init.tex | 7 ++++--- content/shmem_query_initialized.tex | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/content/shmem_init.tex b/content/shmem_init.tex index f2d5e4c2..95cc3049 100644 --- a/content/shmem_init.tex +++ b/content/shmem_init.tex @@ -16,9 +16,10 @@ \apidescription{ \FUNC{shmem\_init} allocates and initializes resources used by the \openshmem library. It is a collective operation that all \acp{PE} must call before any - other \openshmem routine may be called. At the end of the \openshmem program - which it initialized, the call to \FUNC{shmem\_init} must be matched with a - call to \FUNC{shmem\_finalize}. 
+ other \openshmem routine may be called, except \FUNC{shmem\_query\_initialized} + which checks the current initialized state of the library. At the end of the + \openshmem program which it initialized, the call to \FUNC{shmem\_init} must + be matched with a call to \FUNC{shmem\_finalize}. The \FUNC{shmem\_init} and \FUNC{shmem\_init\_thread} initialization routines may be called multiple times within an \openshmem program. A diff --git a/content/shmem_query_initialized.tex b/content/shmem_query_initialized.tex index b3729b7c..b93d9917 100644 --- a/content/shmem_query_initialized.tex +++ b/content/shmem_query_initialized.tex @@ -20,7 +20,7 @@ zero. This function may be called at any time, regardless of the thread safety - level of the \openshmem library. + level or the current initialized state of the \openshmem library. } \apireturnvalues{ From 5596f4f5a0d047a4eae5aabbc655cd6c69ea22c4 Mon Sep 17 00:00:00 2001 From: "Rahman, Md" Date: Thu, 29 Aug 2024 12:06:56 -0500 Subject: [PATCH 27/72] Brian suggestion on init finalize --- content/shmem_init.tex | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/content/shmem_init.tex b/content/shmem_init.tex index 95cc3049..4929ecaf 100644 --- a/content/shmem_init.tex +++ b/content/shmem_init.tex @@ -17,9 +17,9 @@ \FUNC{shmem\_init} allocates and initializes resources used by the \openshmem library. It is a collective operation that all \acp{PE} must call before any other \openshmem routine may be called, except \FUNC{shmem\_query\_initialized} - which checks the current initialized state of the library. At the end of the - \openshmem program which it initialized, the call to \FUNC{shmem\_init} must - be matched with a call to \FUNC{shmem\_finalize}. + which checks the current initialized state of the library. In the + \openshmem program which it initialized, each call to \FUNC{shmem\_init} must + be matched with a corresponding call to \FUNC{shmem\_finalize}. 
The \FUNC{shmem\_init} and \FUNC{shmem\_init\_thread} initialization routines may be called multiple times within an \openshmem program. A From 3b1cbb37054421babe9cc1589753b4ac680bf290 Mon Sep 17 00:00:00 2001 From: maawad Date: Thu, 29 Aug 2024 12:02:29 -0700 Subject: [PATCH 28/72] Fix `shmem_team_split_strided`'s `start` argument wording for non-positive value. --- content/shmem_team_split_strided.tex | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index 08969792..9232962e 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -13,8 +13,7 @@ \begin{apiarguments} \apiargument{IN}{parent\_team}{An \openshmem team.} -\apiargument{IN}{start}{The lowest \ac{PE} number of the subset of \acp{PE} from -the parent team that will form the new team.} +\apiargument{IN}{start}{The first \acs{PE} number of the subset of \acp{PE} from the parent team that will form the new team. 
If the stride is less than zero, the first \acs{PE} number is the highest; if greater than zero, it is the lowest; if the stride is zero, it is the starting \acs{PE}.} \apiargument{IN}{stride}{The stride between team \ac{PE} numbers in the parent team that comprise the subset of \acp{PE} that will form From 1aa5958d1ff6127224a5ebffeb531ac0463b2434 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Thu, 29 Aug 2024 15:19:15 -0400 Subject: [PATCH 29/72] teams: split does not permit parent PE wrap-around --- content/shmem_team_split_strided.tex | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index 08969792..18671d56 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -13,7 +13,7 @@ \begin{apiarguments} \apiargument{IN}{parent\_team}{An \openshmem team.} -\apiargument{IN}{start}{The lowest \ac{PE} number of the subset of \acp{PE} from +\apiargument{IN}{start}{The first \ac{PE} number of the subset of \acp{PE} from the parent team that will form the new team.} \apiargument{IN}{stride}{The stride between team \ac{PE} @@ -59,6 +59,17 @@ relative order with respect to the parent team. If a $stride$ value equal to 0 is passed to \FUNC{shmem\_team\_split\_strided}, then the $size$ argument passed must be 1, or the behavior is undefined. +A newly created team must only include \acp{PE} whose subsequent parent \ac{PE} +values are either all increasing (for positive $stride$) or all decreasing +(for negative $stride$). +That is, \textit{wrap-around} with respect to the parent team's \ac{PE} values +is not permitted. +For example, the list of \acp{PE} in the parent team should not start at a high +number and then continue to include \acp{PE} in the lower end of the parent +team's \ac{PE} range. 
+If the triplet provided to \FUNC{shmem\_team\_split\_strided} implies such a +wrap-around sequence, the input is considered invalid and the behavior is +undefined. This routine must be called by all \acp{PE} in the parent team. All \acp{PE} must provide the same values for the \ac{PE} triplet. From a0911b0167e9b32dc2ad9fb53f4d9c309adc37da Mon Sep 17 00:00:00 2001 From: David Ozog Date: Thu, 29 Aug 2024 15:19:15 -0400 Subject: [PATCH 30/72] teams: split does not permit parent PE wrap-around --- content/shmem_team_split_strided.tex | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index 08969792..c147c447 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -59,6 +59,17 @@ relative order with respect to the parent team. If a $stride$ value equal to 0 is passed to \FUNC{shmem\_team\_split\_strided}, then the $size$ argument passed must be 1, or the behavior is undefined. +A newly created team must only include \acp{PE} whose subsequent parent \ac{PE} +values are either all increasing (for positive $stride$) or all decreasing +(for negative $stride$). +That is, \textit{wrap-around} with respect to the parent team's \ac{PE} values +is not permitted. +For example, the list of \acp{PE} in the parent team should not start at a high +number and then continue to include \acp{PE} in the lower end of the parent +team's \ac{PE} range. +If the triplet provided to \FUNC{shmem\_team\_split\_strided} implies such a +wrap-around sequence, the input is considered invalid and the behavior is +undefined. This routine must be called by all \acp{PE} in the parent team. All \acp{PE} must provide the same values for the \ac{PE} triplet. 
From a3b5d824d03e3f4acd99ccf592d742767922abcc Mon Sep 17 00:00:00 2001 From: "Rahman, Md" Date: Thu, 29 Aug 2024 14:51:30 -0500 Subject: [PATCH 31/72] Typo fix --- content/atomics_intro.tex | 2 +- content/rma_intro.tex | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/content/atomics_intro.tex b/content/atomics_intro.tex index d803887d..3a9ad5c6 100644 --- a/content/atomics_intro.tex +++ b/content/atomics_intro.tex @@ -56,7 +56,7 @@ integer types defined in \HEADER{stdint.h} by \Cstd[99]~\S7.18.1.1 and \Cstd[11]~\S7.20.1.1. When the \Cstd translation environment does not provide exact-width integer types with \HEADER{stdint.h}, an -\openshmem implemementation is not required to provide support for these types. +\openshmem implementation is not required to provide support for these types. \begin{table}[h] \begin{center} diff --git a/content/rma_intro.tex b/content/rma_intro.tex index f986d6c8..e45078a8 100644 --- a/content/rma_intro.tex +++ b/content/rma_intro.tex @@ -44,7 +44,7 @@ \footnote{Formally, the \Cstd[99] specification is ISO/IEC~9899:1999(E).}% ~\S7.18.1.1 and \Cstd[11]~\S7.20.1.1. When the \Cstd translation environment does not provide exact-width integer types with \HEADER{stdint.h}, an -\openshmem implemementation is not required to provide support for these types. +\openshmem implementation is not required to provide support for these types. 
\begin{table}[h] \begin{center} From 5ba6559e10bd160c3648e0ec501b7a4ed59d9bba Mon Sep 17 00:00:00 2001 From: David Ozog Date: Thu, 29 Aug 2024 16:48:45 -0400 Subject: [PATCH 32/72] Update content/shmem_team_split_strided.tex Co-authored-by: Muhammad Awad --- content/shmem_team_split_strided.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index c147c447..e475290c 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -59,7 +59,7 @@ relative order with respect to the parent team. If a $stride$ value equal to 0 is passed to \FUNC{shmem\_team\_split\_strided}, then the $size$ argument passed must be 1, or the behavior is undefined. -A newly created team must only include \acp{PE} whose subsequent parent \ac{PE} +When $stride$ is nonzero, a newly created team must only include \acp{PE} whose subsequent parent \ac{PE} values are either all increasing (for positive $stride$) or all decreasing (for negative $stride$). 
That is, \textit{wrap-around} with respect to the parent team's \ac{PE} values From 46fbaa96cffdb6c272c698a7b52a583c9d481153 Mon Sep 17 00:00:00 2001 From: "Rahman, Md" Date: Thu, 29 Aug 2024 16:29:31 -0500 Subject: [PATCH 33/72] Clarified PE being relative to the team for APIs with context --- content/rma_intro.tex | 2 +- content/shmem_atomic_add.tex | 4 ++-- content/shmem_atomic_and.tex | 5 ++--- content/shmem_atomic_compare_swap.tex | 4 ++-- content/shmem_atomic_compare_swap_nbi.tex | 4 ++-- content/shmem_atomic_fetch.tex | 6 +++--- content/shmem_atomic_fetch_add.tex | 5 ++--- content/shmem_atomic_fetch_add_nbi.tex | 5 ++--- content/shmem_atomic_fetch_and.tex | 5 ++--- content/shmem_atomic_fetch_and_nbi.tex | 5 ++--- content/shmem_atomic_fetch_inc.tex | 5 ++--- content/shmem_atomic_fetch_inc_nbi.tex | 5 ++--- content/shmem_atomic_fetch_nbi.tex | 5 ++--- content/shmem_atomic_fetch_or.tex | 5 ++--- content/shmem_atomic_fetch_or_nbi.tex | 5 ++--- content/shmem_atomic_fetch_xor.tex | 5 ++--- content/shmem_atomic_fetch_xor_nbi.tex | 5 ++--- content/shmem_atomic_inc.tex | 5 ++--- content/shmem_atomic_or.tex | 5 ++--- content/shmem_atomic_set.tex | 5 ++--- content/shmem_atomic_swap.tex | 4 ++-- content/shmem_atomic_swap_nbi.tex | 4 ++-- content/shmem_atomic_xor.tex | 5 ++--- content/shmem_g.tex | 3 +++ content/shmem_get.tex | 3 ++- content/shmem_get_nbi.tex | 3 ++- content/shmem_ibget.tex | 3 ++- content/shmem_ibput.tex | 3 ++- content/shmem_iget.tex | 3 ++- content/shmem_iput.tex | 3 ++- content/shmem_p.tex | 3 ++- content/shmem_put.tex | 3 ++- content/shmem_put_nbi.tex | 3 ++- content/shmem_put_signal.tex | 3 ++- content/shmem_put_signal_nbi.tex | 3 ++- content/shmem_signal_add.tex | 5 ++--- content/shmem_signal_set.tex | 5 ++--- 37 files changed, 75 insertions(+), 79 deletions(-) diff --git a/content/rma_intro.tex b/content/rma_intro.tex index e45078a8..200966bd 100644 --- a/content/rma_intro.tex +++ b/content/rma_intro.tex @@ -18,7 +18,7 @@ The destination \ac{PE} 
is specified as an integer representing the \ac{PE} number. This \ac{PE} number is relative to the team associated with the -communication context being using for the operation. If no context argument is passed to the routine, +communication context being used for the operation. If no context argument is passed to the routine, then the routine operates on the default context, which implies that the \ac{PE} number is relative to the world team. If the \ac{PE} number passed to the routine is invalid, being negative diff --git a/content/shmem_atomic_add.tex b/content/shmem_atomic_add.tex index 12737496..8a3b4f3d 100644 --- a/content/shmem_atomic_add.tex +++ b/content/shmem_atomic_add.tex @@ -39,8 +39,8 @@ The type of \dest{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{value}{The operand to the atomic add operation. The type of \VAR{value} should match that implied in the SYNOPSIS section.} - \apiargument{IN}{pe}{An integer that indicates the \ac{PE} number upon which - \dest{} is to be updated.} + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} relative to the team associated + with the given \VAR{ctx} when provided, or the default context otherwise.} \end{apiarguments} \apidescription{ diff --git a/content/shmem_atomic_and.tex b/content/shmem_atomic_and.tex index f82ee4bf..5803812b 100644 --- a/content/shmem_atomic_and.tex +++ b/content/shmem_atomic_and.tex @@ -28,9 +28,8 @@ The type of \dest{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{value}{The operand to the bitwise AND operation. 
The type of \VAR{value} should match that implied in the SYNOPSIS section.} - \apiargument{IN}{pe}{An integer value for the \ac{PE} on which \VAR{dest} - is to be updated.} - + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} relative to the team associated + with the given \VAR{ctx} when provided, or the default context otherwise.} \end{apiarguments} \apidescription{ diff --git a/content/shmem_atomic_compare_swap.tex b/content/shmem_atomic_compare_swap.tex index 85b371fa..3763909d 100644 --- a/content/shmem_atomic_compare_swap.tex +++ b/content/shmem_atomic_compare_swap.tex @@ -45,8 +45,8 @@ type as \VAR{dest}.} \apiargument{IN}{value}{The value to be atomically written to the remote \ac{PE}. The type of \VAR{value} should match that implied in the SYNOPSIS section.} - \apiargument{IN}{pe}{An integer that indicates the \ac{PE} number upon which - \VAR{dest} is to be updated.} + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} relative to the team associated + with the given \VAR{ctx} when provided, or the default context otherwise.} \end{apiarguments} \apidescription{ diff --git a/content/shmem_atomic_compare_swap_nbi.tex b/content/shmem_atomic_compare_swap_nbi.tex index 84a90a12..a3dabaa5 100644 --- a/content/shmem_atomic_compare_swap_nbi.tex +++ b/content/shmem_atomic_compare_swap_nbi.tex @@ -35,8 +35,8 @@ \apiargument{IN}{value}{The value to be atomically written to the remote \ac{PE}. 
The type of \VAR{value} should match that implied in the SYNOPSIS section.} - \apiargument{IN}{pe}{An integer that indicates the \ac{PE} number upon which - \VAR{dest} is to be updated.} + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} relative to the team associated + with the given \VAR{ctx} when provided, or the default context otherwise.} \end{apiarguments} \apidescription{ diff --git a/content/shmem_atomic_fetch.tex b/content/shmem_atomic_fetch.tex index 3c11f2a1..3e7fab8a 100644 --- a/content/shmem_atomic_fetch.tex +++ b/content/shmem_atomic_fetch.tex @@ -40,9 +40,9 @@ the default context.} \apiargument{IN}{source}{Symmetric address of the source data object. The type of \source{} should match that implied in the SYNOPSIS section.} - \apiargument{IN}{pe}{An integer that indicates the \ac{PE} number from which - \VAR{source} is to be fetched.} - + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} on which \VAR{source} resides + relative to the team associated with the given \VAR{ctx} when provided, or the + default context otherwise.} \end{apiarguments} \apidescription{ diff --git a/content/shmem_atomic_fetch_add.tex b/content/shmem_atomic_fetch_add.tex index a07977ec..a0e73984 100644 --- a/content/shmem_atomic_fetch_add.tex +++ b/content/shmem_atomic_fetch_add.tex @@ -41,9 +41,8 @@ SYNOPSIS section.} \apiargument{IN}{value}{The operand to the atomic fetch-and-add operation. 
The type of \VAR{value} should match that implied in the SYNOPSIS section.} -\apiargument{IN}{pe}{An integer that indicates the \ac{PE} number on which - \VAR{dest} is to be updated.} - +\apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} relative to the team associated + with the given \VAR{ctx} when provided, or the default context otherwise.} \end{apiarguments} \apidescription{ diff --git a/content/shmem_atomic_fetch_add_nbi.tex b/content/shmem_atomic_fetch_add_nbi.tex index 3b9e4021..7f1007a2 100644 --- a/content/shmem_atomic_fetch_add_nbi.tex +++ b/content/shmem_atomic_fetch_add_nbi.tex @@ -30,9 +30,8 @@ The type of \dest{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{value}{The operand to the atomic fetch-and-add operation. The type of \VAR{value} should match that implied in the SYNOPSIS section.} - \apiargument{IN}{pe}{An integer that indicates the \ac{PE} number on which - \VAR{dest} is to be updated.} - + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} relative to the team associated + with the given \VAR{ctx} when provided, or the default context otherwise.} \end{apiarguments} \apidescription{ diff --git a/content/shmem_atomic_fetch_and.tex b/content/shmem_atomic_fetch_and.tex index 0a4d7843..675449f2 100644 --- a/content/shmem_atomic_fetch_and.tex +++ b/content/shmem_atomic_fetch_and.tex @@ -27,9 +27,8 @@ The type of \dest{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{value}{The operand to the bitwise AND operation. 
The type of \VAR{value} should match that implied in the SYNOPSIS section.} - \apiargument{IN}{pe}{An integer value for the \ac{PE} on which \VAR{dest} - is to be updated.} - + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} relative to the team associated + with the given \VAR{ctx} when provided, or the default context otherwise.} \end{apiarguments} \apidescription{ diff --git a/content/shmem_atomic_fetch_and_nbi.tex b/content/shmem_atomic_fetch_and_nbi.tex index 9959a1cc..97b8b0c5 100644 --- a/content/shmem_atomic_fetch_and_nbi.tex +++ b/content/shmem_atomic_fetch_and_nbi.tex @@ -30,9 +30,8 @@ The type of \dest{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{value}{The operand to the bitwise AND operation. The type of \VAR{value} should match that implied in the SYNOPSIS section.} - \apiargument{IN}{pe}{An integer value for the \ac{PE} on which \VAR{dest} - is to be updated.} - + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} relative to the team associated + with the given \VAR{ctx} when provided, or the default context otherwise.} \end{apiarguments} \apidescription{ diff --git a/content/shmem_atomic_fetch_inc.tex b/content/shmem_atomic_fetch_inc.tex index 44710246..96437ac7 100644 --- a/content/shmem_atomic_fetch_inc.tex +++ b/content/shmem_atomic_fetch_inc.tex @@ -38,9 +38,8 @@ the default context.} \apiargument{OUT}{dest}{Symmetric address of the destination data object. 
The type of \dest{} should match that implied in the SYNOPSIS section.} -\apiargument{IN}{pe}{An integer that indicates the \ac{PE} number on which - \dest{} is to be updated.} - +\apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} relative to the team associated + with the given \VAR{ctx} when provided, or the default context otherwise.} \end{apiarguments} diff --git a/content/shmem_atomic_fetch_inc_nbi.tex b/content/shmem_atomic_fetch_inc_nbi.tex index 6cfbfb3a..a7c17c3b 100644 --- a/content/shmem_atomic_fetch_inc_nbi.tex +++ b/content/shmem_atomic_fetch_inc_nbi.tex @@ -28,9 +28,8 @@ The type of \VAR{fetch} should match that implied in the SYNOPSIS section.} \apiargument{OUT}{dest}{Symmetric address of the destination data object. The type of \dest{} should match that implied in the SYNOPSIS section.} -\apiargument{IN}{pe}{An integer that indicates the \ac{PE} number on which - \dest{} is to be updated.} - +\apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} relative to the team associated + with the given \VAR{ctx} when provided, or the default context otherwise.} \end{apiarguments} diff --git a/content/shmem_atomic_fetch_nbi.tex b/content/shmem_atomic_fetch_nbi.tex index 4891dc97..43fdd53b 100644 --- a/content/shmem_atomic_fetch_nbi.tex +++ b/content/shmem_atomic_fetch_nbi.tex @@ -28,9 +28,8 @@ The type of \VAR{fetch} should match that implied in the SYNOPSIS section.} \apiargument{OUT}{source}{Symmetric address of the source data object. 
The type of \source{} should match that implied in the SYNOPSIS section.} - \apiargument{IN}{pe}{An integer that indicates the \ac{PE} number from which - \VAR{source} is to be fetched.} - + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} relative to the team associated + with the given \VAR{ctx} when provided, or the default context otherwise.} \end{apiarguments} \apidescription{ diff --git a/content/shmem_atomic_fetch_or.tex b/content/shmem_atomic_fetch_or.tex index 81046d8a..0eb922bd 100644 --- a/content/shmem_atomic_fetch_or.tex +++ b/content/shmem_atomic_fetch_or.tex @@ -27,9 +27,8 @@ The type of \dest{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{value}{The operand to the bitwise OR operation. The type of \VAR{value} should match that implied in the SYNOPSIS section.} - \apiargument{IN}{pe}{An integer value for the \ac{PE} on which \VAR{dest} - is to be updated.} - + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} relative to the team associated + with the given \VAR{ctx} when provided, or the default context otherwise.} \end{apiarguments} \apidescription{ diff --git a/content/shmem_atomic_fetch_or_nbi.tex b/content/shmem_atomic_fetch_or_nbi.tex index 7a6c8668..d62fcd3a 100644 --- a/content/shmem_atomic_fetch_or_nbi.tex +++ b/content/shmem_atomic_fetch_or_nbi.tex @@ -30,9 +30,8 @@ The type of \dest{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{value}{The operand to the bitwise OR operation. 
The type of \VAR{value} should match that implied in the SYNOPSIS section.} - \apiargument{IN}{pe}{An integer value for the \ac{PE} on which \VAR{dest} - is to be updated.} - + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} relative to the team associated + with the given \VAR{ctx} when provided, or the default context otherwise.} \end{apiarguments} \apidescription{ diff --git a/content/shmem_atomic_fetch_xor.tex b/content/shmem_atomic_fetch_xor.tex index a390500e..fd563cb1 100644 --- a/content/shmem_atomic_fetch_xor.tex +++ b/content/shmem_atomic_fetch_xor.tex @@ -28,9 +28,8 @@ The type of \dest{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{value}{The operand to the bitwise XOR operation. The type of \VAR{value} should match that implied in the SYNOPSIS section.} - \apiargument{IN}{pe}{An integer value for the \ac{PE} on which \VAR{dest} - is to be updated.} - + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} relative to the team associated + with the given \VAR{ctx} when provided, or the default context otherwise.} \end{apiarguments} \apidescription{ diff --git a/content/shmem_atomic_fetch_xor_nbi.tex b/content/shmem_atomic_fetch_xor_nbi.tex index 2b2cd085..f69739c3 100644 --- a/content/shmem_atomic_fetch_xor_nbi.tex +++ b/content/shmem_atomic_fetch_xor_nbi.tex @@ -30,9 +30,8 @@ The type of \dest{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{value}{The operand to the bitwise XOR operation. 
The type of \VAR{value} should match that implied in the SYNOPSIS section.} - \apiargument{IN}{pe}{An integer value for the \ac{PE} on which \VAR{dest} - is to be updated.} - + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} relative to the team associated + with the given \VAR{ctx} when provided, or the default context otherwise.} \end{apiarguments} \apidescription{ diff --git a/content/shmem_atomic_inc.tex b/content/shmem_atomic_inc.tex index fde7e9dc..c7cc359f 100644 --- a/content/shmem_atomic_inc.tex +++ b/content/shmem_atomic_inc.tex @@ -38,9 +38,8 @@ the default context.} \apiargument{OUT}{dest}{Symmetric address of the destination data object. The type of \dest{} should match that implied in the SYNOPSIS section.} -\apiargument{IN}{pe}{An integer that indicates the \ac{PE} number on which - \dest{} is to be updated.} - +\apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} relative to the team associated + with the given \VAR{ctx} when provided, or the default context otherwise.} \end{apiarguments} \apidescription{ diff --git a/content/shmem_atomic_or.tex b/content/shmem_atomic_or.tex index 033de757..5f00c530 100644 --- a/content/shmem_atomic_or.tex +++ b/content/shmem_atomic_or.tex @@ -28,9 +28,8 @@ The type of \dest{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{value}{The operand to the bitwise OR operation. 
The type of \VAR{value} should match that implied in the SYNOPSIS section.} - \apiargument{IN}{pe}{An integer value for the \ac{PE} on which \VAR{dest} - is to be updated.} - + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} relative to the team associated + with the given \VAR{ctx} when provided, or the default context otherwise.} \end{apiarguments} \apidescription{ diff --git a/content/shmem_atomic_set.tex b/content/shmem_atomic_set.tex index fcea7dbc..070e760f 100644 --- a/content/shmem_atomic_set.tex +++ b/content/shmem_atomic_set.tex @@ -42,9 +42,8 @@ The type of \dest{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{value}{The operand to the atomic set operation. The type of \VAR{value} should match that implied in the SYNOPSIS section.} -\apiargument{IN}{pe}{An integer that indicates the \ac{PE} number on which - \VAR{dest} is to be updated.} - +\apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} relative to the team associated + with the given \VAR{ctx} when provided, or the default context otherwise.} \end{apiarguments} \apidescription{ diff --git a/content/shmem_atomic_swap.tex b/content/shmem_atomic_swap.tex index adab7bdb..3a0a6577 100644 --- a/content/shmem_atomic_swap.tex +++ b/content/shmem_atomic_swap.tex @@ -39,8 +39,8 @@ The type of \dest{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{value}{The value to be atomically written to the remote \ac{PE}. 
The type of \VAR{value} should match that implied in the SYNOPSIS section.} - \apiargument{IN}{pe}{ An integer that indicates the \ac{PE} number on which - \dest{} is to be updated.} + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} relative to the team associated + with the given \VAR{ctx} when provided, or the default context otherwise.} \end{apiarguments} \apidescription{ diff --git a/content/shmem_atomic_swap_nbi.tex b/content/shmem_atomic_swap_nbi.tex index bfac883c..fb619f20 100644 --- a/content/shmem_atomic_swap_nbi.tex +++ b/content/shmem_atomic_swap_nbi.tex @@ -30,8 +30,8 @@ \apiargument{IN}{value}{The value to be atomically written to the remote \ac{PE}. The type of \VAR{value} should match that implied in the SYNOPSIS section.} - \apiargument{IN}{pe}{An integer that indicates the \ac{PE} number on which - \dest{} is to be updated.} + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} relative to the team associated + with the given \VAR{ctx} when provided, or the default context otherwise.} \end{apiarguments} \apidescription{ diff --git a/content/shmem_atomic_xor.tex b/content/shmem_atomic_xor.tex index d4f863ad..65a7b6dc 100644 --- a/content/shmem_atomic_xor.tex +++ b/content/shmem_atomic_xor.tex @@ -28,9 +28,8 @@ The type of \dest{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{value}{The operand to the bitwise XOR operation. 
The type of \VAR{value} should match that implied in the SYNOPSIS section.} - \apiargument{IN}{pe}{An integer value for the \ac{PE} on which \VAR{dest} - is to be updated.} - + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} relative to the team associated + with the given \VAR{ctx} when provided, or the default context otherwise.} \end{apiarguments} \apidescription{ diff --git a/content/shmem_g.tex b/content/shmem_g.tex index 1fa6a2c9..e9412c81 100644 --- a/content/shmem_g.tex +++ b/content/shmem_g.tex @@ -23,6 +23,9 @@ \apiargument{IN}{source}{Symmetric address of the source data object. The type of \source{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{pe}{The number of the remote \ac{PE} on which \VAR{source} resides.} + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} on which \VAR{source} resides + relative to the team associated with the given \VAR{ctx} when provided, or the + default context otherwise.} \end{apiarguments} \apidescription{ diff --git a/content/shmem_get.tex b/content/shmem_get.tex index 890f83f2..2536421b 100644 --- a/content/shmem_get.tex +++ b/content/shmem_get.tex @@ -37,7 +37,8 @@ The type of \source{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{nelems}{Number of elements in the \dest{} and \source{} arrays. For \FUNC{shmem\_getmem} and \FUNC{shmem\_ctx\_getmem}, elements are bytes.} - \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}.} + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} relative to the team associated + with the given \VAR{ctx} when provided, or the default context otherwise.} \end{apiarguments} \apidescription{ diff --git a/content/shmem_get_nbi.tex b/content/shmem_get_nbi.tex index 57adb768..61e8261f 100644 --- a/content/shmem_get_nbi.tex +++ b/content/shmem_get_nbi.tex @@ -39,7 +39,8 @@ \apiargument{IN}{nelems}{Number of elements in the \dest{} and \source{} arrays. 
For \FUNC{shmem\_getmem\_nbi} and \FUNC{shmem\_ctx\_getmem\_nbi}, elements are bytes.} - \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}.} + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} relative to the team associated + with the given \VAR{ctx} when provided, or the default context otherwise.} \end{apiarguments} \apidescription{ diff --git a/content/shmem_ibget.tex b/content/shmem_ibget.tex index 4deacf0f..811f1e2d 100644 --- a/content/shmem_ibget.tex +++ b/content/shmem_ibget.tex @@ -42,7 +42,8 @@ arrays.} \apiargument{IN}{nblocks}{Number of blocks to be copied from the \source{} array to the \dest{} array.} - \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}.} + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} relative to the team associated + with the given \VAR{ctx} when provided, or the default context otherwise.} \end{apiarguments} \apidescription{ diff --git a/content/shmem_ibput.tex b/content/shmem_ibput.tex index 2c949854..695c05d9 100644 --- a/content/shmem_ibput.tex +++ b/content/shmem_ibput.tex @@ -42,7 +42,8 @@ arrays.} \apiargument{IN}{nblocks}{Number of blocks to be copied from the \source{} array to the \dest{} array.} - \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}.} + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} relative to the team associated + with the given \VAR{ctx} when provided, or the default context otherwise.} \end{apiarguments} diff --git a/content/shmem_iget.tex b/content/shmem_iget.tex index 85871571..369d057a 100644 --- a/content/shmem_iget.tex +++ b/content/shmem_iget.tex @@ -40,7 +40,8 @@ indicates contiguous data.} \apiargument{IN}{nelems}{Number of elements in the \dest{} and \source{} arrays.} - \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}.} + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} relative to the team associated + with the given \VAR{ctx} when provided, or the default context otherwise.} \end{apiarguments} \apidescription{ diff --git 
a/content/shmem_iput.tex b/content/shmem_iput.tex index 16006b00..a937d54e 100644 --- a/content/shmem_iput.tex +++ b/content/shmem_iput.tex @@ -39,7 +39,8 @@ scaled by the element size of the \source{} array. A value of \CONST{1} indicates contiguous data.} \apiargument{IN}{nelems}{Number of elements in the \dest{} and \source{} arrays.} - \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}.} + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} relative to the team associated + with the given \VAR{ctx} when provided, or the default context otherwise.} \end{apiarguments} diff --git a/content/shmem_p.tex b/content/shmem_p.tex index 71e5594b..9f94208a 100644 --- a/content/shmem_p.tex +++ b/content/shmem_p.tex @@ -24,7 +24,8 @@ The type of \dest{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{value}{The value to be transferred to \VAR{dest}. The type of \VAR{value} should match that implied in the SYNOPSIS section.} - \apiargument{IN}{pe}{The number of the remote \ac{PE}.} + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} relative to the team associated + with the given \VAR{ctx} when provided, or the default context otherwise.} \end{apiarguments} \apidescription{ diff --git a/content/shmem_put.tex b/content/shmem_put.tex index f71355a8..ce5c3c22 100644 --- a/content/shmem_put.tex +++ b/content/shmem_put.tex @@ -38,7 +38,8 @@ The type of \source{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{nelems}{Number of elements in the \VAR{dest} and \VAR{source} arrays. 
For \FUNC{shmem\_putmem} and \FUNC{shmem\_ctx\_putmem}, elements are bytes.} - \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}.} + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} relative to the team associated + with the given \VAR{ctx} when provided, or the default context otherwise.} \end{apiarguments} \apidescription{ diff --git a/content/shmem_put_nbi.tex b/content/shmem_put_nbi.tex index 0c42d7c8..0f7c385d 100644 --- a/content/shmem_put_nbi.tex +++ b/content/shmem_put_nbi.tex @@ -39,7 +39,8 @@ \apiargument{IN}{nelems}{Number of elements in the \VAR{dest} and \VAR{source} arrays. For \FUNC{shmem\_putmem\_nbi} and \FUNC{shmem\_ctx\_putmem\_nbi}, elements are bytes.} - \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}.} + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} relative to the team associated + with the given \VAR{ctx} when provided, or the default context otherwise.} \end{apiarguments} \apidescription{ diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index 7fd33a8e..79912934 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -48,7 +48,8 @@ remote \VAR{sig\_addr} signal data object.} \apiargument{IN}{sig\_op}{Signal operator that represents the type of update to be performed on the remote \VAR{sig\_addr} signal data object.} - \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}.} + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} relative to the team associated + with the given \VAR{ctx} when provided, or the default context otherwise.} \end{apiarguments} \apidescription{ diff --git a/content/shmem_put_signal_nbi.tex b/content/shmem_put_signal_nbi.tex index 3cbf3625..611d301e 100644 --- a/content/shmem_put_signal_nbi.tex +++ b/content/shmem_put_signal_nbi.tex @@ -48,7 +48,8 @@ remote \VAR{sig\_addr} signal data object.} \apiargument{IN}{sig\_op}{Signal operator that represents the type of update to be performed on the remote \VAR{sig\_addr} signal 
data object.} - \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}.} + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} relative to the team associated + with the given \VAR{ctx} when provided, or the default context otherwise.} \end{apiarguments} \apidescription{ diff --git a/content/shmem_signal_add.tex b/content/shmem_signal_add.tex index 362fbeef..a7bb2aa3 100644 --- a/content/shmem_signal_add.tex +++ b/content/shmem_signal_add.tex @@ -27,9 +27,8 @@ Unsigned 64-bit value that is used for updating the remote \VAR{sig\_addr} signal data object. } - \apiargument{IN}{pe}{ - \ac{PE} number of the remote \ac{PE}. - } + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} relative to the team associated + with the given \VAR{ctx} when provided, or the default context otherwise.} \end{apiarguments} \apidescription{ diff --git a/content/shmem_signal_set.tex b/content/shmem_signal_set.tex index d0eefbfd..b463917a 100644 --- a/content/shmem_signal_set.tex +++ b/content/shmem_signal_set.tex @@ -27,9 +27,8 @@ Unsigned 64-bit value that is used for updating the remote \VAR{sig\_addr} signal data object. } - \apiargument{IN}{pe}{ - \ac{PE} number of the remote \ac{PE}. 
- } + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} relative to the team associated + with the given \VAR{ctx} when provided, or the default context otherwise.} \end{apiarguments} \apidescription{ From fe631aea72384e98166033372ca2f2f2b501e57f Mon Sep 17 00:00:00 2001 From: Brian Smith Date: Fri, 30 Aug 2024 07:34:53 -0400 Subject: [PATCH 34/72] different -> remote PE --- content/shmem_get.tex | 2 +- content/shmem_get_nbi.tex | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/content/shmem_get.tex b/content/shmem_get.tex index 2536421b..b34f84cf 100644 --- a/content/shmem_get.tex +++ b/content/shmem_get.tex @@ -43,7 +43,7 @@ \apidescription{ The get routines provide a method for copying a contiguous symmetric data - object from a different \ac{PE} to a contiguous data object on the local + object from a remote \ac{PE} to a contiguous data object on the local \ac{PE}. The routines return after the data has been delivered to the \dest{} array on the local \ac{PE}. } diff --git a/content/shmem_get_nbi.tex b/content/shmem_get_nbi.tex index 61e8261f..61bbb698 100644 --- a/content/shmem_get_nbi.tex +++ b/content/shmem_get_nbi.tex @@ -45,7 +45,7 @@ \apidescription{ The get routines provide a method for copying a contiguous symmetric data - object from a different \ac{PE} to a contiguous data object on the local + object from a remote \ac{PE} to a contiguous data object on the local \ac{PE}. The routines return after initiating the operation. The operation is considered complete after a subsequent call to \FUNC{shmem\_quiet}. 
At the completion of \FUNC{shmem\_quiet}, the From 787391c8aefb9bc434df8d47d7955e16428a9680 Mon Sep 17 00:00:00 2001 From: Brian Smith Date: Fri, 30 Aug 2024 07:35:51 -0400 Subject: [PATCH 35/72] Added 'with 'blocks of size bsize' to text --- content/shmem_ibput.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_ibput.tex b/content/shmem_ibput.tex index 695c05d9..55b2987d 100644 --- a/content/shmem_ibput.tex +++ b/content/shmem_ibput.tex @@ -49,7 +49,7 @@ \apidescription{ The \FUNC{shmem\_ibput} routines provide a method for copying strided data - blocks (specified by \VAR{sst}) of an array from a \source{} array on the + blocks (of size \VAR{bsize}) with stride (specified by \VAR{sst}) of an array from a \source{} array on the local \ac{PE} to locations specified by stride \VAR{dst} on a \dest{} array on specified remote \ac{PE}. The routines return when the data has been copied out of the \VAR{source} array on the local \ac{PE} but not From cd6b99e2f1fb01a5ae6f1176596c71d3c1fe9cc1 Mon Sep 17 00:00:00 2001 From: Brian Smith Date: Fri, 30 Aug 2024 07:36:28 -0400 Subject: [PATCH 36/72] Explain all the deprecated blocks since we added _atomics_ to AMO operations --- content/atomics_intro.tex | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/content/atomics_intro.tex b/content/atomics_intro.tex index 3a9ad5c6..ed78ef51 100644 --- a/content/atomics_intro.tex +++ b/content/atomics_intro.tex @@ -123,3 +123,11 @@ \label{bitamotypes} \end{center} \end{table} + +Starting in \openshmem[1.4], all \ac{AMO} functions added "\_atomic\_" to the function name. There were no semantic of argument changes to the functions, simply a name change. 
+ + + + + + From 7e0191cb8f8d7684810d4027709d5508f4e479c7 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Fri, 30 Aug 2024 12:49:31 -0400 Subject: [PATCH 37/72] teams: improve the explanation of no wrap-arounds --- content/shmem_team_split_strided.tex | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index c2f3523f..d282b955 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -59,17 +59,18 @@ relative order with respect to the parent team. If a $stride$ value equal to 0 is passed to \FUNC{shmem\_team\_split\_strided}, then the $size$ argument passed must be 1, or the behavior is undefined. -When $stride$ is nonzero, a newly created team must only include \acp{PE} whose subsequent parent \ac{PE} -values are either all increasing (for positive $stride$) or all decreasing -(for negative $stride$). +If the triplet provided to \FUNC{shmem\_team\_split\_strided} implies a +wrap-around sequence, the input is considered invalid and the behavior is +undefined. +In other words, when $stride$ is nonzero, a newly created team must only +include \acp{PE} whose subsequent parent \ac{PE} values are either all +increasing (for positive $stride$) or all decreasing (for negative +$stride$). That is, \textit{wrap-around} with respect to the parent team's \ac{PE} values is not permitted. For example, the list of \acp{PE} in the parent team should not start at a high number and then continue to include \acp{PE} in the lower end of the parent team's \ac{PE} range. -If the triplet provided to \FUNC{shmem\_team\_split\_strided} implies such a -wrap-around sequence, the input is considered invalid and the behavior is -undefined. This routine must be called by all \acp{PE} in the parent team. All \acp{PE} must provide the same values for the \ac{PE} triplet. 
From 266a8dae29751c117a6db4a849c481065d5f273b Mon Sep 17 00:00:00 2001 From: David Ozog Date: Fri, 30 Aug 2024 12:56:10 -0400 Subject: [PATCH 38/72] teams: revert change to "start" arg description The fix to "start" is covered in a different PR: https://github.com/wokuno/specification/pull/1 --- content/shmem_team_split_strided.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index d282b955..0f247a75 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -13,7 +13,7 @@ \begin{apiarguments} \apiargument{IN}{parent\_team}{An \openshmem team.} -\apiargument{IN}{start}{The first \ac{PE} number of the subset of \acp{PE} from +\apiargument{IN}{start}{The lowest \ac{PE} number of the subset of \acp{PE} from the parent team that will form the new team.} \apiargument{IN}{stride}{The stride between team \ac{PE} From 03dfb155c06e2ac969267901f5172a9f7cd3352b Mon Sep 17 00:00:00 2001 From: Brian Smith Date: Fri, 30 Aug 2024 13:14:49 -0400 Subject: [PATCH 39/72] Tweaked deprecated atomics text per conversation with Jim. Now it's in a deprecated block for easy future removal --- content/atomics_intro.tex | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/content/atomics_intro.tex b/content/atomics_intro.tex index ed78ef51..71f2ec5e 100644 --- a/content/atomics_intro.tex +++ b/content/atomics_intro.tex @@ -29,6 +29,13 @@ The non-fetching routines include: \FUNC{shmem\_atomic\_\{set, inc, add, and, or, xor\}[\_nbi]}. +\begin{DeprecateBlock} + +Starting in \openshmem[1.4], all \ac{AMO} functions added ``\_atomic\_'' to the function +name and deprecated the equivalent functions without ``\_atomic\_'' in the name. + +\end{DeprecateBlock} + \end{itemize} \openshmem \ac{AMO} routines specified in this section have two variants.
In @@ -123,8 +130,7 @@ \label{bitamotypes} \end{center} \end{table} - -Starting in \openshmem[1.4], all \ac{AMO} functions added "\_atomic\_" to the function name. There were no semantic of argument changes to the functions, simply a name change. +] From 45a64caa7fcd9c146e6b9b0a18cd579d3cf51af9 Mon Sep 17 00:00:00 2001 From: Bryan Morgan <108841484+bcmIntc@users.noreply.github.com> Date: Fri, 30 Aug 2024 13:27:12 -0400 Subject: [PATCH 40/72] Update profiling_interface.tex 1/3: "code" instead of "codes" (mass noun). --- content/profiling_interface.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/profiling_interface.tex b/content/profiling_interface.tex index 51a50909..81ed2d83 100644 --- a/content/profiling_interface.tex +++ b/content/profiling_interface.tex @@ -1,6 +1,6 @@ The objective of the \openshmem profiling interface is to ensure an easy and flexible usage model for profiling (and other similar) -tool developers to interface their codes into \openshmem +tool developers to interface their code into \openshmem implementations on different platforms. Since \openshmem is a machine-independent standard with different implementations, it is unreasonable to expect that the authors and developers of profiling From 1539658268c1b57034bf74a14a5d105bb9a81266 Mon Sep 17 00:00:00 2001 From: Bryan Morgan <108841484+bcmIntc@users.noreply.github.com> Date: Fri, 30 Aug 2024 13:31:07 -0400 Subject: [PATCH 41/72] Update shmem_pcontrol.tex 2/3: "code" instead of "codes" (mass noun). --- content/shmem_pcontrol.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_pcontrol.tex b/content/shmem_pcontrol.tex index 79abbbd7..d4aac2b3 100644 --- a/content/shmem_pcontrol.tex +++ b/content/shmem_pcontrol.tex @@ -29,7 +29,7 @@ it is impossible to precisely specify the semantics that will be provided by calls to \FUNC{shmem\_pcontrol}. This vagueness extends to the number of arguments to the function and their datatypes. 
However, to provide some - level of portability of user codes to different profiling libraries, the + level of portability of user code to different profiling libraries, the following \VAR{level} values are recommended. \begin{itemize} From d534723e887cdd764dd43e18515d28c41ab3b17f Mon Sep 17 00:00:00 2001 From: Bryan Morgan <108841484+bcmIntc@users.noreply.github.com> Date: Fri, 30 Aug 2024 13:33:08 -0400 Subject: [PATCH 42/72] Update profiling_interface.tex Added "the" before \openshmem. --- content/profiling_interface.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/profiling_interface.tex b/content/profiling_interface.tex index 81ed2d83..401cc83e 100644 --- a/content/profiling_interface.tex +++ b/content/profiling_interface.tex @@ -133,7 +133,7 @@ \subsection{Limitations} \subsubsection{Multiple Counting} \label{sec:pshmem_multiple_count} -Since some functions in \openshmem library may be implemented +Since some functions in the \openshmem library may be implemented using more basic \openshmem functions, it is possible for these basic profiling functions to be called from within an \openshmem function that was originally called from a profiling routine. For example, From a34d83f21fca32261ab5356b8a6367f398b28ef1 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Fri, 30 Aug 2024 13:53:32 -0400 Subject: [PATCH 43/72] changelog: entry for src buffer reqs in collectives --- content/backmatter.tex | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/content/backmatter.tex b/content/backmatter.tex index 65528d74..b3ab8a46 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -708,6 +708,10 @@ \section{Version 1.6} stride argument is 0 or negative. \ChangelogRef{subsec:shmem_team_split_strided} % +\item Clarified the requirements for the source buffer before entering the + collective routines.
+\ChangelogRef{subsec:shmem_alltoall,subsec:shmem_broadcast,subsec:shmem_collect,subsec:shmem_reductions,subsec:shmem_scan} +% \item Added a new Errata Section~\ref{sec:errata} that indicates errors or ambiguities in the \openshmem specification and the version that required correction or clarification. \ChangelogRef{sec:errata} From 0947c3992c998740608c93f7f9207032f5c8fa7d Mon Sep 17 00:00:00 2001 From: David Ozog Date: Thu, 29 Aug 2024 16:11:31 -0400 Subject: [PATCH 44/72] backmatter: add changelog for no team wrap-arounds --- content/backmatter.tex | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/content/backmatter.tex b/content/backmatter.tex index 65528d74..48351d65 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -677,6 +677,11 @@ \section{Version 1.6} \FUNC{shmem\_team\_ptr}. \ChangelogRef{subsec:shmem_team_ptr}% % +\item Clarified that the behavior of \FUNC{shmem\_team\_split\_strided} is + undefined when the input \VAR{start}, \VAR{stride}, and \VAR{size} arguments + imply a \textit{wrap-around} with respect to the parent team's \acp{PE}. +\ChangelogRef{subsec:shmem_team_split_strided}% +% \item Added the session routines, \FUNC{shmem\_ctx\_session\_start} and \FUNC{shmem\_ctx\_session\_stop}, which allow users to pass hints to the \openshmem library to apply runtime optimizations. 
From 8d87f9d24e549665d141f976d5350eb8a0413cbb Mon Sep 17 00:00:00 2001 From: William Okuno <38401861+wokuno@users.noreply.github.com> Date: Fri, 30 Aug 2024 15:55:26 -0500 Subject: [PATCH 45/72] Updated example 14 for OpenSHMEM 1.5 --- example_code/shmem_ctx.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/example_code/shmem_ctx.c b/example_code/shmem_ctx.c index b122e874..15b3f76b 100644 --- a/example_code/shmem_ctx.c +++ b/example_code/shmem_ctx.c @@ -1,9 +1,6 @@ #include #include -long pwrk[SHMEM_REDUCE_MIN_WRKDATA_SIZE]; -long psync[SHMEM_REDUCE_SYNC_SIZE]; - long task_cntr = 0; /* Next task counter */ long tasks_done = 0; /* Tasks done by this PE */ long total_done = 0; /* Total tasks done by all PEs */ @@ -12,9 +9,6 @@ int main(void) { int tl, i; long ntasks = 1024; /* Total tasks per PE */ - for (i = 0; i < SHMEM_REDUCE_SYNC_SIZE; i++) - psync[i] = SHMEM_SYNC_VALUE; - shmem_init_thread(SHMEM_THREAD_MULTIPLE, &tl); if (tl != SHMEM_THREAD_MULTIPLE) shmem_global_exit(1); @@ -49,7 +43,7 @@ int main(void) { shmem_ctx_destroy(ctx); } - shmem_long_sum_to_all(&total_done, &tasks_done, 1, 0, 0, npes, pwrk, psync); + shmem_long_sum_reduce(SHMEM_TEAM_WORLD, &total_done, &tasks_done, 1); int result = (total_done != ntasks * npes); shmem_finalize(); From 1795f124c8e4ca3bbd34a3cf464840e6624be36f Mon Sep 17 00:00:00 2001 From: Muhammad Awad Date: Fri, 30 Aug 2024 14:25:33 -0700 Subject: [PATCH 46/72] Address review comment --- content/shmem_team_split_strided.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index 9232962e..76fcdca6 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -13,7 +13,7 @@ \begin{apiarguments} \apiargument{IN}{parent\_team}{An \openshmem team.} -\apiargument{IN}{start}{The first \acs{PE} number of the subset of \acp{PE} from the parent team that will form the new team. 
If the stride is less than zero, the first \acs{PE} number is the highest; if greater than zero, it is the lowest; if the stride is zero, it is the starting \acs{PE}.} +\apiargument{IN}{start}{The first \acs{PE} number of the subset of \acp{PE} from the parent team that will form the new team. If the stride is less than zero, the first \acs{PE} number is the highest \acs{PE} of the parent team; if it is greater than zero, it is the lowest; if the stride is zero, it is the starting \acs{PE}.} \apiargument{IN}{stride}{The stride between team \ac{PE} numbers in the parent team that comprise the subset of \acp{PE} that will form From 8ef4cb03deb0e5e8894bcb2c768f8e8f6bbdcade Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Wed, 4 Sep 2024 10:31:53 -0400 Subject: [PATCH 47/72] Add initial list of 1.6 contributors --- content/coverpage.tex | 54 ++++++++++++++++++++++++++++++++++++++++++- utils/defs.tex | 4 ++-- 2 files changed, 55 insertions(+), 3 deletions(-) diff --git a/content/coverpage.tex b/content/coverpage.tex index a5b3df31..e26a54ca 100644 --- a/content/coverpage.tex +++ b/content/coverpage.tex @@ -47,8 +47,60 @@ \section*{Sponsored by} \end{itemize} \section*{Authors and Collaborators} -This document is a collaborative effort consisting of several releases of \openshmem versions 1.0 through 1.5. This section lists the authors and contributors in reverse chronological order, starting with \openshmem 1.5. +This document is a collaborative effort consisting of several releases of \openshmem versions 1.0 through 1.6. This section lists the authors and contributors in reverse chronological order, starting with \openshmem 1.6. 
+\subsection*{\openshmem 1.6} +\begin{multicols}{2} +\begin{itemize} +\setlength\itemsep{0.1em} +\item Ferrol Aderholdt, NVIDIA +\item Muhammad Awad, \ac{AMD} +\item Matthew Baker, \ac{ORNL} +\item Swen Boehm, \ac{ORNL} +\item Aurelien Bouteiller, \ac{UTK} +\item Mark Brown, Intel +\item Bob Cernohous, \ac{HPE} +\item James Dinan\footnotemark[1], NVIDIA +\item Megan Grodowitz, Arm Inc. +\item Max Grossman, Georgia Tech +\item Yanfei Guo, \ac{ANL} +\item Khaled Hamidouche, NVIDIA +\item Jeff Hammond, NVIDIA +\item Akihiro Hayashi, Georgia Tech +\item Oscar Hernandez, \ac{ORNL} +\item Kieran Holland, Intel +\item Robert Kierski, \ac{HPE} +\item Bryant Lam, \ac{DoD} +\item Akhil Langer, NVIDIA +\item Tiffany M. Mintz, \ac{ORNL} +\item Bryan Morgan, Intel +\item William Okuno\footnotemark[2], \ac{HPE} +\item David Ozog\footnotemark[5], Intel +\item Nicholas Park, \ac{DoD} +\item Wendy Poole, \ac{LANL} +\item Steve Poole\footnotemark[6], \ac{OSSS} +\item Swaroop Pophale, \ac{ORNL} +\item Sreeram Potluri, NVIDIA +\item Brandon Potter\footnotemark[4], \ac{AMD} +\item Howard Pritchard, \ac{LANL} +\item Md. 
Wasi-ur- Rahman\footnotemark[11], Intel +\item Naveen Ravichandrasekaran\footnotemark[9], \ac{HPE} +\item Michael Raymond, \ac{HPE} +\item Elliot Ronaghan\footnotemark[8], \ac{HPE} +\item James Ross, \ac{ARL} +\item Pavel Shamis, NVIDIA +\item Sameer Shende, \ac{UO} +\item Danielle Sikich, \ac{HPE} +\item Brian Smith, Cornelis Networks +\item Lawrence Stewart\footnotemark[7], Intel +\item Zach Tiffany, NVIDIA +\item Manjunath Gorentla Venkata\footnotemark[10], NVIDIA +\item Kevin Waters\footnotemark[3], \ac{DoD} +\item Aaron Welch, \ac{ORNL} +\item Nathan Wichmann, \ac{HPE} +\item Jeffrey Young, Georgia Tech +\end{itemize} +\end{multicols} \subsection*{\openshmem 1.5} \begin{multicols}{2} diff --git a/utils/defs.tex b/utils/defs.tex index 771ba8a7..0a298f43 100644 --- a/utils/defs.tex +++ b/utils/defs.tex @@ -105,7 +105,7 @@ \acro{API}{\emph{Application Programming Interface}} \acro{MPI}{\emph{Message Passing Interface}} \acro{SPMD}{\emph{Single Program Multiple Data}} -\acro{ANL}{Argonne National Labratory} +\acro{ANL}{Argonne National Laboratory} \acro{ARL}{Army Research Laboratory} \acro{AMD}{Advanced Micro Devices} \acro{MPMD}{\emph{Multiple Program Multiple Data}} @@ -120,7 +120,7 @@ \acro{SGI}{Silicon Graphics International} \acro{DoD}{U.S. 
Department of Defense} \acro{SBU}{Stonybrook University} -\acro{UTK}{University of Tenneesee at Knoxville} +\acro{UTK}{University of Tennessee at Knoxville} \acro{HPE}{Hewlett Packard Enterprise} \end{acronym} From 9b9230d7516667ad18028a18e186d0e34e01a3e0 Mon Sep 17 00:00:00 2001 From: "Rahman, Md" Date: Wed, 4 Sep 2024 11:51:23 -0500 Subject: [PATCH 48/72] Moved query_initialized to 9.1 --- main_spec.tex | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/main_spec.tex b/main_spec.tex index 03e10779..6c3935d6 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -56,6 +56,10 @@ \subsubsection{\textbf{SHMEM\_N\_PES}}\label{subsec:shmem_n_pes} \subsubsection{\textbf{SHMEM\_FINALIZE}}\label{subsec:shmem_finalize} \input{content/shmem_finalize} +\subsubsection{\textbf{SHMEM\_QUERY\_INITIALIZED}} +\label{subsec:shmem_query_initialized} +\input{content/shmem_query_initialized} + \subsubsection{\textbf{SHMEM\_GLOBAL\_EXIT}}\label{subsec:shmem_global_exit} \input{content/shmem_global_exit} @@ -92,11 +96,6 @@ \subsubsection{\textbf{SHMEM\_QUERY\_THREAD}} \label{subsec:shmem_query_thread} \input{content/shmem_query_thread} -\subsubsection{\textbf{SHMEM\_QUERY\_INITIALIZED}} -\label{subsec:shmem_query_initialized} -\input{content/shmem_query_initialized} - - \subsection{Memory Management Routines} \label{sec:memory_management} \input{content/memmgmt_intro.tex} From 362a6a6af14a8fb6699d329f4f435f41b8517ec8 Mon Sep 17 00:00:00 2001 From: Brandon Potter Date: Thu, 5 Sep 2024 12:15:04 -0500 Subject: [PATCH 49/72] Remove unnecessary whitespace --- content/atomics_intro.tex | 2 +- content/backmatter.tex | 2 +- content/execution_model.tex | 2 +- content/frontmatter.tex | 6 +++--- content/memmgmt_intro.tex | 2 +- content/rma_intro.tex | 6 +++--- content/shmem_addr_accessible.tex | 2 +- content/shmem_alltoalls.tex | 9 ++++----- content/shmem_barrier_all.tex | 2 +- content/shmem_broadcast.tex | 4 ++-- content/shmem_collect.tex | 12 ++++++------ 
content/shmem_finalize.tex | 2 +- content/shmem_g.tex | 2 +- content/shmem_init_thread.tex | 22 +++++++++++----------- content/shmem_malloc_hints.tex | 18 +++++++++--------- content/shmem_p.tex | 2 +- content/shmem_pe_quiet.tex | 18 +++++++++--------- content/shmem_ptr.tex | 2 +- content/shmem_reductions.tex | 2 +- content/shmem_scan.tex | 4 ++-- content/shmem_team_ptr.tex | 2 +- content/shmem_wait_until_all.tex | 2 +- content/shmem_wait_until_all_vector.tex | 2 +- content/shmem_wait_until_any.tex | 2 +- content/shmem_wait_until_any_vector.tex | 2 +- content/shmem_wait_until_some.tex | 2 +- content/shmem_wait_until_some_vector.tex | 2 +- content/the_openshmem_effort.tex | 2 +- 28 files changed, 68 insertions(+), 69 deletions(-) diff --git a/content/atomics_intro.tex b/content/atomics_intro.tex index d803887d..67b0acdf 100644 --- a/content/atomics_intro.tex +++ b/content/atomics_intro.tex @@ -33,7 +33,7 @@ \openshmem \ac{AMO} routines specified in this section have two variants. In one of the variants, the context handle, \VAR{ctx}, is explicitly passed as -an argument. In this variant, the operation is performed on the specified +an argument. In this variant, the operation is performed on the specified context. If the context handle \VAR{ctx} does not correspond to a valid context, the behavior is undefined. In the other variant, the context handle is not explicitly passed and thus, the operations are performed on the diff --git a/content/backmatter.tex b/content/backmatter.tex index 16e7ffcc..498bbf4b 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -993,7 +993,7 @@ \section{Version 1.4} % \item Expanded the type support for \ac{RMA}, \ac{AMO}, and point-to-point synchronization operations. -%% cleveref will compress a list of references by default. It is better to not +%% cleveref will compress a list of references by default. It is better to not %% compress this list of *table* references because the clickable hyperref %% links are useful. 
You can tell cleveref to not compress the LHS and RHS by %% inserting an empty item between them; i.e., `,,`. diff --git a/content/execution_model.tex b/content/execution_model.tex index a56f8bda..e001680b 100644 --- a/content/execution_model.tex +++ b/content/execution_model.tex @@ -10,7 +10,7 @@ communicate and synchronize among executing \acp{PE}. The \openshmem phase in a program begins with the first call to the initialization routine \FUNC{shmem\_init} or \FUNC{shmem\_init\_thread}, which must be performed before using any of the -other \openshmem library routines. +other \openshmem library routines. An \openshmem program concludes its use of the \openshmem library when all \acp{PE} make their final call to \FUNC{shmem\_finalize} or any \ac{PE} calls \FUNC{shmem\_global\_exit}. diff --git a/content/frontmatter.tex b/content/frontmatter.tex index cf5a6ca3..43b5cf35 100644 --- a/content/frontmatter.tex +++ b/content/frontmatter.tex @@ -8,7 +8,7 @@ \SetWatermarkText{DRAFT} \SetWatermarkScale{1} \SetWatermarkLightness{.91} -\fancyfoot[C]{\thepage} %affects page numbering for the first pages, +\fancyfoot[C]{\thepage} %affects page numbering for the first pages, %except the first ToC page \pagenumbering{roman} %sets coverpage and toc page numbers to roman numerals @@ -19,10 +19,10 @@ \setcounter{secnumdepth}{4} \tableofcontents -\mainmatter % included for use of documenttype 'book' +\mainmatter % included for use of documenttype 'book' % Set header/footer for main content -\pagestyle{fancy} %replacing {headings} with {fancy} for customization +\pagestyle{fancy} %replacing {headings} with {fancy} for customization \fancyhf{} \fancyhead[L]{\leftmark} \fancyhead[R]{\thepage} diff --git a/content/memmgmt_intro.tex b/content/memmgmt_intro.tex index 8cb6605c..660e5bc7 100644 --- a/content/memmgmt_intro.tex +++ b/content/memmgmt_intro.tex @@ -17,7 +17,7 @@ The total size of the symmetric heap is determined at job startup. 
One can specify the size of the heap using the \ENVVAR{SHMEM\_SYMMETRIC\_SIZE} environment -variable (where available). +variable (where available). \begin{DeprecateBlock} As of \openshmem[1.2] the use of \FUNC{shmalloc}, \FUNC{shmemalign}, diff --git a/content/rma_intro.tex b/content/rma_intro.tex index f986d6c8..0cfd8cd0 100644 --- a/content/rma_intro.tex +++ b/content/rma_intro.tex @@ -32,10 +32,10 @@ is not explicitly passed and thus, the operations are performed on the default context. -Where appropriate compiler support is available, \openshmem provides type-generic +Where appropriate compiler support is available, \openshmem provides type-generic one-sided communication interfaces via \Cstd[11] generic selection (\Cstd[11]~\S6.5.1.1\footnote{Formally, the \Cstd[11] specification is ISO/IEC 9899:2011(E).}) -for block, scalar, and block-strided put and get communication. +for block, scalar, and block-strided put and get communication. Such type-generic routines are supported for the ``standard \ac{RMA} types'' listed in Table \ref{stdrmatypes}. @@ -78,5 +78,5 @@ \end{tabular} \TableCaptionRef{Standard \ac{RMA} Types and Names} \label{stdrmatypes} - \end{center} + \end{center} \end{table} diff --git a/content/shmem_addr_accessible.tex b/content/shmem_addr_accessible.tex index 14b83766..d32e54be 100644 --- a/content/shmem_addr_accessible.tex +++ b/content/shmem_addr_accessible.tex @@ -18,7 +18,7 @@ \FUNC{shmem\_addr\_accessible} is a query routine that indicates whether the address \VAR{addr} can be used to access the given data object on the specified \ac{PE} via \openshmem routines. - + This routine verifies that the data object is symmetric and accessible with respect to a remote \ac{PE} via \openshmem data transfer routines. 
The specified address \VAR{addr} is the local address of the data object on the diff --git a/content/shmem_alltoalls.tex b/content/shmem_alltoalls.tex index e371b8cf..76d769e3 100644 --- a/content/shmem_alltoalls.tex +++ b/content/shmem_alltoalls.tex @@ -33,7 +33,7 @@ \apiargument{IN}{team}{A valid \openshmem team handle.}% -\apiargument{OUT}{dest}{Symmetric address of a data object large enough to receive +\apiargument{OUT}{dest}{Symmetric address of a data object large enough to receive the combined total of \VAR{nelems} elements from each \ac{PE} in the active set. The type of \dest{} should match that implied in the SYNOPSIS section.} @@ -53,7 +53,7 @@ for \FUNC{shmem\_alltoalls\{32,64\}}, elements are 4 or 8 bytes, respectively. } - + \begin{DeprecateBlock} \apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the active set of \acp{PE}.} @@ -82,7 +82,7 @@ The same \dest{} and \source{} arrays and same values for values of arguments \VAR{dst}, \VAR{sst}, \VAR{nelems} must be passed by all \acp{PE} that participate in the collective. - + Given a \ac{PE} \VAR{i} that is the \kth \ac{PE} participating in the operation and a \ac{PE} \VAR{j} that is the \lth \ac{PE} @@ -99,8 +99,7 @@ \item The pre- and post-conditions for symmetric objects. \item Typing constraints for \dest{} and \source{} data objects. \end{itemize} - -} +} \apireturnvalues{ diff --git a/content/shmem_barrier_all.tex b/content/shmem_barrier_all.tex index 4f2675bb..bb81c6fb 100644 --- a/content/shmem_barrier_all.tex +++ b/content/shmem_barrier_all.tex @@ -16,7 +16,7 @@ \end{apiarguments} -\apidescription{ +\apidescription{ The \FUNC{shmem\_barrier\_all} routine is a mechanism for synchronizing all \acp{PE} in the world team at once. 
This routine blocks the calling \ac{PE} until all \acp{PE} have called diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index a172a12e..f6937f06 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -60,7 +60,7 @@ \end{apiarguments} -\apidescription{ +\apidescription{ \openshmem broadcast routines are collective routines over an active set or valid \openshmem team. They copy the \source{} data object on the \ac{PE} specified by @@ -82,7 +82,7 @@ between \CONST{0} and \VAR{N$-$1}, where \VAR{N} is the size of the team. \end{itemize} - + For active-set-based broadcasts: \begin{itemize} \item The \dest{} object is updated on all \acp{PE} other than the diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index 5430abcf..4040b97d 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -69,18 +69,18 @@ \dest{} array, over an \openshmem team or active set in processor number order. The resultant \dest{} array contains the contribution from \acp{PE} as follows: - + \begin{itemize} \item For an active set, the data from \ac{PE} \VAR{PE\_start} is first, then the contribution from \ac{PE} \VAR{PE\_start} + \VAR{PE\_stride} second, and so on. \item For a team, the data from \ac{PE} number \CONST{0} in the team is first, then the contribution from \ac{PE} \CONST{1} in the team, and so on. \end{itemize} - + The collected result is written to the \dest{} array for all \acp{PE} that participate in the operation. The same \dest{} and \source{} arrays must be passed by all \acp{PE} that participate in the operation. - + The \FUNC{fcollect} routines require that \VAR{nelems} be the same value in all participating \acp{PE}, while the \FUNC{collect} routines allow \VAR{nelems} to vary from \ac{PE} to \ac{PE}. @@ -96,15 +96,15 @@ each of these routines assumes that only \acp{PE} in the active set call the routine. 
If a \ac{PE} not in the active set and calls this collective routine, the behavior is undefined. - + The values of arguments \VAR{PE\_start}, \VAR{logPE\_stride}, and \VAR{PE\_size} must be the same value on all \acp{PE} in the active set. The same \VAR{pSync} work array must be passed by all \acp{PE} in the active set. - + Upon return from a collective routine, the following are true for the local \ac{PE}: \begin{itemize} - \item The \dest{} array is updated and the \source{} array may be safely reused. + \item The \dest{} array is updated and the \source{} array may be safely reused. \item For active-set-based collective routines, the values in the \VAR{pSync} array are restored to the original values. \end{itemize} diff --git a/content/shmem_finalize.tex b/content/shmem_finalize.tex index 5496e9bf..7e508f12 100644 --- a/content/shmem_finalize.tex +++ b/content/shmem_finalize.tex @@ -23,7 +23,7 @@ An \openshmem program may perform a series of matching initialization and finalization calls. The last call to \FUNC{shmem\_finalize} in this series - releases all resources used by the \openshmem library. + releases all resources used by the \openshmem library. This call destroys all teams created by the \openshmem program. As a result, all shareable contexts are destroyed. The user is diff --git a/content/shmem_g.tex b/content/shmem_g.tex index 1fa6a2c9..ff5384f8 100644 --- a/content/shmem_g.tex +++ b/content/shmem_g.tex @@ -27,7 +27,7 @@ \apidescription{ These routines provide a very low latency get capability for single elements - of most basic types. + of most basic types. } \apireturnvalues{ diff --git a/content/shmem_init_thread.tex b/content/shmem_init_thread.tex index f1f397d8..a5d81ae9 100644 --- a/content/shmem_init_thread.tex +++ b/content/shmem_init_thread.tex @@ -15,12 +15,12 @@ \end{apiarguments} \apidescription{ -\FUNC{shmem\_init\_thread} initializes the \openshmem library in the same way as -\FUNC{shmem\_init}. 
In addition, \FUNC{shmem\_init\_thread} also performs -the initialization required for supporting the provided thread level. -The argument \VAR{requested} is used to specify the desired level of -thread support. The argument \VAR{provided} returns the support level -provided by the library. The allowed values for \VAR{provided} and +\FUNC{shmem\_init\_thread} initializes the \openshmem library in the same way as +\FUNC{shmem\_init}. In addition, \FUNC{shmem\_init\_thread} also performs +the initialization required for supporting the provided thread level. +The argument \VAR{requested} is used to specify the desired level of +thread support. The argument \VAR{provided} returns the support level +provided by the library. The allowed values for \VAR{provided} and \VAR{requested} are \CONST{SHMEM\_THREAD\_SINGLE}, \CONST{SHMEM\_THREAD\_FUNNELED}, \CONST{SHMEM\_THREAD\_SERIALIZED}, and \CONST{SHMEM\_THREAD\_MULTIPLE}. @@ -32,8 +32,8 @@ re-initialized with a subsequent call to an initialization routine. If the call to \FUNC{shmem\_init\_thread} -is unsuccessful in allocating and initializing resources for the -\openshmem library, then the behavior of any subsequent call +is unsuccessful in allocating and initializing resources for the +\openshmem library, then the behavior of any subsequent call to the \openshmem library is undefined. @@ -45,9 +45,9 @@ } \apinotes{ -The \openshmem library can be initialized either by \FUNC{shmem\_init} -or \FUNC{shmem\_init\_thread}. If the \openshmem library is initialized -by \FUNC{shmem\_init}, the library implementation can choose to +The \openshmem library can be initialized either by \FUNC{shmem\_init} +or \FUNC{shmem\_init\_thread}. If the \openshmem library is initialized +by \FUNC{shmem\_init}, the library implementation can choose to support any one of the defined thread levels. 
The \openshmem library may not be able to change the level of threading support diff --git a/content/shmem_malloc_hints.tex b/content/shmem_malloc_hints.tex index ef4cbfc2..0377dc4f 100644 --- a/content/shmem_malloc_hints.tex +++ b/content/shmem_malloc_hints.tex @@ -22,19 +22,19 @@ is a collective operation on the world team that returns a pointer to a block of at least \VAR{size} bytes, which shall be suitably aligned so that it may be assigned to a pointer to any type of object. This space is allocated from - the symmetric heap (similar to \FUNC{shmem\_malloc}). When the \VAR{size} is zero, - the \FUNC{shmem\_malloc\_with\_hints} routine performs no action and returns a null pointer. - - In addition to the \VAR{size} argument, the \VAR{hints} argument is provided by the user. + the symmetric heap (similar to \FUNC{shmem\_malloc}). When the \VAR{size} is zero, + the \FUNC{shmem\_malloc\_with\_hints} routine performs no action and returns a null pointer. + + In addition to the \VAR{size} argument, the \VAR{hints} argument is provided by the user. The \VAR{hints} describes the expected manner in which the \openshmem program may use the allocated memory. The valid usage hints are described in Table~\ref{usagehints}. Multiple hints may be requested by combining them with a bitwise \CONST{OR} operation. A zero option can be given if no options are requested. - - The information provided by the \VAR{hints} is used to optimize for performance by the implementation. + + The information provided by the \VAR{hints} is used to optimize for performance by the implementation. If the implementation cannot optimize, the behavior is same as \FUNC{shmem\_malloc}. - If more than one hint is provided, the implementation will make the best effort to use one or more hints - to optimize performance. - + If more than one hint is provided, the implementation will make the best effort to use one or more hints + to optimize performance. 
+ The \FUNC{shmem\_malloc\_with\_hints} routine is provided so that multiple \acp{PE} in a program can allocate symmetric, remotely accessible memory blocks. When no action is performed, these routines return without performing a barrier. Otherwise, the routine will call a procedure that is semantically equivalent to \FUNC{shmem\_barrier\_all} on exit. diff --git a/content/shmem_p.tex b/content/shmem_p.tex index 71e5594b..0887b507 100644 --- a/content/shmem_p.tex +++ b/content/shmem_p.tex @@ -30,7 +30,7 @@ \apidescription{ These routines provide a very low latency put capability for single elements of most basic types. - + As with \FUNC{shmem\_put}, these routines start the remote transfer and may return before the data is delivered to the remote \ac{PE}. Use \FUNC{shmem\_quiet} to force completion of all remote \PUT{} transfers. diff --git a/content/shmem_pe_quiet.tex b/content/shmem_pe_quiet.tex index 72ff2963..f0336c20 100644 --- a/content/shmem_pe_quiet.tex +++ b/content/shmem_pe_quiet.tex @@ -1,7 +1,7 @@ \apisummary{ - Waits for completion of all outstanding memory store, blocking - \PUT{}, \ac{AMO}, and \emph{put-with-signal}, as well as - nonblocking \PUT{}, \emph{put-with-signal}, and \GET{} routines + Waits for completion of all outstanding memory store, blocking + \PUT{}, \ac{AMO}, and \emph{put-with-signal}, as well as + nonblocking \PUT{}, \emph{put-with-signal}, and \GET{} routines to symmetric data objects issued by the calling \ac{PE} at the target \acp{PE}. } @@ -17,22 +17,22 @@ \apiargument{IN}{ctx}{A context handle specifying the context on which to perform the operation. 
When this argument is not provided, the operation is performed on the default context.} - \apiargument{IN}{target\_pes}{Address of target \ac{PE} array where the + \apiargument{IN}{target\_pes}{Address of target \ac{PE} array where the operations need to be completed} - \apiargument{IN}{npes}{The number of \acp{PE} in the target \ac{PE} array} + \apiargument{IN}{npes}{The number of \acp{PE} in the target \ac{PE} array} \end{apiarguments} \apidescription{ - The \FUNC{shmem\_pe\_quiet} ensures completion of memory store, blocking + The \FUNC{shmem\_pe\_quiet} ensures completion of memory store, blocking \PUT{}, \ac{AMO}, and \emph{put-with-signal}, as well as nonblocking \PUT{}, \emph{put-with-signal}, and \GET{} routines on the symmetric data objects issued by the calling \ac{PE} to the target \acp{PE} and on the given context. If \VAR{npes} is set to 0, the \VAR{target\_pes} is ignored and the routine returns immediately. - - The completion and visibility semantics of these operations are the same as the - \FUNC{shmem\_quiet} routine. However, it applies only to the target + + The completion and visibility semantics of these operations are the same as the + \FUNC{shmem\_quiet} routine. However, it applies only to the target \acp{PE}, i.e., the operations to the target \acp{PE} are guaranteed to be complete and visible to all \acp{PE} when \FUNC{shmem\_pe\_quiet} returns. } diff --git a/content/shmem_ptr.tex b/content/shmem_ptr.tex index f5c4d7e9..9207ac96 100644 --- a/content/shmem_ptr.tex +++ b/content/shmem_ptr.tex @@ -23,7 +23,7 @@ to a remotely accessible data object. Providing this address to an argument of an \openshmem routine that requires a symmetric address results in undefined behavior. 
- + The \FUNC{shmem\_ptr} routine can provide an efficient means to accomplish communication, for example when a sequence of reads and writes to a data object on a remote \ac{PE} does not match the access pattern provided in an diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index ff933b35..f2aafdfd 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -319,7 +319,7 @@ \subsubsubsection{PROD} \openshmem routine. \end{itemize} Otherwise, the behavior is undefined. - + Upon return from a reduction routine, the following are true for the local \ac{PE}: \begin{itemize} diff --git a/content/shmem_scan.tex b/content/shmem_scan.tex index 618a51a0..f86c7435 100644 --- a/content/shmem_scan.tex +++ b/content/shmem_scan.tex @@ -57,7 +57,7 @@ The \FUNC{shmem\_sum\_inscan} routine performs an inclusive scan operation, while the \FUNC{shmem\_sum\_exscan} routine performs an - exclusive scan operation. + exclusive scan operation. For \FUNC{shmem\_sum\_inscan}, the value of the $j$-th element in the \VAR{dest} array on \ac{PE}~$i$ is defined as: @@ -89,7 +89,7 @@ Before any \ac{PE} calls a scan routine, the \dest{} array on all \acp{PE} participating in the operation must be ready to accept the results of the operation. Otherwise, the behavior is undefined. - + Upon return from a scan routine, the following are true for the local \ac{PE}: the \dest{} array is updated, and the \source{} array may be safely reused. diff --git a/content/shmem_team_ptr.tex b/content/shmem_team_ptr.tex index af158c31..340419b5 100644 --- a/content/shmem_team_ptr.tex +++ b/content/shmem_team_ptr.tex @@ -24,7 +24,7 @@ a remotely accessible data object. Providing this address to an argument of an \openshmem routine that requires a symmetric address results in undefined behavior. 
- + The \FUNC{shmem\_team\_ptr} routine can provide an efficient means to accomplish communication, for example when a sequence of reads and writes to a data object on a remote \ac{PE} does not match the access pattern provided in an diff --git a/content/shmem_wait_until_all.tex b/content/shmem_wait_until_all.tex index d4dfd4b4..4f1c8c15 100644 --- a/content/shmem_wait_until_all.tex +++ b/content/shmem_wait_until_all.tex @@ -33,7 +33,7 @@ \end{apiarguments} -\apidescription{ +\apidescription{ The \FUNC{shmem\_wait\_until\_all} routine waits until all entries in the wait set specified by \VAR{ivars} and \VAR{status} have satisfied the wait condition at the calling \ac{PE}. The \VAR{ivars} objects at the calling \ac{PE} may be diff --git a/content/shmem_wait_until_all_vector.tex b/content/shmem_wait_until_all_vector.tex index a3abdf9c..b8006f1c 100644 --- a/content/shmem_wait_until_all_vector.tex +++ b/content/shmem_wait_until_all_vector.tex @@ -34,7 +34,7 @@ \end{apiarguments} -\apidescription{ +\apidescription{ The \FUNC{shmem\_wait\_until\_all\_vector} routine waits until all entries in the wait set specified by \VAR{ivars} and \VAR{status} have satisfied the wait conditions at the calling \ac{PE}. The \VAR{ivars} diff --git a/content/shmem_wait_until_any.tex b/content/shmem_wait_until_any.tex index e94e9afb..ce119eca 100644 --- a/content/shmem_wait_until_any.tex +++ b/content/shmem_wait_until_any.tex @@ -34,7 +34,7 @@ \end{apiarguments} -\apidescription{ +\apidescription{ The \FUNC{shmem\_wait\_until\_any} routine waits until any one entry in the wait set specified by \VAR{ivars} and \VAR{status} satisfies the wait condition at the calling \ac{PE}. 
The \VAR{ivars} objects at the calling diff --git a/content/shmem_wait_until_any_vector.tex b/content/shmem_wait_until_any_vector.tex index 09bcc5c7..30ebd077 100644 --- a/content/shmem_wait_until_any_vector.tex +++ b/content/shmem_wait_until_any_vector.tex @@ -35,7 +35,7 @@ \end{apiarguments} -\apidescription{ +\apidescription{ The \FUNC{shmem\_wait\_until\_any\_vector} routine waits until any one entry in the wait set specified by \VAR{ivars} and \VAR{status} satisfies the wait condition at the calling \ac{PE}. The \VAR{ivars} objects at the diff --git a/content/shmem_wait_until_some.tex b/content/shmem_wait_until_some.tex index 9af90fbb..8bcf4975 100644 --- a/content/shmem_wait_until_some.tex +++ b/content/shmem_wait_until_some.tex @@ -36,7 +36,7 @@ \end{apiarguments} -\apidescription{ +\apidescription{ The \FUNC{shmem\_wait\_until\_some} routine waits until at least one entry in the wait set specified by \VAR{ivars} and \VAR{status} satisfies the wait condition at the calling \ac{PE}. The \VAR{ivars} objects at the diff --git a/content/shmem_wait_until_some_vector.tex b/content/shmem_wait_until_some_vector.tex index e3a414fb..9b219cf4 100644 --- a/content/shmem_wait_until_some_vector.tex +++ b/content/shmem_wait_until_some_vector.tex @@ -37,7 +37,7 @@ \end{apiarguments} -\apidescription{ +\apidescription{ The \FUNC{shmem\_wait\_until\_some\_vector} routine waits until at least one entry in the wait set specified by \VAR{ivars} and \VAR{status} satisfies the wait condition at the calling \ac{PE}. diff --git a/content/the_openshmem_effort.tex b/content/the_openshmem_effort.tex index f83d3188..a3321cb5 100644 --- a/content/the_openshmem_effort.tex +++ b/content/the_openshmem_effort.tex @@ -9,7 +9,7 @@ code. This ensures that programs can run on multiple platforms without having to deal with subtle vendor-specific implementation differences. 
For more details on the history of \openshmem please refer to the -\hyperref[sec:openshmem_history]{History of \openshmem} section. +\hyperref[sec:openshmem_history]{History of \openshmem} section. The \openshmem\footnote{The \openshmem specification is owned by Open Source Software Solutions Inc., a nonprofit organization, under an agreement with From 98c8718ddb3cead8095e664037337de5b965db9a Mon Sep 17 00:00:00 2001 From: Brandon Potter Date: Thu, 5 Sep 2024 12:22:57 -0500 Subject: [PATCH 50/72] Spelling fix for implementer text --- content/profiling_interface.tex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/content/profiling_interface.tex b/content/profiling_interface.tex index 401cc83e..9fec4875 100644 --- a/content/profiling_interface.tex +++ b/content/profiling_interface.tex @@ -6,7 +6,7 @@ unreasonable to expect that the authors and developers of profiling tools for \openshmem will have access to the source code that implements \openshmem on any particular machine. It is, therefore, -necessary to provide a mechanism by which the implementors of such +necessary to provide a mechanism by which the implementers of such tools can collect whatever performance information they wish \emph{without} access to the underlying implementation. @@ -48,7 +48,7 @@ \end{enumerate} Provided that an \openshmem implementation meets these requirements, -it is possible for the implementor of the profiling system +it is possible for the implementer of the profiling system to intercept the \openshmem calls that are made by the user program. 
The information required can be collected before and after calling the underlying \openshmem implementation through the name From aae7572c566e00300322f79d6985bd8096e4a7ca Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Wed, 20 Mar 2024 08:08:12 -0400 Subject: [PATCH 51/72] Deprecate blocks for Collect, Broadcast, update def apireturnvalues --- content/shmem_broadcast.tex | 5 +++++ content/shmem_collect.tex | 8 ++++++++ utils/defs.tex | 3 +-- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index a172a12e..3e4b376e 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -83,6 +83,7 @@ the team. \end{itemize} +\begin{DeprecateBlock} For active-set-based broadcasts: \begin{itemize} \item The \dest{} object is updated on all \acp{PE} other than the @@ -128,13 +129,17 @@ \end{itemize} \item The \source{} data object may be safely reused. \end{itemize} +\end{DeprecateBlock} } \apireturnvalues{ For team-based broadcasts, zero on successful local completion; otherwise, nonzero. +\begin{DeprecateBlock} For active-set-based broadcasts, none. +\end{DeprecateBlock} + } \apinotes{ diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index 5430abcf..897bdcb3 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -90,6 +90,7 @@ If \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined. +\begin{DeprecateBlock} Active-set-based collective routines operate over all \acp{PE} in the active set defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. As with all active-set-based collective routines, @@ -108,6 +109,7 @@ \item For active-set-based collective routines, the values in the \VAR{pSync} array are restored to the original values. 
\end{itemize} +\end{DeprecateBlock} } \apireturnvalues{ @@ -115,9 +117,15 @@ } \apinotes{ +\begin{DeprecateBlock} The collective routines operate on active \ac{PE} sets that have a non-power-of-two \VAR{PE\_size} with some performance degradation. They operate with no performance degradation when \VAR{nelems} is a non-power-of-two value. +\end{DeprecateBlock} + The collective routines that operate on teams containing a + non-power-of-two number of \acp{PE} do so with some performance degradation. They operate + with no performance degradation when \VAR{nelems} is a non-power-of-two value. + } \begin{apiexamples} diff --git a/utils/defs.tex b/utils/defs.tex index 771ba8a7..9d2bdb64 100644 --- a/utils/defs.tex +++ b/utils/defs.tex @@ -362,8 +362,7 @@ \hfill \item[Return Values] \hfill \\ #1 -\\ -\hfill +\hfill \\ } \newcommand{\apitablerow}[2]{ From 7b2dbd30460530e997be3f5f70e054431206a03c Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Wed, 20 Mar 2024 11:42:13 -0400 Subject: [PATCH 52/72] Deprecate active-set language in Collectives, missing Reductions --- content/shmem_alltoall.tex | 38 ++++++++++++++++++-------- content/shmem_alltoalls.tex | 4 +-- content/shmem_broadcast.tex | 53 ++++++++++++++++++++++++------------- content/shmem_collect.tex | 23 +++++++++++++--- content/shmem_sync.tex | 19 ++++++++++--- 5 files changed, 99 insertions(+), 38 deletions(-) diff --git a/content/shmem_alltoall.tex b/content/shmem_alltoall.tex index 188e2875..c37823d8 100644 --- a/content/shmem_alltoall.tex +++ b/content/shmem_alltoall.tex @@ -35,10 +35,10 @@ \apiargument{OUT}{dest}{Symmetric address of a data object large enough to receive the combined total of \VAR{nelems} elements from each \ac{PE} in the - active set. + participating \acp{PE}.
The type of \dest{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{source}{Symmetric address of a data object that contains \VAR{nelems} - elements of data for each \ac{PE} in the active set, ordered according to + elements of data for each \ac{PE} in the participating \acp{PE}, ordered according to destination \ac{PE}. The type of \source{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{nelems}{ @@ -100,6 +100,21 @@ If \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined. + Before any \ac{PE} calls a \FUNC{shmem\_alltoall} routine, + the following conditions must be ensured: + \begin{itemize} + \item The \VAR{dest} data object on all \acp{PE} in the team is + ready to accept the \FUNC{shmem\_alltoall} data. + \end{itemize} + + Upon return from a \FUNC{shmem\_alltoall} routine, the following is true for + the local PE: + \begin{itemize} + \item Its \VAR{dest} symmetric data object is completely updated and the + data has been copied out of the source data object. + \end{itemize} + +\begin{DeprecateBlock} Active-set-based collective routines operate over all \acp{PE} in the active set defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. @@ -117,22 +132,23 @@ Before any \ac{PE} calls a \FUNC{shmem\_alltoall} routine, the following conditions must be ensured: \begin{itemize} - \item The \VAR{dest} data object on all \acp{PE} in the active set is - ready to accept the \FUNC{shmem\_alltoall} data. - \item For active-set-based routines, the \VAR{pSync} array - on all \acp{PE} in the active set is not still in use from a prior call - to a \FUNC{shmem\_alltoall} routine. + \item The \VAR{dest} data object on all \acp{PE} in the active set is + ready to accept the \FUNC{shmem\_alltoall} data. 
+ \item For active-set-based routines, the \VAR{pSync} array + on all \acp{PE} in the active set is not still in use from a prior call + to a \FUNC{shmem\_alltoall} routine. \end{itemize} Otherwise, the behavior is undefined. Upon return from a \FUNC{shmem\_alltoall} routine, the following is true for the local PE: \begin{itemize} - \item Its \VAR{dest} symmetric data object is completely updated and - the data has been copied out of the \VAR{source} data object. - \item For active-set-based routines, - the values in the \VAR{pSync} array are restored to the original values. + \item Its \VAR{dest} symmetric data object is completely updated and the + data has been copied out of the source data object. + \item For active-set-based routines, + the values in the \VAR{pSync} array are restored to the original values. \end{itemize} +\end{DeprecateBlock} } \apireturnvalues{ diff --git a/content/shmem_alltoalls.tex b/content/shmem_alltoalls.tex index e371b8cf..d1bd7d1f 100644 --- a/content/shmem_alltoalls.tex +++ b/content/shmem_alltoalls.tex @@ -35,10 +35,10 @@ \apiargument{OUT}{dest}{Symmetric address of a data object large enough to receive the combined total of \VAR{nelems} elements from each \ac{PE} in the - active set. + participating \acp{PE}. The type of \dest{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{source}{Symmetric address of a data object that contains \VAR{nelems} - elements of data for each \ac{PE} in the active set, ordered according to + elements of data for each \ac{PE} in the participating \acp{PE}, ordered according to destination \ac{PE}. The type of \source{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{dst}{The stride between consecutive elements of the \dest{} diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index 3e4b376e..5aec7b9d 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -45,7 +45,7 @@ respectively. 
} \apiargument{IN}{PE\_root}{Zero-based ordinal of the \ac{PE}, with respect to - the team or active set, from which the data is copied.} + the calling PEs, from which the data is copied.} \begin{DeprecateBlock} @@ -61,8 +61,7 @@ \end{apiarguments} \apidescription{ - \openshmem broadcast routines are collective routines over an active set or - valid \openshmem team. + \openshmem team-based broadcast routines are collective routines over a valid \openshmem team. They copy the \source{} data object on the \ac{PE} specified by \VAR{PE\_root} to the \dest{} data object on the \acp{PE} participating in the collective operation. @@ -75,6 +74,9 @@ \item The \dest{} object is updated on all \acp{PE}. \item All \acp{PE} in the \VAR{team} argument must participate in the operation. + \item Only \acp{PE} in the team may call the routine. If a + \ac{PE} not in the team calls a team-based + collective routine, the behavior is undefined. \item If \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined. \item \ac{PE} numbering is relative to the team. The specified @@ -82,12 +84,34 @@ between \CONST{0} and \VAR{N$-$1}, where \VAR{N} is the size of the team. \end{itemize} + + Before any \ac{PE} calls a broadcast routine, the following + conditions must be ensured: + \begin{itemize} + \item The \dest{} array on all \acp{PE} participating in the broadcast + is ready to accept the broadcast data. + \end{itemize} + Otherwise, the behavior is undefined. + + Upon return from a team-based broadcast routine, the following are true for the local + \ac{PE}: + \begin{itemize} + \item The \dest{} data object is updated. + \item The \source{} data object may be safely reused. + \end{itemize} \begin{DeprecateBlock} + \openshmem active-set broadcast routines are collective routines over an active set. 
+ They copy the \source{} data object on the \ac{PE} specified by + \VAR{PE\_root} to the \dest{} data object on the \acp{PE} + participating in the collective operation. + The same \dest{} and \source{} data objects and the same value of + \VAR{PE\_root} must be passed by all \acp{PE} participating in the + collective operation. + For active-set-based broadcasts: \begin{itemize} - \item The \dest{} object is updated on all \acp{PE} other than the - root \ac{PE}. + \item The \VAR{dest} object is updated on all PEs other than the root PE. \item All \acp{PE} in the active set defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet must participate in the operation. @@ -103,31 +127,24 @@ in the active set. \end{itemize} - Before any \ac{PE} calls a broadcast routine, the following + Before any \ac{PE} calls an active-set-based broadcast routine, the following conditions must be ensured: \begin{itemize} \item The \dest{} array on all \acp{PE} participating in the broadcast is ready to accept the broadcast data. - \item For active-set-based broadcasts, the - \VAR{pSync} array on all \acp{PE} in the + \item The \VAR{pSync} array on all \acp{PE} in the active set is not still in use from a prior call to an \openshmem collective routine. \end{itemize} - Otherwise, the behavior is undefined. + Otherwise, the behavior is undefined. - Upon return from a broadcast routine, the following are true for the local + Upon return from an active-set-based broadcast routine, the following are true for the local \ac{PE}: \begin{itemize} - \item For team-based broadcasts, the \dest{} data object is - updated. - \item For active-set-based broadcasts: - \begin{itemize} - \item If the current \ac{PE} is not the root \ac{PE}, the - \dest{} data object is updated. + \item If the current PE is not the root PE, the \dest{} data object is updated. + \item The \source{} data object may be safely reused. \item The values in the \VAR{pSync} array are restored to the original values.
- \end{itemize} - \item The \source{} data object may be safely reused. \end{itemize} \end{DeprecateBlock} } diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index 897bdcb3..63814c79 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -70,9 +70,7 @@ in processor number order. The resultant \dest{} array contains the contribution from \acp{PE} as follows: - \begin{itemize} - \item For an active set, the data from \ac{PE} \VAR{PE\_start} is first, then the - contribution from \ac{PE} \VAR{PE\_start} + \VAR{PE\_stride} second, and so on. + \begin{itemize} \item For a team, the data from \ac{PE} number \CONST{0} in the team is first, then the contribution from \ac{PE} \CONST{1} in the team, and so on. \end{itemize} @@ -91,6 +89,25 @@ otherwise invalid, the behavior is undefined. \begin{DeprecateBlock} + \openshmem \FUNC{collect} and \FUNC{fcollect} routines perform a collective + operation to concatenate \VAR{nelems} + data items from the \source{} array into the + \dest{} array, over an \openshmem active set + in processor number order. The resultant \dest{} array contains the contribution from + \acp{PE} as follows: + \begin{itemize} + \item For an active set, the data from \ac{PE} \VAR{PE\_start} is first, then the + contribution from \ac{PE} \VAR{PE\_start} + \VAR{PE\_stride} second, and so on. + \end{itemize} + + The collected result is written to the \dest{} array for all \acp{PE} + that participate in the operation. The same \dest{} and \source{} + arrays must be passed by all \acp{PE} that participate in the operation. + + The \FUNC{fcollect} routines require that \VAR{nelems} be the same value in all + participating \acp{PE}, while the \FUNC{collect} routines allow \VAR{nelems} to + vary from \ac{PE} to \ac{PE}. + Active-set-based collective routines operate over all \acp{PE} in the active set defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. 
As with all active-set-based collective routines, diff --git a/content/shmem_sync.tex b/content/shmem_sync.tex index 6e41ee82..8ba9b042 100644 --- a/content/shmem_sync.tex +++ b/content/shmem_sync.tex @@ -38,12 +38,12 @@ \apidescription{ \FUNC{shmem\_sync} is a collective synchronization routine over an - existing \openshmem team or active set. + existing \openshmem team. The routine registers the arrival of a \ac{PE} at a synchronization point in the program. This is a fast mechanism for synchronizing all \acp{PE} that participate in this collective call. The routine blocks the calling \ac{PE} until all \acp{PE} in the - specified team or active set have called \FUNC{shmem\_sync}. In a multithreaded \openshmem + specified team have called \FUNC{shmem\_sync}. In a multithreaded \openshmem program, only the calling thread is blocked. Team-based sync routines operate over all \acp{PE} in the provided team argument. All @@ -51,6 +51,15 @@ If \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined. +\begin{DeprecateBlock} + \FUNC{shmem\_sync} is a collective synchronization routine over an active set. + + The routine registers the arrival of a \ac{PE} at a synchronization point in the program. + This is a fast mechanism for synchronizing all \acp{PE} that participate in this + collective call. The routine blocks the calling \ac{PE} until all \acp{PE} in the + active set have called \FUNC{shmem\_sync}. In a multithreaded \openshmem + program, only the calling thread is blocked. + Active-set-based sync routines operate over all \acp{PE} in the active set defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. @@ -64,12 +73,14 @@ \VAR{PE\_size} must be equal on all \acp{PE} in the active set. The same work array must be passed in \VAR{pSync} to all \acp{PE} in the active set. + The same \VAR{pSync} array may be reused on consecutive calls to + \FUNC{shmem\_sync} if the same active set is used. 
+\end{DeprecateBlock} + In contrast with the \FUNC{shmem\_barrier} routine, \FUNC{shmem\_sync} only ensures completion and visibility of previously issued memory stores and does not ensure completion of remote memory updates issued via \openshmem routines. - The same \VAR{pSync} array may be reused on consecutive calls to - \FUNC{shmem\_sync} if the same active set is used. } \apireturnvalues{ From 6ad386ad671551732e2a75b031d88b944ee99b12 Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Thu, 5 Sep 2024 15:09:33 -0400 Subject: [PATCH 53/72] Reductions, Programming Model, strided teams active set langauge deprecated/removed --- content/collective_intro.tex | 7 +++-- content/shmem_collect.tex | 4 +-- content/shmem_reductions.tex | 40 +++++++++++++++++++++++++--- content/shmem_sync.tex | 13 ++++++--- content/shmem_team_split_strided.tex | 6 ++--- 5 files changed, 55 insertions(+), 15 deletions(-) diff --git a/content/collective_intro.tex b/content/collective_intro.tex index 823164ab..a8bf37ff 100644 --- a/content/collective_intro.tex +++ b/content/collective_intro.tex @@ -1,7 +1,7 @@ \emph{Collective routines} are defined as coordinated communication or synchronization operations performed by a group of \acp{PE}. -\openshmem provides three types of collective routines: +\openshmem provides four types of collective routines: \begin{enumerate} \item Collective routines that operate on teams use a team handle parameter to determine @@ -11,9 +11,12 @@ \begin{DeprecateBlock} \item Collective routines that operate on active sets use a set of parameters to determine which \acp{PE} will participate and what resources are used to perform operations. + +\item Collective routines that do not accept an active set + parameters and, as required, the default context. 
\end{DeprecateBlock} -\item Collective routines that accept neither team nor active set +\item Collective routines that do not accept team parameters, which implicitly operate on the world team and, as required, the default context. \end{enumerate} diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index 63814c79..68b3e614 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -66,8 +66,8 @@ \openshmem \FUNC{collect} and \FUNC{fcollect} routines perform a collective operation to concatenate \VAR{nelems} data items from the \source{} array into the - \dest{} array, over an \openshmem team or active set - in processor number order. The resultant \dest{} array contains the contribution from + \dest{} array, over an \openshmem team in processor number order. + The resultant \dest{} array contains the contribution from \acp{PE} as follows: \begin{itemize} diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index ff933b35..be5543c2 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -251,12 +251,14 @@ \subsubsubsection{PROD} \apiargument{IN}{source}{Symmetric address of an array, of length \VAR{nreduce} elements, that contains one element for each separate reduction routine. The type of \source{} should match that implied in the SYNOPSIS section.} -\apiargument{IN}{nreduce}{The number of elements in the \dest{} and \source{} - arrays. In teams based \ac{API} calls, \VAR{nreduce} must be of type size\_t. +\apiargument{IN}{nreduce}{the number of elements in the \dest{} and \source{} + arrays. in teams based \ac{API} calls, \VAR{nreduce} must be of type size\_t. 
In deprecated active-set based \ac{API} calls, \VAR{nreduce} must be of type integer.} \begin{DeprecateBlock} +\apiargument{IN}{nreduce}{In active-set based \ac{API} calls, + \VAR{nreduce} must be of type integer.} \apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the active set of \acp{PE}.} \apiargument{IN}{logPE\_stride}{The log (base 2) of the stride between consecutive @@ -273,7 +275,7 @@ \subsubsubsection{PROD} \end{apiarguments} \apidescription{ - \openshmem reduction routines are collective routines over an active set or + \openshmem reduction routines are collective routines over an existing \openshmem team that compute one or more reductions across symmetric arrays on multiple \acp{PE}. A reduction performs an associative binary routine across a set of values. @@ -295,6 +297,37 @@ \subsubsubsection{PROD} If \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined. + Before any \ac{PE} calls a reduction routine, the following conditions must be ensured: + \begin{itemize} + \item The \dest{} array on all \acp{PE} participating in the reduction + is ready to accept the results of the \OPR{reduction}. + \end{itemize} + Otherwise, the behavior is undefined. + + Upon return from a reduction routine, the following are true for the local + \ac{PE}: + \begin{itemize} + \item The \dest{} array is updated and the \source{} array may be safely reused. + \end{itemize} + +\begin{DeprecateBlock} + \openshmem reduction routines are collective routines over an active set + that compute one or more reductions across symmetric + arrays on multiple \acp{PE}. A reduction performs an associative binary routine + across a set of values. + + The \VAR{nreduce} argument determines the number of separate reductions to + perform. The \source{} array on all \acp{PE} participating in the reduction + provides one element for each reduction. 
The results of the reductions are placed in the + \dest{} array on all \acp{PE} participating in the reduction. + + The same \source{} and \dest{} arrays must be passed by all PEs that + participate in the collective. + The \source{} and \dest{} arguments must either be the same symmetric + address, or two different symmetric addresses corresponding to buffers that + do not overlap in memory. That is, they must be completely overlapping (sometimes referred to as an ``in place'' reduction) or + completely disjoint. + Active-set-based sync routines operate over all \acp{PE} in the active set defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. @@ -327,6 +360,7 @@ \subsubsubsection{PROD} \item If using active-set-based routines, the values in the \VAR{pSync} array are restored to the original values. \end{itemize} +\end{DeprecateBlock} The complex-typed interfaces are only provided for sum and product reductions. When the \Cstd translation environment does not support complex types diff --git a/content/shmem_sync.tex b/content/shmem_sync.tex index 8ba9b042..91a2ce61 100644 --- a/content/shmem_sync.tex +++ b/content/shmem_sync.tex @@ -1,7 +1,11 @@ \apisummary{ Registers the arrival of a \ac{PE} at a synchronization point. This routine does not return until all other \acp{PE} in a given OpenSHMEM team - or active set arrive at this synchronization point. + arrive at this synchronization point. +\begin{DeprecateBlock} + Registers the arrival of a \ac{PE} at a synchronization point. + This routine does not return until all other \acp{PE} in a given OpenSHMEM active set arrive at this synchronization point. +\end{DeprecateBlock} } \begin{apidefinition} @@ -51,6 +55,10 @@ If \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined. 
+ In contrast with the \FUNC{shmem\_barrier} routine, \FUNC{shmem\_sync} only + ensures completion and visibility of previously issued memory stores and does not ensure + completion of remote memory updates issued via \openshmem routines. + \begin{DeprecateBlock} \FUNC{shmem\_sync} is a collective synchronization routine over an active set. @@ -77,9 +85,6 @@ \FUNC{shmem\_sync} if the same active set is used. \end{DeprecateBlock} - In contrast with the \FUNC{shmem\_barrier} routine, \FUNC{shmem\_sync} only - ensures completion and visibility of previously issued memory stores and does not ensure - completion of remote memory updates issued via \openshmem routines. } diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index 08969792..cd1e4c81 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -102,10 +102,8 @@ \apinotes{ The \FUNC{shmem\_team\_split\_strided} operation uses an arbitrary - \VAR{stride} argument, whereas the \VAR{logPE\_stride} argument to the - active set collective operations only permits strides that are a power of two. - Arbitrary strides allow a greater number of PE subsets to be expressed - and can support a broader range of usage models. + \VAR{stride} argument. Arbitrary strides allow a greater number of + PE subsets to be expressed and can support a broader range of usage models. See the description of team handles and predefined teams in Section~\ref{subsec:team} for more information about team handle semantics and usage. 
From c7aad29af26bcec823a4d1c115409221ee5a4e67 Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Thu, 28 Mar 2024 09:46:40 -0400 Subject: [PATCH 54/72] Indent in shmem_alltoall --- content/shmem_alltoall.tex | 2 ++ 1 file changed, 2 insertions(+) diff --git a/content/shmem_alltoall.tex b/content/shmem_alltoall.tex index c37823d8..4b4b92eb 100644 --- a/content/shmem_alltoall.tex +++ b/content/shmem_alltoall.tex @@ -131,6 +131,7 @@ Before any \ac{PE} calls a \FUNC{shmem\_alltoall} routine, the following conditions must be ensured: + \begin{itemize} \item The \VAR{dest} data object on all \acp{PE} in the active set is ready to accept the \FUNC{shmem\_alltoall} data. @@ -138,6 +139,7 @@ on all \acp{PE} in the active set is not still in use from a prior call to a \FUNC{shmem\_alltoall} routine. \end{itemize} + Otherwise, the behavior is undefined. Upon return from a \FUNC{shmem\_alltoall} routine, the following is true for From 1a448673bde8157235cf24628cb8a9a2f7c8208f Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Fri, 26 Apr 2024 13:10:02 -0400 Subject: [PATCH 55/72] Update content/collective_intro.tex Typo Co-authored-by: David Ozog --- content/collective_intro.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/collective_intro.tex b/content/collective_intro.tex index a8bf37ff..249e0fbf 100644 --- a/content/collective_intro.tex +++ b/content/collective_intro.tex @@ -12,7 +12,7 @@ \item Collective routines that operate on active sets use a set of parameters to determine which \acp{PE} will participate and what resources are used to perform operations. -\item Collective routines that do not accept an active set +\item Collective routines that do not accept active set parameters and, as required, the default context. 
\end{DeprecateBlock} From 63d8554202ce460e8455a23277cdb1d50126844c Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Fri, 26 Apr 2024 13:10:23 -0400 Subject: [PATCH 56/72] Update content/shmem_broadcast.tex White Space Co-authored-by: David Ozog --- content/shmem_broadcast.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index 5aec7b9d..2470ec39 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -136,7 +136,7 @@ active set is not still in use from a prior call to an \openshmem collective routine. \end{itemize} - Otherwise, the behavior is undefined. + Otherwise, the behavior is undefined. Upon return from a active-based broadcast routine, the following are true for the local \ac{PE}: From 76321432e34799e3f045088a8fc154ea5492e983 Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Fri, 26 Apr 2024 13:12:21 -0400 Subject: [PATCH 57/72] Update shmem_reductions.tex Typo, uppercase --- content/shmem_reductions.tex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index be5543c2..79f0b42a 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -251,8 +251,8 @@ \subsubsubsection{PROD} \apiargument{IN}{source}{Symmetric address of an array, of length \VAR{nreduce} elements, that contains one element for each separate reduction routine. The type of \source{} should match that implied in the SYNOPSIS section.} -\apiargument{IN}{nreduce}{the number of elements in the \dest{} and \source{} - arrays. in teams based \ac{API} calls, \VAR{nreduce} must be of type size\_t. +\apiargument{IN}{nreduce}{The number of elements in the \dest{} and \source{} + arrays. In teams based \ac{API} calls, \VAR{nreduce} must be of type size\_t. 
In deprecated active-set based \ac{API} calls, \VAR{nreduce} must be of type integer.} From 1557a4eeabce2de6fb9ec50698bc40631e7560fe Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Fri, 26 Jul 2024 11:53:45 -0400 Subject: [PATCH 58/72] Update shmem_team_split_strided API Note, arbirary to any positive integer. --- content/shmem_team_split_strided.tex | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index cd1e4c81..59decede 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -101,9 +101,8 @@ } \apinotes{ - The \FUNC{shmem\_team\_split\_strided} operation uses an arbitrary - \VAR{stride} argument. Arbitrary strides allow a greater number of - PE subsets to be expressed and can support a broader range of usage models. + The \FUNC{shmem\_team\_split\_strided} operation can take any positive integer value + \VAR{stride} argument. See the description of team handles and predefined teams in Section~\ref{subsec:team} for more information about team handle semantics and usage. From 48201357aa3133011d6457ac45de5e78572a8e1e Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Fri, 26 Jul 2024 13:34:06 -0400 Subject: [PATCH 59/72] Fix Whitespace in shmem_alltoall --- content/shmem_alltoall.tex | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/content/shmem_alltoall.tex b/content/shmem_alltoall.tex index 4b4b92eb..bcd53156 100644 --- a/content/shmem_alltoall.tex +++ b/content/shmem_alltoall.tex @@ -42,10 +42,10 @@ destination \ac{PE}. The type of \source{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{nelems}{ - The number of elements to exchange for each \ac{PE}. - For \FUNC{shmem\_alltoallmem}, elements are bytes; - for \FUNC{shmem\_alltoall\{32,64\}}, elements are 4 or 8 bytes, - respectively. + The number of elements to exchange for each \ac{PE}. 
+ For \FUNC{shmem\_alltoallmem}, elements are bytes; + for \FUNC{shmem\_alltoall\{32,64\}}, elements are 4 or 8 bytes, + respectively. } \begin{DeprecateBlock} @@ -105,14 +105,14 @@ \begin{itemize} \item The \VAR{dest} data object on all \acp{PE} in the team is ready to accept the \FUNC{shmem\_alltoall} data. - \end{itemize} + \end{itemize} Upon return from a \FUNC{shmem\_alltoall} routine, the following is true for the local PE: \begin{itemize} - \item Its \VAR{dest} symmetric data object is completely updated and the - data has been copied out of the source data object. - \end{itemize} + \item Its \VAR{dest} symmetric data object is completely updated and the + data has been copied out of the source data object. + \end{itemize} \begin{DeprecateBlock} Active-set-based collective routines operate over all \acp{PE} in the active set @@ -145,8 +145,8 @@ Upon return from a \FUNC{shmem\_alltoall} routine, the following is true for the local PE: \begin{itemize} - \item Its \VAR{dest} symmetric data object is completely updated and the - data has been copied out of the source data object. + \item Its \VAR{dest} symmetric data object is completely updated and the + data has been copied out of the source data object. \item For active-set-based routines, the values in the \VAR{pSync} array are restored to the original values. 
\end{itemize} From 88ca7d935a67ff21cb31dc0f7cc6c8413f9dafe9 Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Fri, 26 Jul 2024 13:37:34 -0400 Subject: [PATCH 60/72] Fix whitespace shmem_broadcast --- content/shmem_broadcast.tex | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index 2470ec39..49abd50b 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -96,9 +96,9 @@ Upon return from a team-based broadcast routine, the following are true for the local \ac{PE}: \begin{itemize} - \item The \dest{} data object is updated. - \item The \source{} data object may be safely reused. - \end{itemize} + \item The \dest{} data object is updated. + \item The \source{} data object may be safely reused. + \end{itemize} \begin{DeprecateBlock} \openshmem active-set broadcast routines are collective routines over an active set. @@ -111,30 +111,30 @@ For active-set-based broadcasts: \begin{itemize} - \item The \VAR{dest} object is updated on all PEs other than the root PE. - \item All \acp{PE} in the active set defined by the - \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet - must participate in the operation. - \item Only \acp{PE} in the active set may call the routine. If a - \ac{PE} not in the active set calls an active-set-based + \item The \VAR{dest} object is updated on all PEs other than the root PE. + \item All \acp{PE} in the active set defined by the + \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet + must participate in the operation. + \item Only \acp{PE} in the active set may call the routine. If a + \ac{PE} not in the active set calls an active-set-based collective routine, the behavior is undefined. 
- \item The values of arguments \VAR{PE\_root}, \VAR{PE\_start}, + \item The values of arguments \VAR{PE\_root}, \VAR{PE\_start}, \VAR{logPE\_stride}, and \VAR{PE\_size} must be the same value on all \acp{PE} in the active set. - \item The value of \VAR{PE\_root} must be between \CONST{0} and + \item The value of \VAR{PE\_root} must be between \CONST{0} and \VAR{PE\_size $-$ 1}. - \item The same \VAR{pSync} work array must be passed by all \acp{PE} + \item The same \VAR{pSync} work array must be passed by all \acp{PE} in the active set. \end{itemize} Before any \ac{PE} calls a active-set-based broadcast routine, the following conditions must be ensured: \begin{itemize} - \item The \dest{} array on all \acp{PE} participating in the broadcast - is ready to accept the broadcast data. - \item The \VAR{pSync} array on all \acp{PE} in the - active set is not still in use from a prior call to an \openshmem - collective routine. + \item The \dest{} array on all \acp{PE} participating in the broadcast + is ready to accept the broadcast data. + \item The \VAR{pSync} array on all \acp{PE} in the + active set is not still in use from a prior call to an \openshmem + collective routine. \end{itemize} Otherwise, the behavior is undefined. From ca0e495945dcc3ff58ad5079a8e1c22ad15c5730 Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Fri, 26 Jul 2024 13:38:52 -0400 Subject: [PATCH 61/72] Edit Whitespace in shmem_collect --- content/shmem_collect.tex | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index 68b3e614..d14d8f17 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -67,12 +67,12 @@ operation to concatenate \VAR{nelems} data items from the \source{} array into the \dest{} array, over an \openshmem team in processor number order. 
- The resultant \dest{} array contains the contribution from + The resultant \dest{} array contains the contribution from \acp{PE} as follows: - \begin{itemize} - \item For a team, the data from \ac{PE} number \CONST{0} in the team is first, then the - contribution from \ac{PE} \CONST{1} in the team, and so on. + \begin{itemize} + \item For a team, the data from \ac{PE} number \CONST{0} in the team is first, then the + contribution from \ac{PE} \CONST{1} in the team, and so on. \end{itemize} The collected result is written to the \dest{} array for all \acp{PE} @@ -96,9 +96,9 @@ in processor number order. The resultant \dest{} array contains the contribution from \acp{PE} as follows: \begin{itemize} - \item For an active set, the data from \ac{PE} \VAR{PE\_start} is first, then the - contribution from \ac{PE} \VAR{PE\_start} + \VAR{PE\_stride} second, and so on. - \end{itemize} + \item For an active set, the data from \ac{PE} \VAR{PE\_start} is first, then the + contribution from \ac{PE} \VAR{PE\_start} + \VAR{PE\_stride} second, and so on. + \end{itemize} The collected result is written to the \dest{} array for all \acp{PE} that participate in the operation. The same \dest{} and \source{} From a537c154259a7435176c146a899745efa9bc0868 Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Fri, 26 Jul 2024 13:40:34 -0400 Subject: [PATCH 62/72] Fix Whitespace in collective_intro --- content/collective_intro.tex | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/content/collective_intro.tex b/content/collective_intro.tex index 249e0fbf..4996b178 100644 --- a/content/collective_intro.tex +++ b/content/collective_intro.tex @@ -4,21 +4,21 @@ \openshmem provides four types of collective routines: \begin{enumerate} -\item Collective routines that operate on teams use a team handle parameter to determine - which \acp{PE} will participate in the routine, and use resources encapsulated by the team object - to perform operations. 
See Section~\ref{subsec:team} for details on team management. + \item Collective routines that operate on teams use a team handle parameter to determine + which \acp{PE} will participate in the routine, and use resources encapsulated by the team object + to perform operations. See Section~\ref{subsec:team} for details on team management. -\begin{DeprecateBlock} -\item Collective routines that operate on active sets use a set of parameters to determine - which \acp{PE} will participate and what resources are used to perform operations. + \begin{DeprecateBlock} + \item Collective routines that operate on active sets use a set of parameters to determine + which \acp{PE} will participate and what resources are used to perform operations. -\item Collective routines that do not accept active set - parameters and, as required, the default context. -\end{DeprecateBlock} + \item Collective routines that do not accept active set + parameters and, as required, the default context. + \end{DeprecateBlock} -\item Collective routines that do not accept team - parameters, which implicitly operate on the world team and, as - required, the default context. + \item Collective routines that do not accept team + parameters, which implicitly operate on the world team and, as + required, the default context. 
\end{enumerate} Concurrent accesses to symmetric memory by an \openshmem collective From 633786f32e0832c214c5c09b183e7b1ae5c8c8c1 Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Fri, 26 Jul 2024 13:59:23 -0400 Subject: [PATCH 63/72] Fix Typo in shmem_alltoall --- content/shmem_alltoall.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_alltoall.tex b/content/shmem_alltoall.tex index bcd53156..4e145c26 100644 --- a/content/shmem_alltoall.tex +++ b/content/shmem_alltoall.tex @@ -35,7 +35,7 @@ \apiargument{OUT}{dest}{Symmetric address of a data object large enough to receive the combined total of \VAR{nelems} elements from each \ac{PE} in the - particpating \acp{PE}. + participating \acp{PE}. The type of \dest{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{source}{Symmetric address of a data object that contains \VAR{nelems} elements of data for each \ac{PE} in the participating \acp{PE}, ordered according to From c8e9ef6a26f423b2d9a49a29d4f5380d86b41153 Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Thu, 29 Aug 2024 14:58:02 -0400 Subject: [PATCH 64/72] Update content/shmem_team_split_strided.tex Co-authored-by: David Ozog --- content/shmem_team_split_strided.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index 59decede..26616d39 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -101,7 +101,7 @@ } \apinotes{ - The \FUNC{shmem\_team\_split\_strided} operation can take any positive integer value + The \FUNC{shmem\_team\_split\_strided} operation can take any integer value \VAR{stride} argument. 
See the description of team handles and predefined teams in From bc114219e340bd9fb3315b208b810f605a20124b Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Fri, 30 Aug 2024 10:24:23 -0400 Subject: [PATCH 65/72] Update content/shmem_broadcast.tex typo Co-authored-by: Muhammad Awad --- content/shmem_broadcast.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index 49abd50b..d67c2fb0 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -138,7 +138,7 @@ \end{itemize} Otherwise, the behavior is undefined. - Upon return from a active-based broadcast routine, the following are true for the local + Upon return from an active-based broadcast routine, the following are true for the local \ac{PE}: \begin{itemize} \item If the current PE is not the root PE, the \dest{} data object is updated. From bda943c3351ea51128ee10321e5b7c0f72194b4b Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Fri, 30 Aug 2024 11:00:53 -0400 Subject: [PATCH 66/72] Remove active language in reduction api args --- content/shmem_reductions.tex | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index 79f0b42a..46cb0abe 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -252,9 +252,7 @@ \subsubsubsection{PROD} contains one element for each separate reduction routine. The type of \source{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{nreduce}{The number of elements in the \dest{} and \source{} - arrays. In teams based \ac{API} calls, \VAR{nreduce} must be of type size\_t. - In deprecated active-set based \ac{API} calls, - \VAR{nreduce} must be of type integer.} + arrays. 
In teams based \ac{API} calls, \VAR{nreduce} must be of type size\_t.} \begin{DeprecateBlock} \apiargument{IN}{nreduce}{In active-set based \ac{API} calls, From b794cea8dad68c01633878e412274c9cfa88fab7 Mon Sep 17 00:00:00 2001 From: Muhammad Awad Date: Fri, 30 Aug 2024 11:53:46 -0700 Subject: [PATCH 67/72] Remove unnecessary new line --- content/shmem_alltoall.tex | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/content/shmem_alltoall.tex b/content/shmem_alltoall.tex index 4e145c26..190232d7 100644 --- a/content/shmem_alltoall.tex +++ b/content/shmem_alltoall.tex @@ -89,9 +89,7 @@ Given a \ac{PE} \VAR{i} that is the \kth \ac{PE} participating in the operation and a \ac{PE} \VAR{j} that is the \lth \ac{PE} - participating in the operation, - - \ac{PE} \VAR{i} sends the \lth block of its \VAR{source} object to + participating in the operation, \ac{PE} \VAR{i} sends the \lth block of its \VAR{source} object to the \kth block of the \VAR{dest} object of \ac{PE} \VAR{j}. From c59684132b12b93810b960e1c6b45e2a771c3947 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Thu, 29 Aug 2024 16:43:36 -0400 Subject: [PATCH 68/72] scan: 488 section committee edits (nelems/overlap) --- content/shmem_scan.tex | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/content/shmem_scan.tex b/content/shmem_scan.tex index 618a51a0..69f05bc9 100644 --- a/content/shmem_scan.tex +++ b/content/shmem_scan.tex @@ -6,16 +6,16 @@ %% C11 \begin{C11synopsis} -int @\FuncDecl{shmem\_sum\_inscan}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); -int @\FuncDecl{shmem\_sum\_exscan}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); +int @\FuncDecl{shmem\_sum\_inscan}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nelems); +int @\FuncDecl{shmem\_sum\_exscan}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nelems); \end{C11synopsis} where \TYPE{} is one of the integer, real, or complex 
types supported for the SUM operation as specified by Table \ref{teamreducetypes}. %% C/C++ \begin{Csynopsis} -int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_sum\_inscan}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); -int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_sum\_exscan}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); +int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_sum\_inscan}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nelems); +int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_sum\_exscan}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nelems); \end{Csynopsis} where \TYPE{} is one of the integer, real, or complex types supported for the SUM operation and has a corresponding \TYPENAME{} as specified @@ -26,17 +26,17 @@ The team over which to perform the operation. } \apiargument{OUT}{dest}{ - Symmetric address of an array, of length \VAR{nreduce} elements, - to receive the result of the scan routines. The type of + Symmetric address of an array, of length \VAR{nelems} elements, + to receive the result of the scan operation. The type of \dest{} should match that implied in the SYNOPSIS section. } \apiargument{IN}{source}{ - Symmetric address of an array, of length \VAR{nreduce} elements, - that contains one element for each separate scan routine. + Symmetric address of an array, of length \VAR{nelems} elements, + that contains one element for each separate scan operation. The type of \source{} should match that implied in the SYNOPSIS section. } - \apiargument{IN}{nreduce}{ + \apiargument{IN}{nelems}{ The number of elements in the \dest{} and \source{} arrays. } \end{apiarguments} @@ -49,7 +49,7 @@ multiple \acp{PE}. The scan operations are performed with the SUM operator. - The \VAR{nreduce} argument determines the number of separate scan + The \VAR{nelems} argument determines the number of separate scan operations to perform. 
The \source{} array on all \acp{PE} participating in the operation provides one element for each scan. The results of the scan operations are placed in the \dest{} array @@ -75,10 +75,14 @@ \end{cases} \end{equation*} + + The same \source{} and \dest{} arrays must be passed by all \acp{PE} that + participate in the collective. The \source{} and \dest{} arguments must either be the same symmetric address, or two different symmetric addresses - corresponding to buffers that do not overlap in memory. That is, - they must be completely overlapping or completely disjoint. + corresponding to buffers that do not overlap in memory. + That is, they must be completely overlapping (sometimes referred to as an + ``in place'' reduction) or completely disjoint. Team-based scan routines operate over all \acp{PE} in the provided team argument. All \acp{PE} in the provided team must participate in From 076580775dc377f4e41bae48ced6f8d21af999ba Mon Sep 17 00:00:00 2001 From: David Ozog Date: Fri, 30 Aug 2024 11:56:43 -0400 Subject: [PATCH 69/72] collectives: clarify src buffer entry requirements --- content/shmem_alltoall.tex | 12 ++++++++---- content/shmem_broadcast.tex | 15 +++++++++------ content/shmem_collect.tex | 11 +++++++++++ content/shmem_reductions.tex | 12 ++++++++---- content/shmem_scan.tex | 13 ++++++++++--- 5 files changed, 46 insertions(+), 17 deletions(-) diff --git a/content/shmem_alltoall.tex b/content/shmem_alltoall.tex index 190232d7..07317b69 100644 --- a/content/shmem_alltoall.tex +++ b/content/shmem_alltoall.tex @@ -98,12 +98,16 @@ If \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined.
- Before any \ac{PE} calls a \FUNC{shmem\_alltoall} routine, - the following conditions must be ensured: + Before any \ac{PE} calls a \FUNC{shmem\_alltoall} routine, the following + conditions must be ensured, otherwise the behavior is undefined: \begin{itemize} - \item The \VAR{dest} data object on all \acp{PE} in the team is - ready to accept the \FUNC{shmem\_alltoall} data. + \item The \dest{} array on all \acp{PE} in the team is ready to + accept the result of the operation. + \item The \source{} buffer at the local \ac{PE} is ready to be + read by any \ac{PE} in the team. \end{itemize} + The application does not need to synchronize to ensure that the \source{} + buffer is ready across all \acp{PE} prior to calling this routine. Upon return from a \FUNC{shmem\_alltoall} routine, the following is true for the local PE: diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index d67c2fb0..05b67068 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -85,13 +85,16 @@ the team. \end{itemize} - Before any \ac{PE} calls a broadcast routine, the following - conditions must be ensured: + Before any \ac{PE} calls a broadcast routine, the following conditions + must be ensured, otherwise the behavior is undefined: \begin{itemize} - \item The \dest{} array on all \acp{PE} participating in the broadcast - is ready to accept the broadcast data. - \end{itemize} - Otherwise, the behavior is undefined. + \item The \dest{} array on all \acp{PE} in the team is ready to + accept the result of the operation. + \item The \source{} buffer at the local root \ac{PE} is ready to be + read by any \ac{PE} in the team. + \end{itemize} + The application does not need to synchronize to ensure that the \source{} + buffer is ready across all \acp{PE} prior to calling this routine. 
Upon return from a team-based broadcast routine, the following are true for the local \ac{PE}: diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index d14d8f17..479c93e2 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -88,6 +88,17 @@ If \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined. + Before any \ac{PE} calls a collect routine, the following conditions must + be ensured, otherwise the behavior is undefined: + \begin{itemize} + \item The \dest{} array on all \acp{PE} in the team is ready to + accept the result of the operation. + \item The \source{} buffer at the local \ac{PE} is ready to be read + by any \ac{PE} in the team. + \end{itemize} + The application does not need to synchronize to ensure that the \source{} + buffer is ready across all \acp{PE} prior to calling this routine. + \begin{DeprecateBlock} \openshmem \FUNC{collect} and \FUNC{fcollect} routines perform a collective operation to concatenate \VAR{nelems} diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index 46cb0abe..888a51e1 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -295,12 +295,16 @@ \subsubsubsection{PROD} If \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined. - Before any \ac{PE} calls a reduction routine, the following conditions must be ensured: + Before any \ac{PE} calls a reduction routine, the following conditions + must be ensured, otherwise the behavior is undefined: \begin{itemize} - \item The \dest{} array on all \acp{PE} participating in the reduction - is ready to accept the results of the \OPR{reduction}. + \item The \dest{} array on all \acp{PE} in the team is ready to + accept the results of the operation. + \item The \source{} buffer at the local \ac{PE} is ready to be read by + any \ac{PE} in the team. 
\end{itemize} - Otherwise, the behavior is undefined. + The application does not need to synchronize to ensure that the \source{} + buffer is ready across all \acp{PE} prior to calling this routine. Upon return from a reduction routine, the following are true for the local \ac{PE}: diff --git a/content/shmem_scan.tex b/content/shmem_scan.tex index 69f05bc9..f53f1acc 100644 --- a/content/shmem_scan.tex +++ b/content/shmem_scan.tex @@ -90,9 +90,16 @@ \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined. - Before any \ac{PE} calls a scan routine, the \dest{} array on all - \acp{PE} participating in the operation must be ready to accept the - results of the operation. Otherwise, the behavior is undefined. + Before any \ac{PE} calls a scan routine, the following conditions must be + ensured, otherwise the behavior is undefined: + \begin{itemize} + \item The \dest{} array on all \acp{PE} in the team is ready to accept + the result of the operation. + \item The \source{} buffer at the local \ac{PE} is ready to be read by + any \ac{PE} in the team. + \end{itemize} + The application does not need to synchronize to ensure that the \source{} + buffer is ready across all \acp{PE} prior to calling this routine. 
Upon return from a scan routine, the following are true for the local \ac{PE}: the \dest{} array is updated, and the \source{} array From 00bcc40731eac1a6d1bbeb00181e76a6b2dc61d1 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Fri, 30 Aug 2024 15:29:21 -0400 Subject: [PATCH 70/72] collectives: "array" instead of source "buffer" --- content/shmem_alltoall.tex | 4 ++-- content/shmem_broadcast.tex | 4 ++-- content/shmem_collect.tex | 4 ++-- content/shmem_reductions.tex | 4 ++-- content/shmem_scan.tex | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/content/shmem_alltoall.tex b/content/shmem_alltoall.tex index 07317b69..f271de11 100644 --- a/content/shmem_alltoall.tex +++ b/content/shmem_alltoall.tex @@ -103,11 +103,11 @@ \begin{itemize} \item The \dest{} array on all \acp{PE} in the team is ready to accept the result of the operation. - \item The \source{} buffer at the local \ac{PE} is ready to be + \item The \source{} array at the local \ac{PE} is ready to be read by any \ac{PE} in the team. \end{itemize} The application does not need to synchronize to ensure that the \source{} - buffer is ready across all \acp{PE} prior to calling this routine. + array is ready across all \acp{PE} prior to calling this routine. Upon return from a \FUNC{shmem\_alltoall} routine, the following is true for the local PE: diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index 05b67068..bd936b5f 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -90,11 +90,11 @@ \begin{itemize} \item The \dest{} array on all \acp{PE} in the team is ready to accept the result of the operation. - \item The \source{} buffer at the local root \ac{PE} is ready to be + \item The \source{} array at the local root \ac{PE} is ready to be read by any \ac{PE} in the team. \end{itemize} The application does not need to synchronize to ensure that the \source{} - buffer is ready across all \acp{PE} prior to calling this routine. 
+ array is ready across all \acp{PE} prior to calling this routine. Upon return from a team-based broadcast routine, the following are true for the local \ac{PE}: diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index 479c93e2..b7e2d3fa 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -93,11 +93,11 @@ \begin{itemize} \item The \dest{} array on all \acp{PE} in the team is ready to accept the result of the operation. - \item The \source{} buffer at the local \ac{PE} is ready to be read + \item The \source{} array at the local \ac{PE} is ready to be read by any \ac{PE} in the team. \end{itemize} The application does not need to synchronize to ensure that the \source{} - buffer is ready across all \acp{PE} prior to calling this routine. + array is ready across all \acp{PE} prior to calling this routine. \begin{DeprecateBlock} \openshmem \FUNC{collect} and \FUNC{fcollect} routines perform a collective diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index 888a51e1..fa48bb3d 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -300,11 +300,11 @@ \subsubsubsection{PROD} \begin{itemize} \item The \dest{} array on all \acp{PE} in the team is ready to accept the results of the operation. - \item The \source{} buffer at the local \ac{PE} is ready to be read by + \item The \source{} array at the local \ac{PE} is ready to be read by any \ac{PE} in the team. \end{itemize} The application does not need to synchronize to ensure that the \source{} - buffer is ready across all \acp{PE} prior to calling this routine. + array is ready across all \acp{PE} prior to calling this routine. 
Upon return from a reduction routine, the following are true for the local \ac{PE}: diff --git a/content/shmem_scan.tex b/content/shmem_scan.tex index f53f1acc..e55b2b7e 100644 --- a/content/shmem_scan.tex +++ b/content/shmem_scan.tex @@ -95,11 +95,11 @@ \begin{itemize} \item The \dest{} array on all \acp{PE} in the team is ready to accept the result of the operation. - \item The \source{} buffer at the local \ac{PE} is ready to be read by + \item The \source{} array at the local \ac{PE} is ready to be read by any \ac{PE} in the team. \end{itemize} The application does not need to synchronize to ensure that the \source{} - buffer is ready across all \acp{PE} prior to calling this routine. + array is ready across all \acp{PE} prior to calling this routine. Upon return from a scan routine, the following are true for the local \ac{PE}: the \dest{} array is updated, and the \source{} array From d9d3e5811b0b454795b40afa0cdc5f75dda04b05 Mon Sep 17 00:00:00 2001 From: James Dinan Date: Thu, 5 Sep 2024 20:57:01 -0400 Subject: [PATCH 71/72] DocEdit: Update Version to 1.6 Signed-off-by: James Dinan --- utils/defs.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/defs.tex b/utils/defs.tex index 6751bd59..fa2d0d99 100644 --- a/utils/defs.tex +++ b/utils/defs.tex @@ -34,7 +34,7 @@ \newcommand{\newtext}[1]{\textcolor{ForestGreen}{#1}} \newcommand{\oldtext}[1]{\textcolor{magenta}{\sout{#1}}} -\newcommand{\insertDocVersion}{1.5} +\newcommand{\insertDocVersion}{1.6} \newcommand{\openshmem}[1][]{% {Open\-SHMEM\ifthenelse{\equal{#1}{}}{}{~#1}}\xspace} \newcommand{\HEADER}[1]{\textit{#1}} From 82b8e1976154339c6bc0c2a1f628765e4f201612 Mon Sep 17 00:00:00 2001 From: James Dinan Date: Thu, 5 Sep 2024 20:57:25 -0400 Subject: [PATCH 72/72] DocEdit: Whitespace Signed-off-by: James Dinan --- content/shmem_ptr.tex | 1 - 1 file changed, 1 deletion(-) diff --git a/content/shmem_ptr.tex b/content/shmem_ptr.tex index fe1a3236..cc76570b 100644 --- a/content/shmem_ptr.tex 
+++ b/content/shmem_ptr.tex @@ -24,7 +24,6 @@ of an \openshmem routine that requires a symmetric address results in undefined behavior. - The \FUNC{shmem\_ptr} routine can provide efficient means to accomplish communication, for example when a sequence of reads and writes to a data object on a remote \ac{PE} does not match the access pattern provided in an