diff --git a/content/backmatter.tex b/content/backmatter.tex index f6480e75..89bbe837 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -665,6 +665,14 @@ \section{Version 1.6} \FUNC{shmem\_ibput}. \ChangelogRef{subsec:shmem_ibget, subsec:shmem_ibput}% % +\item Added \FUNC{shmem\_signal\_add} and \FUNC{shmem\_signal\_set} to + update a remote flag without the associated data transfer of a put-with-signal operation. +\ChangelogRef{subsec:shmem_signal_add, subsec:shmem_signal_set}% +% +\item Clarified that \OPR{Fence} operations only guarantee ordering for + operations that are performed on the same context. +\ChangelogRef{subsec:shmem_fence}% +% \item Added a team-based pointer query routine: \FUNC{shmem\_team\_ptr}. \ChangelogRef{subsec:shmem_team_ptr}% @@ -691,7 +699,6 @@ \section{Version 1.6} 2 to enable profiling with profile library defined effects and additional arguments. \ChangelogRef{subsec:shmem_pcontrol} ->>>>>>> master % \end{itemize} diff --git a/content/shmem_fence.tex b/content/shmem_fence.tex index e7b116ea..a470a477 100644 --- a/content/shmem_fence.tex +++ b/content/shmem_fence.tex @@ -24,7 +24,7 @@ \ac{PE} on the given context prior to the call to \FUNC{shmem\_fence} are guaranteed to be delivered before any subsequent operations on symmetric data - objects to the same \ac{PE}. \FUNC{shmem\_fence} guarantees order of delivery, + objects to the same \ac{PE} on the same context. \FUNC{shmem\_fence} guarantees order of delivery, not completion. It does not guarantee order of delivery of nonblocking \GET{} or values fetched by nonblocking \ac{AMO} routines. If \VAR{ctx} has the value \CONST{SHMEM\_CTX\_INVALID}, no operation is diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index c01dd899..ff933b35 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -283,9 +283,11 @@ \subsubsubsection{PROD} provides one element for each reduction. 
The results of the reductions are placed in the \dest{} array on all \acp{PE} participating in the reduction. + The same \source{} and \dest{} arrays must be passed by all \acp{PE} that + participate in the collective. The \source{} and \dest{} arguments must either be the same symmetric address, or two different symmetric addresses corresponding to buffers that - do not overlap in memory. That is, they must be completely overlapping or + do not overlap in memory. That is, they must be completely overlapping (sometimes referred to as an ``in place'' reduction) or completely disjoint. Team-based reduction routines operate over all \acp{PE} in the provided team argument. All diff --git a/content/shmem_signal_add.tex b/content/shmem_signal_add.tex new file mode 100644 index 00000000..272b03a6 --- /dev/null +++ b/content/shmem_signal_add.tex @@ -0,0 +1,47 @@ +\apisummary{ + Adds to a signal value of a remote data object. +} + +\begin{apidefinition} + +\begin{C11synopsis} +void @\FuncDecl{shmem\_signal\_add}@(shmem_ctx_t ctx, const uint64_t *sig_addr, uint64_t signal, int pe); +\end{C11synopsis} + +\begin{Csynopsis} +void @\FuncDecl{shmem\_signal\_add}@(const uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_ctx\_signal\_add}@(shmem_ctx_t ctx, const uint64_t *sig_addr, uint64_t signal, int pe); +\end{Csynopsis} + +\begin{apiarguments} + \apiargument{IN}{ctx}{ + A context handle specifying the context on which to perform the + operation. When this argument is not provided, the operation is + performed on the default context. + } + \apiargument{OUT}{sig\_addr}{ + Symmetric address of the signal data object to be updated on the + remote \ac{PE}. + } + \apiargument{IN}{signal}{ + Unsigned 64-bit value that is used for updating the remote + \VAR{sig\_addr} signal data object. + } + \apiargument{IN}{pe}{ + \ac{PE} number of the remote \ac{PE}. 
+ } +\end{apiarguments} + +\apidescription{ + \FUNC{shmem\_signal\_add} adds \VAR{signal} to the signal data + object pointed to by \VAR{sig\_addr} on \ac{PE}~\VAR{pe}. + The update to the \VAR{sig\_addr} signal object at the remote + \ac{PE} is expected to satisfy the atomicity guarantees as described + in Section~\ref{subsec:signal_atomicity}. +} + +\apireturnvalues{ + None. +} + +\end{apidefinition} diff --git a/content/shmem_signal_set.tex b/content/shmem_signal_set.tex new file mode 100644 index 00000000..00c70345 --- /dev/null +++ b/content/shmem_signal_set.tex @@ -0,0 +1,47 @@ +\apisummary{ + Sets the signal value of a remote data object. +} + +\begin{apidefinition} + +\begin{C11synopsis} +void @\FuncDecl{shmem\_signal\_set}@(shmem_ctx_t ctx, const uint64_t *sig_addr, uint64_t signal, int pe); +\end{C11synopsis} + +\begin{Csynopsis} +void @\FuncDecl{shmem\_signal\_set}@(const uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_ctx\_signal\_set}@(shmem_ctx_t ctx, const uint64_t *sig_addr, uint64_t signal, int pe); +\end{Csynopsis} + +\begin{apiarguments} + \apiargument{IN}{ctx}{ + A context handle specifying the context on which to perform the + operation. When this argument is not provided, the operation is + performed on the default context. + } + \apiargument{OUT}{sig\_addr}{ + Symmetric address of the signal data object to be updated on the + remote \ac{PE}. + } + \apiargument{IN}{signal}{ + Unsigned 64-bit value that is used for updating the remote + \VAR{sig\_addr} signal data object. + } + \apiargument{IN}{pe}{ + \ac{PE} number of the remote \ac{PE}. + } +\end{apiarguments} + +\apidescription{ + \FUNC{shmem\_signal\_set} writes \VAR{signal} into the signal data + object pointed to by \VAR{sig\_addr} on \ac{PE}~\VAR{pe}. + The update to the \VAR{sig\_addr} signal object at the remote + \ac{PE} is expected to satisfy the atomicity guarantees as described + in Section~\ref{subsec:signal_atomicity}. +} + +\apireturnvalues{ + None. 
+} + +\end{apidefinition} diff --git a/main_spec.tex b/main_spec.tex index 1cbe7286..23e2d624 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -304,13 +304,17 @@ \subsubsubsection{\textbf{SHMEM\_ATOMIC\_FETCH\_XOR\_NBI}} \subsection{Signaling Operations}\label{sec:shmem_signal} This section specifies the OpenSHMEM support for \OPR{put-with-signal}, -nonblocking \OPR{put-with-signal}, and \OPR{signal-fetch} routines. The +nonblocking \OPR{put-with-signal}, and \OPR{signal-\{add, fetch, set\}} routines. The put-with-signal routines provide a method for copying data from a contiguous local data object to a data object on a specified \ac{PE} and subsequently -updating a remote flag to signal completion. The signal-fetch routine provides -support for fetching a signal update operation. - -\openshmem \OPR{put-with-signal} routines specified in this section have two +updating a remote flag to signal completion. +The signal-add and signal-set routines provide methods for updating +the signal object without the associated data transfer of a +put-with-signal operation. +The signal-fetch routine provides support for reading a local signal value. + +\openshmem \OPR{put-with-signal} and \OPR{signal-\{add, set\}} +routines specified in this section have two variants. In one of the variants, the context handle, \VAR{ctx}, is explicitly passed as an argument. In this variant, the operation is performed on the specified context. If the context handle \VAR{ctx} does not correspond to a @@ -321,16 +325,20 @@ \subsection{Signaling Operations}\label{sec:shmem_signal} \subsubsection{Atomicity Guarantees for Signaling Operations} \label{subsec:signal_atomicity} All signaling operations put-with-signal, nonblocking put-with-signal, and -signal-fetch are performed on a signal data object, a remotely accessible +signal-\{add, fetch, set\} are performed on a signal data object, a remotely accessible symmetric object of type \VAR{uint64\_t}. 
A signal operator in the -put-with-signal routine is a \openshmem library constant that determines the +put-with-signal routine is an \openshmem library constant that determines the type of update to be performed as a signal on the signal data object. -All signaling operations on the signal data object completes as if performed +All signaling operations on the signal data object complete as if performed atomically with respect to the following: \begin{itemize} \item other blocking or nonblocking variant of the put-with-signal routine that updates the signal data object using the same signal update operator; + \item signal-add routine when the put-with-signal routine uses the + \LibConstRef{SHMEM\_SIGNAL\_ADD} signal operator; + \item signal-set routine when the put-with-signal routine uses the + \LibConstRef{SHMEM\_SIGNAL\_SET} signal operator; \item signal-fetch routine that fetches the signal data object; and \item any point-to-point synchronization routine that accesses the signal data object. @@ -360,9 +368,15 @@ \subsubsection{\textbf{SHMEM\_PUT\_SIGNAL}}\label{subsec:shmem_put_signal} \subsubsection{\textbf{SHMEM\_PUT\_SIGNAL\_NBI}}\label{subsec:shmem_put_signal_nbi} \input{content/shmem_put_signal_nbi.tex} +\subsubsection{\textbf{SHMEM\_SIGNAL\_ADD}}\label{subsec:shmem_signal_add} +\input{content/shmem_signal_add.tex} + \subsubsection{\textbf{SHMEM\_SIGNAL\_FETCH}}\label{subsec:shmem_signal_fetch} \input{content/shmem_signal_fetch.tex} +\subsubsection{\textbf{SHMEM\_SIGNAL\_SET}}\label{subsec:shmem_signal_set} +\input{content/shmem_signal_set.tex} + \subsection{Collective Routines}\label{subsec:coll}